Half-way start on an encoding feature to write byte string values directly into the output buffer
diff --git a/inc/qcbor/UsefulBuf.h b/inc/qcbor/UsefulBuf.h
index 1662fcc..4550026 100644
--- a/inc/qcbor/UsefulBuf.h
+++ b/inc/qcbor/UsefulBuf.h
@@ -1286,6 +1286,58 @@
 static inline int UsefulOutBuf_IsBufferNULL(UsefulOutBuf *pUOutBuf);
 
 
+static inline UsefulBuf UsefulOutBuf_Stuff(UsefulOutBuf *pUOutBuf)
+{
+   /* Returns the unused tail of the output buffer (where it starts and how
+    * much room is left) so the caller can write into it directly. ptr is
+    * NULL for a NULL buffer: never do pointer arithmetic on a NULL base. */
+   UsefulBuf R;
+   R.len = UsefulOutBuf_RoomLeft(pUOutBuf);
+   R.ptr = pUOutBuf->UB.ptr ? (uint8_t *)pUOutBuf->UB.ptr + pUOutBuf->data_len : NULL;
+   return R;
+}
+
+
+// TODO: return code
+static inline void UsefulOutBuf_StuffDone(UsefulOutBuf *pMe, size_t uAmount)
+{
+   /* Marks uAmount bytes, which the caller is expected to have written
+    * straight into the unused tail of the buffer, as valid data by
+    * advancing the valid-data length. Failures are latched in pMe->err.
+    */
+   if(pMe->err) {
+      /* Sticky error from an earlier operation; do nothing. */
+      return;
+   }
+
+   /* 0. Sanity check the UsefulOutBuf structure.
+    *
+    * The magic-number counter measure is left disabled here:
+    *    if(pMe->magic != USEFUL_OUT_BUF_MAGIC) { pMe->err = 1; return; }
+    * It would catch an uninitialized or corrupted pMe. Attackers can
+    * defeat it, but it is a hurdle and does good with very little code.
+    *
+    * Check #1: valid data must not extend past the end of the buffer.
+    * This would only occur if pMe was uninitialized or corrupted, but
+    * it is also what guarantees the subtraction in RoomLeft cannot
+    * wrap around.
+    *
+    * 1. Will it fit?
+    *
+    * Check #2: WillItFit() is the same as
+    *    uAmount <= (pMe->UB.len - pMe->data_len)
+    * Because || short-circuits it only runs after check #1 passed.
+    */
+   if(pMe->data_len > pMe->UB.len ||
+      !UsefulOutBuf_WillItFit(pMe, uAmount)) {
+      pMe->err = 1;
+      return;
+   }
+
+   /* 2. The bytes are in place; count them as valid data. */
+   pMe->data_len += uAmount;
+}
+
+
 /**
  *  @brief Returns the resulting valid data in a UsefulOutBuf
  *
diff --git a/inc/qcbor/qcbor_encode.h b/inc/qcbor/qcbor_encode.h
index 0abc594..d8f1262 100644
--- a/inc/qcbor/qcbor_encode.h
+++ b/inc/qcbor/qcbor_encode.h
@@ -840,6 +840,31 @@
 static void QCBOREncode_AddBytesToMapN(QCBOREncodeContext *pCtx, int64_t nLabel, UsefulBufC Bytes);
 
 
+/*
+ The purpose of these two functions is to let the bytes that make up
+ a byte string be written directly into the QCBOR output buffer, so
+ no separate copy of the byte string is needed in memory. This is
+ particularly useful if the byte string is large, for example the
+ encrypted payload of a COSE_Encrypt message.
+
+ QCBOREncode_StartBytes
+   pPlace -- On return, the pointer where the value of the byte string
+             is to be written and the maximum number of bytes allowed.
+
+ QCBOREncode_EndBytes
+   uAmount -- The number of bytes actually written into pPlace.
+
+ When QCBOREncode_EndBytes is called, the correct CBOR header
+ will be inserted in front of the byte string value.
+
+ TODO: finish this documentation, complete the implementation, test the code.
+
+ */
+void QCBOREncode_StartBytes(QCBOREncodeContext *pCtx, UsefulBuf *pPlace);
+
+void QCBOREncode_EndBytes(QCBOREncodeContext *pCtx, size_t uAmount);
+
+
 /**
  @brief Add a binary UUID to the encoded output.
 
diff --git a/src/qcbor_encode.c b/src/qcbor_encode.c
index 7f9c169..979b631 100644
--- a/src/qcbor_encode.c
+++ b/src/qcbor_encode.c
@@ -931,6 +931,54 @@
 }
 
 
+void QCBOREncode_StartBytes(QCBOREncodeContext *pMe, UsefulBuf *pPlace)
+{
+   /* The byte string being started counts as one more item in the
+    * nesting level currently open.
+    */
+   IncrementMapOrArrayCount(pMe);
+
+   /* Offset in the output at which the byte string starts. It is
+    * recorded in the nesting state as a uint32_t, not a size_t, to
+    * keep stack usage smaller, so make sure it cannot wrap around when
+    * narrowed. Note that on 64-bit machines CBOR larger than 4GB can
+    * be encoded as long as no recorded offsets occur past the 4GB
+    * mark, but the public interface says that the maximum is 4GB to
+    * keep the discussion simpler.
+    */
+   const size_t uStartOffset = UsefulOutBuf_GetEndPosition(&(pMe->OutBuf));
+
+   /* QCBOR_MAX_ARRAY_OFFSET is slightly less than UINT32_MAX so this
+    * code can run on a 32-bit machine and tests can pass on a 32-bit
+    * machine. If it was exactly UINT32_MAX, then this code would not
+    * compile or run on a 32-bit machine and an #ifdef or some machine
+    * size detection would be needed reducing portability.
+    */
+   if(uStartOffset >= QCBOR_MAX_ARRAY_OFFSET) {
+      pMe->uError = QCBOR_ERR_BUFFER_TOO_LARGE;
+      *pPlace = NULLUsefulBuf;
+      return;
+   }
+
+   /* Open a nesting level for the byte string. The cast from size_t
+    * to uint32_t is safe because of the check above.
+    */
+   // TODO: proper type constant
+   pMe->uError = Nesting_Increase(&(pMe->nesting), 200, (uint32_t)uStartOffset);
+   *pPlace = UsefulOutBuf_Stuff(&(pMe->OutBuf));
+}
+
+
+void QCBOREncode_EndBytes(QCBOREncodeContext *pMe, size_t uAmount)
+{
+   /* Commit the uAmount bytes written into the place handed out by
+    * QCBOREncode_StartBytes(), then insert the byte string head.
+    * TODO: sort out the major type */
+   UsefulOutBuf_StuffDone(&(pMe->OutBuf), uAmount);
+   InsertCBORHead(pMe, CBOR_MAJOR_TYPE_BYTE_STRING, uAmount);
+}
+
+
 /*
  * Public function for closing arrays and maps. See qcbor/qcbor_encode.h
  */