Add QCBOREncode_Tell and QCBOREncode_SubString (#251)

* QCBOREncode_Tell and QCBOREncode_SubString

* Test and documentation improvements

* test, doc, back out error checks that didn't work

* more tests; tidiness

* Small documentation improvement

* Add RetrieveUndecodedInput

* Proper factoring for UsefulOutBuf Storage

* nits

---------

Co-authored-by: Laurence Lundblade <lgl@securitytheory.com>
diff --git a/inc/qcbor/UsefulBuf.h b/inc/qcbor/UsefulBuf.h
index 3b7afd7..38bdb8d 100644
--- a/inc/qcbor/UsefulBuf.h
+++ b/inc/qcbor/UsefulBuf.h
@@ -43,6 +43,9 @@
 
  when         who             what, where, why
  --------     ----            --------------------------------------------------
+ 08/14/2024   llundblade      Add UsefulOutBuf_RetrieveOutputStorage().
+ 08/13/2024   llundblade      Add UsefulInputBuf_RetrieveUndecodedInput().
+ 08/08/2024   llundblade      Add UsefulOutBuf_SubString().
  10/05/2024   llundblade      Add Xxx_OffsetToPointer.
  19/12/2022   llundblade      Document that adding empty data is allowed.
  4/11/2022    llundblade      Add GetOutPlace and Advance to UsefulOutBuf.
@@ -1381,6 +1384,35 @@
 UsefulBufC UsefulOutBuf_CopyOut(UsefulOutBuf *pUOutBuf, UsefulBuf Dest);
 
 
+/**
+ * @brief Return a substring of the output data.
+ *
+ * @param[in] pUOutBuf  Pointer to the @ref UsefulOutBuf.
+ * @param[in] uStart    Offset of start of substring.
+ * @param[in] uLen      Length of substring.
+ *
+ * This is the same as UsefulOutBuf_OutUBuf(), but returns a
+ * substring. @c NULLUsefulBufC is returned if the requested substring
+ * is off the end of the output bytes or if in error state.
+ */
+UsefulBufC UsefulOutBuf_SubString(UsefulOutBuf *pUOutBuf,
+                                  const size_t  uStart,
+                                  const size_t  uLen);
+
+
+/**
+ * @brief Retrieve the storage buffer passed in to UsefulOutBuf_Init().
+ *
+ * @param[in] pUOutBuf  Pointer to the @ref UsefulOutBuf.
+ *
+ * @return The output storage buffer passed to UsefulOutBuf_Init().
+ *
+ * This doesn't give any information about how much has been encoded
+ * or the error state. It just returns the exact @ref UsefulBuf given
+ * to UsefulOutBuf_Init().
+ */
+static UsefulBuf UsefulOutBuf_RetrieveOutputStorage(UsefulOutBuf *pUOutBuf);
+
 
 
 /**
@@ -1724,6 +1756,16 @@
 static void UsefulInputBuf_SetBufferLength(UsefulInputBuf *pUInBuf, size_t uNewLen);
 
 
+/**
+ * @brief  Retrieve the undecoded input buffer.
+ *
+ * @param[in] pUInBuf  Pointer to the @ref UsefulInputBuf.
+ *
+ * @return The input that was given to UsefulInputBuf_Init().
+ *
+ * A simple convenience method, should it be useful to get the original input back.
+ */
+static UsefulBufC UsefulInputBuf_RetrieveUndecodedInput(UsefulInputBuf *pUInBuf);
 
 
 /*----------------------------------------------------------
@@ -2228,6 +2270,12 @@
 }
 
 
+static inline UsefulBuf UsefulOutBuf_RetrieveOutputStorage(UsefulOutBuf *pMe)
+{
+   return pMe->UB; /* The storage descriptor held in the context, returned by value */
+}
+
+
 
 
 static inline void UsefulInputBuf_Init(UsefulInputBuf *pMe, UsefulBufC UB)
@@ -2496,6 +2544,11 @@
     pMe->UB.len = uNewLen;
 }
 
+static inline UsefulBufC UsefulInputBuf_RetrieveUndecodedInput(UsefulInputBuf *pMe)
+{
+   return pMe->UB; /* Whole input descriptor, by value. NOTE(review): UB.len appears to be changed by UsefulInputBuf_SetBufferLength() -- confirm */
+}
+
 
 #ifdef __cplusplus
 }
diff --git a/inc/qcbor/qcbor_decode.h b/inc/qcbor/qcbor_decode.h
index 0d70d4d..cc8fec9 100644
--- a/inc/qcbor/qcbor_decode.h
+++ b/inc/qcbor/qcbor_decode.h
@@ -1239,6 +1239,19 @@
 
 
 /**
+ * @brief  Retrieve the undecoded input buffer.
+ *
+ * @param[in]  pCtx  The decode context.
+ *
+ * @return The input that was given to QCBORDecode_Init().
+ *
+ * A simple convenience method, should it be useful to get the original input back.
+ */
+static UsefulBufC
+QCBORDecode_RetrieveUndecodedInput(QCBORDecodeContext *pCtx);
+
+
+/**
  * @brief Get the decoding error.
  *
  * @param[in] pCtx    The decoder context.
@@ -1608,6 +1621,12 @@
    return (uint32_t)UsefulInputBuf_Tell(&(pMe->InBuf));
 }
 
+static inline UsefulBufC
+QCBORDecode_RetrieveUndecodedInput(QCBORDecodeContext *pMe)
+{
+   return UsefulInputBuf_RetrieveUndecodedInput(&(pMe->InBuf)); /* Thin wrapper over the UsefulInputBuf in the decode context */
+}
+
 static inline QCBORError
 QCBORDecode_GetError(QCBORDecodeContext *pMe)
 {
diff --git a/inc/qcbor/qcbor_encode.h b/inc/qcbor/qcbor_encode.h
index 83c167e..f3ea182 100644
--- a/inc/qcbor/qcbor_encode.h
+++ b/inc/qcbor/qcbor_encode.h
@@ -2226,7 +2226,8 @@
  *
  * This may be called multiple times. It will always return the
  * same. It can also be interleaved with calls to
- * QCBOREncode_FinishGetSize().
+ * QCBOREncode_FinishGetSize(). See QCBOREncode_SubString() for a
+ * means to get the thus-far-encoded CBOR.
  *
  * QCBOREncode_GetErrorState() can be called to get the current
  * error state in order to abort encoding early as an optimization, but
@@ -2253,22 +2254,35 @@
 
 
 /**
- * @brief Indicate whether output buffer is NULL or not.
+ * @brief Indicate whether the output storage buffer is NULL.
  *
  * @param[in] pCtx  The encoding context.
  *
  * @return 1 if the output buffer is @c NULL.
  *
- * Sometimes a @c NULL input buffer is given to QCBOREncode_Init() so
- * that the size of the generated CBOR can be calculated without
- * allocating a buffer for it. This returns 1 when the output buffer
- * is @c NULL and 0 when it is not.
+ * As described in QCBOREncode_Init(), @c Storage.ptr may be given as @c NULL
+ * for output size calculation. This returns 1 when that is true, and 0 if not.
  */
 static int
 QCBOREncode_IsBufferNULL(QCBOREncodeContext *pCtx);
 
 
 /**
+ * @brief Retrieve the storage buffer passed in to QCBOREncode_Init().
+ *
+ * @param[in] pCtx  The encoding context.
+ *
+ * @return The output storage buffer passed to QCBOREncode_Init().
+ *
+ * This doesn't give any information about how much has been encoded
+ * or the error state. It just returns the exact @ref UsefulBuf given
+ * to QCBOREncode_Init().
+ */
+static UsefulBuf
+QCBOREncode_RetrieveOutputStorage(QCBOREncodeContext *pCtx);
+
+
+/**
  * @brief Get the encoding error state.
  *
  * @param[in] pCtx  The encoding context.
@@ -2286,6 +2300,72 @@
 
 
 /**
+ * @brief Returns current end of encoded data.
+ *
+ * @param[in] pCtx  The encoding context.
+ *
+ * @return Byte offset of end of encoded data.
+ *
+ * The purpose of this is to enable cryptographic hashing over a
+ * subpart of thus far CBOR-encoded data. Then perhaps a signature
+ * over the hashed CBOR is added to the encoded output. There is
+ * nothing specific to hashing or signing in this, so this can be used
+ * for other purposes too.
+ *
+ * Call this to get the offset of the start of the encoded
+ * to-be-hashed CBOR items, then call QCBOREncode_SubString().
+ * QCBOREncode_Tell() can also be called twice, first to get the
+ * offset of the start and second for the offset of the end. Those
+ * offsets can be applied to the output storage buffer.
+ *
+ * This will return successfully even if the encoder is in the error
+ * state.
+ *
+ * WARNING: All definite-length arrays and maps opened before the
+ * first call to QCBOREncode_Tell() must not be closed until the
+ * substring is obtained and processed. Similarly, every
+ * definite-length array or map opened after the first call to
+ * QCBOREncode_Tell() must be closed before the substring is obtained
+ * and processed.  The same applies for opened byte strings. There is
+ * no detection of these errors. This occurs because QCBOR goes back
+ * and inserts the lengths of definite-length arrays and maps when
+ * they are closed. This insertion will make the offsets incorrect.
+ */
+static size_t
+QCBOREncode_Tell(QCBOREncodeContext *pCtx);
+
+
+/**
+ * @brief Get a substring of encoded CBOR for cryptographic hash
+ *
+ * @param[in] pCtx  The encoding context.
+ * @param[in] uStart  The start offset of substring.
+ *
+ * @return Pointer and length of the substring.
+ *
+ * @c uStart is obtained by calling QCBOREncode_Tell() before encoding
+ * the first item in the substring. Then encode some data items. Then
+ * call this. The substring returned contains the encoded data items.
+ *
+ * The substring may have deeply nested arrays and maps as long as any
+ * opened after the call to QCBOREncode_Tell() are closed before this
+ * is called.
+ *
+ * This will return @c NULLUsefulBufC if the encoder is in the error
+ * state or if @c uStart is beyond the end of the thus-far encoded
+ * data items.
+ *
+ * If @c uStart is 0, all the thus-far-encoded CBOR will be returned.
+ * Unlike QCBOREncode_Finish(), this will succeed even if some arrays
+ * and maps are not closed.
+ *
+ * See important usage WARNING in QCBOREncode_Tell()
+ */
+UsefulBufC
+QCBOREncode_SubString(QCBOREncodeContext *pCtx, const size_t uStart);
+
+
+/**
  * Encode the "head" of a CBOR data item.
  *
  * @param Buffer       Buffer to output the encoded head to; must be
@@ -4019,6 +4099,14 @@
    return UsefulOutBuf_IsBufferNULL(&(pMe->OutBuf));
 }
 
+
+static inline UsefulBuf
+QCBOREncode_RetrieveOutputStorage(QCBOREncodeContext *pMe)
+{
+   return UsefulOutBuf_RetrieveOutputStorage(&(pMe->OutBuf)); /* Thin wrapper over the UsefulOutBuf in the encode context */
+}
+
+
 static inline QCBORError
 QCBOREncode_GetErrorState(QCBOREncodeContext *pMe)
 {
@@ -4040,6 +4128,12 @@
 }
 
 
+static inline size_t
+QCBOREncode_Tell(QCBOREncodeContext *pMe)
+{
+   return UsefulOutBuf_GetEndPosition(&(pMe->OutBuf)); /* End position of the output buffer; no error-state check is made here */
+}
+
 /* ========================================================================
      END OF PRIVATE INLINE IMPLEMENTATION
    ======================================================================== */
diff --git a/src/UsefulBuf.c b/src/UsefulBuf.c
index f90790b..4a7970f 100644
--- a/src/UsefulBuf.c
+++ b/src/UsefulBuf.c
@@ -44,6 +44,7 @@
 
  when        who          what, where, why
  --------    ----         ---------------------------------------------------
+ 08/08/2024  llundblade   Add UsefulOutBuf_SubString().
  21/05/2024  llundblade   Comment formatting and some code tidiness.
  19/12/2022  llundblade   Don't pass NULL to memmove when adding empty data.
  4/11/2022   llundblade   Add GetOutPlace and Advance to UsefulOutBuf
@@ -413,6 +414,35 @@
 }
 
 
+/*
+ * Public function -- see UsefulBuf.h
+ * Returns NULLUsefulBufC unless uLen bytes starting at uStart lie within the output.
+ * Code Reviewers: THIS FUNCTION DOES POINTER MATH
+ */
+UsefulBufC UsefulOutBuf_SubString(UsefulOutBuf *pMe,
+                                  const size_t  uStart,
+                                  const size_t  uLen)
+{
+   const UsefulBufC Tmp = UsefulOutBuf_OutUBuf(pMe);
+
+   if(UsefulBuf_IsNULLC(Tmp)) { /* No usable output to take a substring of */
+      return NULLUsefulBufC;
+   }
+
+   if(uStart > Tmp.len) { /* Start is past the end; also keeps the subtraction below from wrapping */
+      return NULLUsefulBufC;
+   }
+
+   if(Tmp.len - uStart < uLen) { /* Too few bytes after uStart; this form cannot overflow, unlike uStart + uLen */
+      return NULLUsefulBufC;
+   }
+
+   UsefulBufC SubString;
+   SubString.ptr = (const uint8_t *)Tmp.ptr + uStart; /* Within Tmp per the two checks above */
+   SubString.len = uLen;
+
+   return SubString;
+}
 
 
 /*
diff --git a/src/qcbor_encode.c b/src/qcbor_encode.c
index 2a99110..218e12c 100644
--- a/src/qcbor_encode.c
+++ b/src/qcbor_encode.c
@@ -166,7 +166,7 @@
 {
    return pNesting->pCurrentNesting == &pNesting->pArrays[0] ? false : true;
 }
-#endif /* QCBOR_DISABLE_ENCODE_USAGE_GUARDS */
+#endif /* ! QCBOR_DISABLE_ENCODE_USAGE_GUARDS */
 
 
 
@@ -1085,3 +1085,28 @@
 
    return nReturn;
 }
+
+
+/*
+ * Public function to get substring of encoded-so-far. See qcbor/qcbor_encode.h
+ */
+UsefulBufC
+QCBOREncode_SubString(QCBOREncodeContext *pMe, const size_t uStart)
+{
+   if(pMe->uError) { /* Encoder is in the error state: no substring is returned */
+      return NULLUsefulBufC;
+   }
+
+   /* An attempt was made to detect usage errors by comparing uStart
+    * to offsets of open arrays and maps in pMe->nesting, but it is
+    * not possible because there's not enough information in just
+    * the offset. It's not possible to know if Tell() was called before
+    * or after an Open(). To detect this error, the nesting level
+    * would also need to be known. This is not frequently used, so
+    * it is not worth adding this complexity.
+    */
+
+   const size_t uEnd = QCBOREncode_Tell(pMe); /* The substring always runs to the current end */
+   /* NOTE(review): uEnd - uStart wraps if uStart > uEnd; this appears to rely on the callee's start check -- confirm */
+   return UsefulOutBuf_SubString(&(pMe->OutBuf), uStart, uEnd - uStart);
+}
diff --git a/test/UsefulBuf_Tests.c b/test/UsefulBuf_Tests.c
index 6d2b5a4..83e9a68 100644
--- a/test/UsefulBuf_Tests.c
+++ b/test/UsefulBuf_Tests.c
@@ -127,6 +127,30 @@
       goto Done;
    }
 
+   Out = UsefulOutBuf_SubString(&UOB, 10, 8);
+   if(UsefulBuf_IsNULLC(Out) ||
+      UsefulBuf_Compare(UsefulBuf_FROM_SZ_LITERAL("unbounce"), Out) ||
+      UsefulOutBuf_GetError(&UOB)) {
+      szReturn = "SubString substring";
+      goto Done;
+   }
+
+   Out = UsefulOutBuf_SubString(&UOB, 0, Expected.len);
+   if(UsefulBuf_IsNULLC(Out) ||
+      UsefulBuf_Compare(Expected, Out) ||
+      UsefulOutBuf_GetError(&UOB)) {
+      szReturn = "SubString all";
+      goto Done;
+   }
+
+   Out = UsefulOutBuf_SubString(&UOB, Expected.len, 0);
+   if(UsefulBuf_IsNULLC(Out) ||
+      UsefulBuf_Compare(UsefulBuf_FROM_SZ_LITERAL(""), Out) ||
+      UsefulOutBuf_GetError(&UOB)) {
+      szReturn = "SubString empty";
+      goto Done;
+   }
+
    /* Now test the size calculation mode */
    UsefulOutBuf_Init(&UOB, SizeCalculateUsefulBuf);
 
@@ -246,7 +270,7 @@
       return "Bad insertion point not caught";
 
 
-   UsefulBuf_MAKE_STACK_UB(outBuf2,10);
+   UsefulBuf_MAKE_STACK_UB(outBuf2, 10);
 
    UsefulOutBuf_Init(&UOB, outBuf2);
 
@@ -260,6 +284,29 @@
       return "insert with data should have failed";
    }
 
+   UsefulOutBuf_Init(&UOB, outBuf2);
+   UsefulOutBuf_AppendString(&UOB, "abc123");
+
+   UsefulBufC Out = UsefulOutBuf_SubString(&UOB, 7, 1);
+   if(!UsefulBuf_IsNULLC(Out)) {
+      return "SubString start should fail off end 1";
+   }
+   Out = UsefulOutBuf_SubString(&UOB, 5, 3);
+   if(!UsefulBuf_IsNULLC(Out)) {
+      return "SubString len should fail off end 2";
+   }
+   Out = UsefulOutBuf_SubString(&UOB, 0, 7);
+   if(!UsefulBuf_IsNULLC(Out)) {
+      return "SubString len should fail off end 3";
+   }
+   Out = UsefulOutBuf_SubString(&UOB, 7, 0);
+   if(!UsefulBuf_IsNULLC(Out)) {
+      return "SubString len should fail off end 4";
+   }
+   Out = UsefulOutBuf_SubString(&UOB, 6, 1);
+   if(!UsefulBuf_IsNULLC(Out)) {
+      return "SubString len should fail off end 5";
+   }
 
    UsefulOutBuf_Init(&UOB, (UsefulBuf){NULL, SIZE_MAX - 5});
    UsefulOutBuf_AppendData(&UOB, "123456789", SIZE_MAX -6);
diff --git a/test/qcbor_decode_tests.c b/test/qcbor_decode_tests.c
index 25c9a85..7434f0d 100644
--- a/test/qcbor_decode_tests.c
+++ b/test/qcbor_decode_tests.c
@@ -10165,6 +10165,18 @@
       return -107;
    }
 
+   QCBORDecode_Init(&DCtx,
+                    UsefulBuf_FROM_BYTE_ARRAY_LITERAL(pValidMapEncoded),
+                    QCBOR_DECODE_MODE_NORMAL);
+
+   UsefulBufC Xx = QCBORDecode_RetrieveUndecodedInput(&DCtx);
+   if(Xx.ptr != pValidMapEncoded) {
+      return -200;
+   }
+   if(Xx.len != sizeof(pValidMapEncoded)) {
+      return -201;
+   }
+
    return 0;
 }
 
diff --git a/test/qcbor_encode_tests.c b/test/qcbor_encode_tests.c
index be5548c..44f97ab 100644
--- a/test/qcbor_encode_tests.c
+++ b/test/qcbor_encode_tests.c
@@ -180,6 +180,11 @@
    }
 
 
+   UsefulBuf Tmp = QCBOREncode_RetrieveOutputStorage(&EC);
+   if(Tmp.ptr != spBigBuf || Tmp.len != sizeof(spBigBuf)) { /* Either a wrong pointer or a wrong length is a failure */
+      return -111;
+   }
+
    // Make another encoded message with the CBOR from the previous
    // put into this one
    UsefulBuf_MAKE_STACK_UB(MemoryForEncoded2, 20);
@@ -196,6 +201,8 @@
    if(QCBOREncode_Finish(&EC, &Encoded2)) {
       return -5;
    }
+
+
     /*
      [                // 0    1:3
         451,          // 1    1:2
@@ -2652,6 +2659,12 @@
       return -11;
    }
 
+   UsefulBuf Tmp;
+   Tmp = QCBOREncode_RetrieveOutputStorage(&EC);
+   if(Tmp.ptr != NULL || Tmp.len != UINT32_MAX) { /* Either a wrong pointer or a wrong length is a failure */
+      return -111;
+   }
+
    /* ------ QCBOR_ERR_UNSUPPORTED -------- */
    QCBOREncode_Init(&EC, Large);
    QCBOREncode_OpenArray(&EC);
@@ -3109,3 +3122,85 @@
 
    return 0;
 }
+
+
+int32_t SubStringTest(void) /* Exercises QCBOREncode_Tell() and QCBOREncode_SubString(); returns 0 when every check passes */
+{
+   QCBOREncodeContext EC;
+   size_t             uStart;
+   size_t             uCurrent;
+   UsefulBufC         SS;
+   UsefulBufC         Encoded;
+   QCBORError         uErr;
+
+   QCBOREncode_Init(&EC, UsefulBuf_FROM_BYTE_ARRAY(spBigBuf));
+   QCBOREncode_OpenArray(&EC);
+   uStart = QCBOREncode_Tell(&EC); /* Taken after the outer array is opened and before the first item */
+   QCBOREncode_AddInt64(&EC, 0);
+   SS = QCBOREncode_SubString(&EC, uStart);
+   if(UsefulBuf_Compare(SS, (UsefulBufC){"\x00", 1})) { /* Only the integer is expected, not the array head */
+      return 1;
+   }
+
+   QCBOREncode_OpenArray(&EC); /* An array opened and closed after Tell() is expected in the substring as 0x80 */
+
+   QCBOREncode_CloseArray(&EC);
+   SS = QCBOREncode_SubString(&EC, uStart);
+   if(UsefulBuf_Compare(SS, (UsefulBufC){"\x00\x80", 2})) {
+      return 3;
+   }
+
+
+   /* Try it on a sequence */
+   QCBOREncode_Init(&EC, UsefulBuf_FROM_BYTE_ARRAY(spBigBuf));
+   uStart = QCBOREncode_Tell(&EC);
+   QCBOREncode_AddInt64(&EC, 1);
+   QCBOREncode_AddInt64(&EC, 1);
+   QCBOREncode_AddInt64(&EC, 1);
+   QCBOREncode_AddInt64(&EC, 1);
+   SS = QCBOREncode_SubString(&EC, uStart);
+   if(UsefulBuf_Compare(SS, (UsefulBufC){"\x01\x01\x01\x01", 4})) {
+      return 10;
+   }
+
+   uCurrent = QCBOREncode_Tell(&EC);
+   if(!UsefulBuf_IsNULLC(QCBOREncode_SubString(&EC, uCurrent+1))) { /* A start one past the current end must give NULLUsefulBufC */
+      return 11;
+   }
+
+#ifndef QCBOR_DISABLE_ENCODE_USAGE_GUARDS
+   /* Now cause an error */
+   QCBOREncode_OpenMap(&EC);
+   QCBOREncode_CloseArray(&EC); /* Array close against an open map; only checked when usage guards are compiled in */
+   if(!UsefulBuf_IsNULLC(QCBOREncode_SubString(&EC, uStart))) { /* SubString() must give NULLUsefulBufC once in error */
+      return 15;
+   }
+#endif /* ! QCBOR_DISABLE_ENCODE_USAGE_GUARDS */
+
+
+   QCBOREncode_Init(&EC, UsefulBuf_FROM_BYTE_ARRAY(spBigBuf));
+   QCBOREncode_AddInt64(&EC, 1);
+   QCBOREncode_AddInt64(&EC, 1);
+   uStart = QCBOREncode_Tell(&EC); /* Start after the two leading integers */
+   QCBOREncode_OpenMap(&EC);
+   QCBOREncode_OpenMapInMapN(&EC, 3);
+   QCBOREncode_OpenArrayInMapN(&EC, 4);
+   QCBOREncode_AddInt64(&EC, 0);
+   QCBOREncode_CloseArray(&EC);
+   QCBOREncode_CloseMap(&EC);
+   QCBOREncode_CloseMap(&EC);
+   SS = QCBOREncode_SubString(&EC, uStart); /* Everything opened after Tell() has been closed by this point */
+   if(UsefulBuf_Compare(SS, (UsefulBufC){"\xA1\x03\xA1\x04\x81\x00", 6})) {
+      return 20;
+   }
+
+   uErr = QCBOREncode_Finish(&EC, &Encoded); /* The full encoding must still finish without error after SubString() */
+   if(uErr) {
+      return 21;
+   }
+   if(UsefulBuf_Compare(Encoded, (UsefulBufC){"\x01\x01\xA1\x03\xA1\x04\x81\x00", 8})) {
+      return 22;
+   }
+
+   return 0;
+}
diff --git a/test/qcbor_encode_tests.h b/test/qcbor_encode_tests.h
index 8452009..43a6290 100644
--- a/test/qcbor_encode_tests.h
+++ b/test/qcbor_encode_tests.h
@@ -191,5 +191,7 @@
 int32_t OpenCloseBytesTest(void);
 
 
+int32_t SubStringTest(void);
+
 
 #endif /* defined(__QCBOR__qcbor_encode_tests__) */
diff --git a/test/run_tests.c b/test/run_tests.c
index d1d49ed..140eb53 100644
--- a/test/run_tests.c
+++ b/test/run_tests.c
@@ -162,6 +162,7 @@
     TEST_ENTRY(ExponentAndMantissaEncodeTests),
 #endif /* QCBOR_DISABLE_EXP_AND_MANTISSA */
     TEST_ENTRY(ParseEmptyMapInMapTest),
+    TEST_ENTRY(SubStringTest),
     TEST_ENTRY(BoolTest)
 };