Fix bug handling nested tags; code size reduction (#64) This fixes a bug processing nested tags. It also reduces size of the decode implementation by about 150 bytes. The tag processing code is commented better. Co-authored-by: Laurence Lundblade <lgl@securitytheory.com>

commit: 9961530244bba779e53a4ef83ef1a9721e6d9eeb [log] [tgz]
author: Laurence Lundblade <laurencelundblade@users.noreply.github.com> Sun Nov 29 11:19:47 2020 -0800
committer: GitHub <noreply@github.com> Sun Nov 29 11:19:47 2020 -0800
tree: 46a4d817e3b275687d8e2ee8d221d04de4d968b2
parent: f6da33c93bd3a42970329a6813392a2621d7cc03 [diff]
diff --git a/README.md b/README.md
index 1b84140..da5f58b 100644
--- a/README.md
+++ b/README.md

@@ -231,8 +231,8 @@
     |               | smallest | largest |  
     |---------------|----------|---------|
     | encode only   |      850 |    2100 |
-    | decode only   |     2500 |   13500 |
-    | combined      |     3350 |   15600 |
+    | decode only   |     2350 |   13500 |
+    | combined      |     3200 |   15600 |
     
  From the table above, one can see that the amount of code pulled in
  from the QCBOR library varies a lot, ranging from 1KB to 15KB.  The

diff --git a/src/qcbor_decode.c b/src/qcbor_decode.c
index efe6ebc..585a517 100644
--- a/src/qcbor_decode.c
+++ b/src/qcbor_decode.c

@@ -1667,23 +1667,6 @@
 }
 
 
-/*
- Mostly just assign the right data type for the date string.
- */
-static inline QCBORError DecodeDateString(QCBORItem *pDecodedItem)
-{
-   if(pDecodedItem->uDataType != QCBOR_TYPE_TEXT_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-
-   const UsefulBufC Temp        = pDecodedItem->val.string;
-   pDecodedItem->val.dateString = Temp;
-   pDecodedItem->uDataType      = QCBOR_TYPE_DATE_STRING;
-   ShiftTags(pDecodedItem);
-   return QCBOR_SUCCESS;
-}
-
-
 
 /*
  The epoch formatted date. Turns lots of different forms of encoding
@@ -1767,24 +1750,6 @@
 }
 
 
-/*
- Mostly just assign the right data type for the bignum.
- */
-static inline QCBORError DecodeBigNum(QCBORItem *pDecodedItem)
-{
-   // Stack Use: UsefulBuf 1  -- 16
-   if(pDecodedItem->uDataType != QCBOR_TYPE_BYTE_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-   const UsefulBufC Temp    = pDecodedItem->val.string;
-   pDecodedItem->val.bigNum = Temp;
-   const bool bIsPosBigNum = (bool)(pDecodedItem->uTags[0] == CBOR_TAG_POS_BIGNUM);
-   pDecodedItem->uDataType  = (uint8_t)(bIsPosBigNum ? QCBOR_TYPE_POSBIGNUM
-                                                     : QCBOR_TYPE_NEGBIGNUM);
-   return QCBOR_SUCCESS;
-}
-
-
 #ifndef QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA
 /*
  Decode decimal fractions and big floats.
@@ -1887,71 +1852,6 @@
 #endif /* QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA */
 
 
-static inline QCBORError DecodeURI(QCBORItem *pDecodedItem)
-{
-   if(pDecodedItem->uDataType != QCBOR_TYPE_TEXT_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-   pDecodedItem->uDataType = QCBOR_TYPE_URI;
-   return QCBOR_SUCCESS;
-}
-
-
-static inline QCBORError DecodeB64URL(QCBORItem *pDecodedItem)
-{
-   if(pDecodedItem->uDataType != QCBOR_TYPE_TEXT_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-   pDecodedItem->uDataType = QCBOR_TYPE_BASE64URL;
-
-   return QCBOR_SUCCESS;
-}
-
-
-static inline QCBORError DecodeB64(QCBORItem *pDecodedItem)
-{
-   if(pDecodedItem->uDataType != QCBOR_TYPE_TEXT_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-   pDecodedItem->uDataType = QCBOR_TYPE_BASE64;
-
-   return QCBOR_SUCCESS;
-}
-
-
-static inline QCBORError DecodeRegex(QCBORItem *pDecodedItem)
-{
-   if(pDecodedItem->uDataType != QCBOR_TYPE_TEXT_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-   pDecodedItem->uDataType = QCBOR_TYPE_REGEX;
-
-   return QCBOR_SUCCESS;
-}
-
-
-static inline QCBORError DecodeWrappedCBOR(QCBORItem *pDecodedItem)
-{
-   if(pDecodedItem->uDataType != QCBOR_TYPE_BYTE_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-   pDecodedItem->uDataType = QBCOR_TYPE_WRAPPED_CBOR;
-
-   return QCBOR_SUCCESS;
-}
-
-
-static inline QCBORError DecodeWrappedCBORSequence(QCBORItem *pDecodedItem)
-{
-   if(pDecodedItem->uDataType != QCBOR_TYPE_BYTE_STRING) {
-      return QCBOR_ERR_BAD_OPT_TAG;
-   }
-   pDecodedItem->uDataType = QBCOR_TYPE_WRAPPED_CBOR_SEQUENCE;
-
-   return QCBOR_SUCCESS;
-}
-
-
 static inline QCBORError DecodeMIME(QCBORItem *pDecodedItem)
 {
    if(pDecodedItem->uDataType == QCBOR_TYPE_TEXT_STRING) {
@@ -1967,19 +1867,88 @@
 }
 
 
-static inline QCBORError DecodeUUID(QCBORItem *pDecodedItem)
+/*
+ * Table of CBOR tags whose content is either a text string or a byte
+ * string. The table maps the CBOR tag to the QCBOR type. The high-bit
+ * of uQCBORtype indicates the content should be a byte string rather
+ * than a text string
+ */
+struct StringTagMapEntry {
+   uint16_t uTagNumber;
+   uint8_t  uQCBORtype;
+};
+
+#define IS_BYTE_STRING_BIT 0x80
+#define QCBOR_TYPE_MASK   ~IS_BYTE_STRING_BIT
+
+static const struct StringTagMapEntry StringTagMap[] = {
+   {CBOR_TAG_DATE_STRING,   QCBOR_TYPE_DATE_STRING},
+   {CBOR_TAG_POS_BIGNUM,    QCBOR_TYPE_POSBIGNUM | IS_BYTE_STRING_BIT},
+   {CBOR_TAG_NEG_BIGNUM,    QCBOR_TYPE_NEGBIGNUM | IS_BYTE_STRING_BIT},
+   {CBOR_TAG_CBOR,          QBCOR_TYPE_WRAPPED_CBOR | IS_BYTE_STRING_BIT},
+   {CBOR_TAG_URI,           QCBOR_TYPE_URI},
+   {CBOR_TAG_B64URL,        QCBOR_TYPE_BASE64URL},
+   {CBOR_TAG_B64,           QCBOR_TYPE_BASE64},
+   {CBOR_TAG_REGEX,         QCBOR_TYPE_REGEX},
+   {CBOR_TAG_BIN_UUID,      QCBOR_TYPE_UUID | IS_BYTE_STRING_BIT},
+   {CBOR_TAG_CBOR_SEQUENCE, QBCOR_TYPE_WRAPPED_CBOR_SEQUENCE | IS_BYTE_STRING_BIT},
+   {CBOR_TAG_INVALID16,     QCBOR_TYPE_NONE}
+};
+
+
+/*
+ * Process the CBOR tags that whose content is a byte string or a text
+ * string and for which the string is just passed on to the caller.
+ *
+ * This maps the CBOR tag to the QCBOR type and checks the content
+ * type.  Nothing more. It may not be the most important
+ * functionality, but it part of implementing as much of RFC 7049 as
+ * possible.
+ *
+ * This returns QCBOR_SUCCESS if the tag was procssed,
+ * QCBOR_ERR_UNSUPPORTED if the tag was not processed and
+ * QCBOR_ERR_BAD_OPT_TAG if the content type was wrong for the tag.
+ */
+static inline
+QCBORError ProcessTaggedString(uint16_t uTag, QCBORItem *pDecodedItem)
 {
-   if(pDecodedItem->uDataType != QCBOR_TYPE_BYTE_STRING) {
+   /* This only works on tags that were not mapped; no need for other yet */
+   if(uTag > QCBOR_LAST_UNMAPPED_TAG) {
+      return QCBOR_ERR_UNSUPPORTED;
+   }
+
+   unsigned uIndex;
+   for(uIndex = 0; StringTagMap[uIndex].uTagNumber != CBOR_TAG_INVALID16; uIndex++) {
+      if(StringTagMap[uIndex].uTagNumber == uTag) {
+         break;
+      }
+   }
+
+   const uint8_t uQCBORType = StringTagMap[uIndex].uQCBORtype;
+   if(uQCBORType == QCBOR_TYPE_NONE) {
+      /* repurpose this error to mean, not handled here */
+      return QCBOR_ERR_UNSUPPORTED;
+   }
+
+   uint8_t uExpectedType = QCBOR_TYPE_TEXT_STRING;
+   if(uQCBORType & IS_BYTE_STRING_BIT) {
+      uExpectedType = QCBOR_TYPE_BYTE_STRING;
+   }
+
+   if(pDecodedItem->uDataType != uExpectedType) {
       return QCBOR_ERR_BAD_OPT_TAG;
    }
-   pDecodedItem->uDataType = QCBOR_TYPE_UUID;
 
+   pDecodedItem->uDataType = (uint8_t)(uQCBORType & QCBOR_TYPE_MASK);
    return QCBOR_SUCCESS;
 }
 
 
 /*
- Public function, see header qcbor/qcbor_decode.h file
+ * CBOR tag numbers for the item were decoded in GetNext_TaggedItem(),
+ * but the whole tag was not decoded. Here, the whole tags (tag number
+ * and tag content) that are supported by QCBOR are decoded. This is a
+ * quick pass through for items that are not tags.
  */
 static QCBORError
 QCBORDecode_GetNextTag(QCBORDecodeContext *me, QCBORItem *pDecodedItem)
@@ -1991,87 +1960,60 @@
       goto Done;
    }
 
-   for(unsigned uTagIndex = 0; uTagIndex < QCBOR_MAX_TAGS_PER_ITEM; uTagIndex++) {
-      switch(pDecodedItem->uTags[uTagIndex]) {
+   /* When there are no tag numbers for the item, this exits first
+    * thing and effectively does nothing.
+    *
+    * This loops over all the tag numbers accumulated for this item
+    * trying to decode and interpret them. This stops at the end of
+    * the list or at the first tag number that can't be interpreted by
+    * this code. This is effectively a recursive processing of the
+    * tags number list that handles nested tags.
+    */
+   while(1) {
+      /* Don't bother to unmap tags via QCBORITem.uTags since this
+       * code only works on tags less than QCBOR_LAST_UNMAPPED_TAG.
+       */
+      const uint16_t uTagToProcess = pDecodedItem->uTags[0];
 
-         // Many of the functions here only just map a CBOR tag to
-         // a QCBOR_TYPE for a string and could probably be
-         // implemented with less object code. This implementation
-         // of string types takes about 120 bytes of object code
-         // (that is always linked and not removed by dead stripping).
-         case CBOR_TAG_DATE_STRING:
-         uReturn = DecodeDateString(pDecodedItem);
+      if(uTagToProcess == CBOR_TAG_INVALID16) {
+         /* Hit the end of the tag list. A successful exit. */
          break;
 
-         case CBOR_TAG_DATE_EPOCH:
+      } else if(uTagToProcess == CBOR_TAG_DATE_EPOCH) {
          uReturn = DecodeDateEpoch(pDecodedItem);
-         break;
-
-         case CBOR_TAG_POS_BIGNUM:
-         case CBOR_TAG_NEG_BIGNUM:
-         uReturn = DecodeBigNum(pDecodedItem);
-         break;
 
 #ifndef QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA
-         case CBOR_TAG_DECIMAL_FRACTION:
-         case CBOR_TAG_BIGFLOAT:
-         // For aggregate tagged types, what goes into pTags is only collected
-         // from the surrounding data item, not the contents, so pTags is not
-         // passed on here.
-
+      } else if(uTagToProcess == CBOR_TAG_DECIMAL_FRACTION ||
+                uTagToProcess == CBOR_TAG_BIGFLOAT) {
          uReturn = QCBORDecode_MantissaAndExponent(me, pDecodedItem);
-         break;
 #endif /* QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA */
 
-         case CBOR_TAG_CBOR:
-         uReturn = DecodeWrappedCBOR(pDecodedItem);
-         break;
-
-         case CBOR_TAG_CBOR_SEQUENCE:
-         uReturn = DecodeWrappedCBORSequence(pDecodedItem);
-         break;
-
-         case CBOR_TAG_URI:
-         uReturn = DecodeURI(pDecodedItem);
-         break;
-
-         case CBOR_TAG_B64URL:
-         uReturn = DecodeB64URL(pDecodedItem);
-         break;
-
-         case CBOR_TAG_B64:
-         uReturn = DecodeB64(pDecodedItem);
-         break;
-
-         case CBOR_TAG_MIME:
-         case CBOR_TAG_BINARY_MIME:
+      } else if(uTagToProcess == CBOR_TAG_MIME ||
+                uTagToProcess == CBOR_TAG_BINARY_MIME) {
          uReturn = DecodeMIME(pDecodedItem);
-         break;
 
-         case CBOR_TAG_REGEX:
-         uReturn = DecodeRegex(pDecodedItem);
-         break;
+      } else {
+         /* See if it is a pass-through byte/text string tag; process if so */
+         uReturn = ProcessTaggedString(pDecodedItem->uTags[0], pDecodedItem);
 
-         case CBOR_TAG_BIN_UUID:
-         uReturn = DecodeUUID(pDecodedItem);
-         break;
-
-         case CBOR_TAG_INVALID16:
-         // The end of the tag list or no tags
-         // Successful exit from the loop.
-         goto Done;
-
-         default:
-         // A tag that is not understood
-         // A successful exit from the loop
-         goto Done;
-
+         if(uReturn == QCBOR_ERR_UNSUPPORTED) {
+            /* It wasn't a pass-through byte/text string tag so it is
+             * an unknown tag. This is the exit from the loop on the
+             * first unknown tag.  It is a successful exit.
+             */
+            uReturn = QCBOR_SUCCESS;
+            break;
+         }
       }
+
       if(uReturn != QCBOR_SUCCESS) {
-         goto Done;
+         /* Error exit from the loop */
+         break;
       }
-      // A tag was successfully processed, shift it
-      // out of the list of tags returned.
+
+      /* A tag was successfully processed, shift it out of the list of
+       * tags returned. This is the loop increment.
+       */
       ShiftTags(pDecodedItem);
    }
 

diff --git a/test/qcbor_decode_tests.c b/test/qcbor_decode_tests.c
index 9a5dc6d..0327f2b 100644
--- a/test/qcbor_decode_tests.c
+++ b/test/qcbor_decode_tests.c

@@ -2207,8 +2207,8 @@
    0xc1,
    0x62, 'h', 'i', // wrong type tagged
 
-   // CBOR_TAG_B64
-   0xcf, 0xd8, 0x22, 0xc1, // 0xee, // Epoch date with extra tags
+   // CBOR_TAG_ENC_AS_B64
+   0xcf, 0xd8, 0x16, 0xc1, // 0xee, // Epoch date with extra tags
    0x1a, 0x53, 0x72, 0x4E, 0x01,
 
    0xc1, // tag for epoch date
@@ -2252,6 +2252,7 @@
 #endif /* QCBOR_DISABLE_FLOAT_HW_USE */
 
 
+
 int32_t DateParseTest()
 {
    QCBORDecodeContext DCtx;
@@ -2292,15 +2293,16 @@
       return -6;
    }
 
-   // Epoch date with extra CBOR_TAG_B64 tag that doesn't really mean anything
-   // but want to be sure extra tag doesn't cause a problem
+   // Epoch date wrapped in an CBOR_TAG_ENC_AS_B64 and an unknown tag.
+   // The date is decoded and the two tags are returned. This is to
+   // make sure the wrapping of epoch date in another tag works OK.
    if((uError = QCBORDecode_GetNext(&DCtx, &Item))) {
       return -7;
    }
    if(Item.uDataType != QCBOR_TYPE_DATE_EPOCH ||
       Item.val.epochDate.nSeconds != 1400000001 ||
       Item.val.epochDate.fSecondsFraction != 0 ||
-      !QCBORDecode_IsTagged(&DCtx, &Item, CBOR_TAG_B64)) {
+      !QCBORDecode_IsTagged(&DCtx, &Item, CBOR_TAG_ENC_AS_B64)) {
       return -8;
    }
commit	9961530244bba779e53a4ef83ef1a9721e6d9eeb	[log] [tgz]
author	Laurence Lundblade <laurencelundblade@users.noreply.github.com>	Sun Nov 29 11:19:47 2020 -0800
committer	GitHub <noreply@github.com>	Sun Nov 29 11:19:47 2020 -0800
tree	46a4d817e3b275687d8e2ee8d221d04de4d968b2
parent	f6da33c93bd3a42970329a6813392a2621d7cc03 [diff]