CDE and dCBOR encode-side support (#193)

This adds a mode that errors out if non-preferred serialization is attempted.

Layered on top of that is CDE mode that always sorts maps.

Layered on top of that is the dCBOR mode, which disallows a few things and unifies encoding of the float and integer number spaces.  There is no change to decoding, though some changes are planned.

NaN payloads can no longer be output by default. You must explicitly allow them. This is an incompatibility with QCBOR 1.x.

65-bit negative integers also cannot be output without explicitly allowing them.



* CDE and dCBOR support

* Tests for single precision and exponent boundaries

* test complete and passing; code clean up; bug fix

* Add comments for encoding modes

* tidy up and documentation

* Check point progress on CDE, preferred and dCBOR

* documentation updates

* Minor test fixes; minor code improvements

* full test fan out passes

* Formatting and documentation nits

---------

Co-authored-by: Laurence Lundblade <lgl@securitytheory.com>
diff --git a/src/ieee754.c b/src/ieee754.c
index 2d98159..002ca40 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c
@@ -1,5 +1,5 @@
 /* ==========================================================================
- * ieee754.c -- floating-point conversion between half, double & single-precision
+ * ieee754.c -- floating-point conversion for half, double & single-precision
  *
  * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
  * Copyright (c) 2021, Arm Limited. All rights reserved.
@@ -11,20 +11,14 @@
  * Created on 7/23/18
  * ========================================================================== */
 
-/*
- * Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
- * QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
- */
 #include "qcbor/qcbor_common.h"
 
-#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
-
 #include "ieee754.h"
 #include <string.h> /* For memcpy() */
 
 
 /*
- * This code has long lines and is easier to read because of
+ * This has long lines and is easier to read because of
  * them. Some coding guidelines prefer 80 column lines (can they not
  * afford big displays?).
  *
@@ -164,6 +158,10 @@
    return u64;
 }
 
+
+#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
+
+
 static inline double
 CopyUint64ToDouble(uint64_t u64)
 {
@@ -184,7 +182,7 @@
 
 
 /**
- * @brief Assemble sign, significand and exponent into single precision float.
+ * @brief Assemble sign, significand and exponent into double precision float.
  *
  * @param[in] uDoubleSign              0 if positive, 1 if negative
  * @pararm[in] uDoubleSignificand      Bits of the significand
@@ -208,6 +206,7 @@
 }
 
 
+/* Public function; see ieee754.h */
 double
 IEEE754_HalfToDouble(uint16_t uHalfPrecision)
 {
@@ -315,7 +314,7 @@
 
 /*  Public function; see ieee754.h */
 IEEE754_union
-IEEE754_SingleToHalf(float f)
+IEEE754_SingleToHalf(const float f, const int bNoNaNPayload)
 {
    IEEE754_union result;
    uint32_t      uDroppedBits;
@@ -357,28 +356,36 @@
          result.uSize  = IEEE754_UNION_IS_HALF;
          result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
       } else {
-         /* The NaN can only be converted if no payload bits are lost
-          * per RFC 8949 section 4.1 that defines Preferred
-          * Serializaton. Note that Deterministically Encode CBOR in
-          * section 4.2 allows for some variation of this rule, but at
-          * the moment this implementation is of Preferred
-          * Serialization, not CDE. As of December 2023, we are also
-          * expecting an update to CDE. This code may need to be
-          * updated for CDE.
-          */
-         uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
-         if(uDroppedBits == 0) {
-            /* --- IS CONVERTABLE NAN --- */
-            uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+         if(bNoNaNPayload) {
+            /* --- REQUIRE CANONICAL NAN --- */
             result.uSize  = IEEE754_UNION_IS_HALF;
             result.uValue = IEEE754_AssembleHalf(uSingleSign,
-                                                 uHalfSignificand,
+                                                 HALF_QUIET_NAN_BIT,
                                                  HALF_EXPONENT_INF_OR_NAN);
-
          } else {
-            /* --- IS UNCONVERTABLE NAN --- */
-            result.uSize   = IEEE754_UNION_IS_SINGLE;
-            result.uValue  = uSingle;
+            /* The NaN can only be converted if no payload bits are lost
+             * per RFC 8949 section 4.1 that defines Preferred
+             * Serialization. Note that Deterministically Encoded CBOR in
+             * section 4.2 allows for some variation of this rule, but at
+             * the moment this implementation is of Preferred
+             * Serialization, not CDE. As of December 2023, we are also
+             * expecting an update to CDE. This code may need to be
+             * updated for CDE.
+             */
+            uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
+            if(uDroppedBits == 0) {
+               /* --- IS CONVERTABLE NAN --- */
+               uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+               result.uSize  = IEEE754_UNION_IS_HALF;
+               result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                                    uHalfSignificand,
+                                                    HALF_EXPONENT_INF_OR_NAN);
+
+            } else {
+               /* --- IS UNCONVERTABLE NAN --- */
+               result.uSize   = IEEE754_UNION_IS_SINGLE;
+               result.uValue  = uSingle;
+            }
          }
       }
    } else {
@@ -495,7 +502,7 @@
  * This handles all subnormals and NaN payloads.
  */
 static IEEE754_union
-IEEE754_DoubleToSingle(double d)
+IEEE754_DoubleToSingle(const double d)
 {
    IEEE754_union Result;
    int64_t       nExponentDifference;
@@ -514,7 +521,6 @@
    const uint64_t uDoubleSign             = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
    const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;
 
-
     if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
         if(uDoubleSignificand == 0) {
             /* --- IS ZERO --- */
@@ -619,7 +625,9 @@
 
 /* Public function; see ieee754.h */
 IEEE754_union
-IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision)
+IEEE754_DoubleToSmaller(const double d,
+                        const int    bAllowHalfPrecision,
+                        const int    bNoNanPayload)
 {
    IEEE754_union result;
 
@@ -629,15 +637,228 @@
       /* Cast to uint32_t is OK, because value was just successfully
        * converted to single. */
       float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
-      result = IEEE754_SingleToHalf(uSingle);
+      result = IEEE754_SingleToHalf(uSingle, bNoNanPayload);
    }
 
    return result;
 }
 
 
-#else /* QCBOR_DISABLE_PREFERRED_FLOAT */
+static int
+IEEE754_Private_CountNonZeroBits(int nMax, uint64_t uTarget)
+{
+   int      nNonZeroBitsCount; /* Bits from the top of the nMax-bit field down to the lowest set bit */
+   uint64_t uMask;             /* Single-bit probe; starts at bit 0 and moves up each iteration */
 
-int ieee754_dummy_place_holder;
+   for(nNonZeroBitsCount = nMax; nNonZeroBitsCount > 0; nNonZeroBitsCount--) {
+      uMask = (1ULL << nMax) >> nNonZeroBitsCount; /* 1ULL, not 1UL: unsigned long may be 32 bits and nMax can be 52 */
+      if(uMask & uTarget) {
+         break;
+      }
+   }
+   return nNonZeroBitsCount; /* 0 when none of the low nMax bits of uTarget is set */
+}
+
+
+/* Public function; see ieee754.h */
+struct IEEE754_ToInt
+IEEE754_DoubleToInt(const double d)
+{
+   int64_t              nNonZeroBitsCount;
+   struct IEEE754_ToInt Result;
+   uint64_t             uInteger;
+
+   /* Pull the three parts out of the double-precision float. Most
+    * work is done with uint64_t which helps avoid integer promotions
+    * and static analyzer complaints.
+    */
+   const uint64_t uDouble                 = CopyDoubleToUint64(d);
+   const uint64_t uDoubleBiasedExponent   = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
+   /* Cast safe because of mask above; exponents < DOUBLE_EXPONENT_MAX */
+   const int64_t  nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
+   const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;
+
+   if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
+      if(uDoubleSignificand == 0) {
+         /* --- POSITIVE AND NEGATIVE ZERO (sign bit is not examined) --- */
+         Result.integer.un_signed = 0;
+         Result.type              = IEEE754_ToInt_IS_UINT;
+      } else {
+         /* --- SUBNORMAL --- */
+         Result.type = IEEE754_ToInt_NO_CONVERSION;
+      }
+   } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
+      if(uDoubleSignificand != 0) {
+         /* --- NAN --- */
+         Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
+      } else  {
+         /* --- INFINITY --- */
+         Result.type = IEEE754_ToInt_NO_CONVERSION;
+      }
+   } else if(nDoubleUnbiasedExponent < 0 ||
+             (nDoubleUnbiasedExponent >= ((uDouble & DOUBLE_SIGN_MASK) ? 63 : 64))) {
+      /* --- Magnitude below 1, or exponent too large (limit 63 if negative, 64 if positive) --- */
+      Result.type = IEEE754_ToInt_NO_CONVERSION;
+   } else {
+      /* Get the count of significand bits, from the most significant
+       * down to the lowest one that is set (0 if none are set). The
+       * value is a whole number only if the exponent is at least
+       * that count, so no set bit falls below the binary point.
+       *
+       * Conversion only fails when the input is too large or is not a
+       * whole number, never because of lack of precision because
+       * 64-bit integers always have more precision than the 52-bits
+       * of a double.
+       */
+      nNonZeroBitsCount = IEEE754_Private_CountNonZeroBits(DOUBLE_NUM_SIGNIFICAND_BITS, uDoubleSignificand);
+
+      if(nNonZeroBitsCount && nNonZeroBitsCount > nDoubleUnbiasedExponent) {
+         /* --- Not a whole number --- */
+         Result.type = IEEE754_ToInt_NO_CONVERSION;
+      } else {
+         /* --- CONVERTABLE WHOLE NUMBER --- */
+         /* Add in the one that is implied in normal floats */
+         uInteger = uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
+         /* Factor in the exponent */
+         if(nDoubleUnbiasedExponent < DOUBLE_NUM_SIGNIFICAND_BITS) {
+            /* Numbers less than 2^52 with up to 52 significant bits */
+            uInteger >>= DOUBLE_NUM_SIGNIFICAND_BITS - nDoubleUnbiasedExponent;
+         } else {
+            /* Numbers 2^52 and larger with at most 52 significant bits */
+            uInteger <<= nDoubleUnbiasedExponent - DOUBLE_NUM_SIGNIFICAND_BITS;
+         }
+         if(uDouble & DOUBLE_SIGN_MASK) {
+            /* Cast safe: the exponent check above keeps negative magnitudes below 2^63 */
+            Result.integer.is_signed = -((int64_t)uInteger);
+            Result.type              = IEEE754_ToInt_IS_INT;
+         } else {
+            Result.integer.un_signed = uInteger;
+            Result.type              = IEEE754_ToInt_IS_UINT;
+         }
+      }
+   }
+
+   return Result;
+}
+
+
+/* Public function; see ieee754.h */
+struct IEEE754_ToInt
+IEEE754_SingleToInt(const float f)
+{
+   int32_t              nNonZeroBitsCount;
+   struct IEEE754_ToInt Result;
+   uint64_t             uInteger;
+
+   /* Pull the three parts out of the single-precision float. Most
+    * work is done with uint32_t which helps avoid integer promotions
+    * and static analyzer complaints.
+    */
+   const uint32_t uSingle                 = CopyFloatToUint32(f);
+   const uint32_t uSingleBiasedExponent   = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
+   /* Cast safe because of mask above; exponents < SINGLE_EXPONENT_MAX */
+   const int32_t  nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
+   const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
+
+   if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
+      if(uSingleSignificand == 0) {
+         /* --- POSITIVE AND NEGATIVE ZERO (sign ignored, as in IEEE754_DoubleToInt) --- */
+         Result.integer.un_signed = 0;
+         Result.type              = IEEE754_ToInt_IS_UINT;
+      } else {
+         /* --- Subnormal --- */
+         Result.type = IEEE754_ToInt_NO_CONVERSION;
+      }
+   } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
+      /* --- NAN or INFINITY --- */
+      if(uSingleSignificand != 0) {
+         Result.type = IEEE754_ToInt_NaN; /* dCBOR doesn't care about payload */
+      } else  {
+         Result.type = IEEE754_ToInt_NO_CONVERSION;
+      }
+   } else if(nSingleUnbiasedExponent < 0 ||
+             (nSingleUnbiasedExponent >= ((uSingle & SINGLE_SIGN_MASK) ? 63 : 64))) {
+      /* --- Magnitude below 1, or exponent too large (limit 63 if negative, 64 if positive) --- */
+      Result.type = IEEE754_ToInt_NO_CONVERSION;
+   } else {
+      /* Get the count of significand bits, from the most significant
+       * down to the lowest one that is set (0 if none are set). A
+       * count larger than the exponent means it is not a whole number.
+       *
+       * Conversion only fails when the input is too large or is not a
+       * whole number, never because of lack of precision because
+       * 64-bit integers always have more precision than the 23-bits
+       * of a single.
+       */
+      nNonZeroBitsCount = IEEE754_Private_CountNonZeroBits(SINGLE_NUM_SIGNIFICAND_BITS, uSingleSignificand);
+
+      if(nNonZeroBitsCount && nNonZeroBitsCount > nSingleUnbiasedExponent) {
+         /* --- Not a whole number --- */
+         Result.type = IEEE754_ToInt_NO_CONVERSION;
+      } else {
+         /* --- CONVERTABLE WHOLE NUMBER --- */
+         /* Add in the one that is implied in normal floats */
+         uInteger = uSingleSignificand + (1ULL << SINGLE_NUM_SIGNIFICAND_BITS);
+         /* Factor in the exponent */
+         if(nSingleUnbiasedExponent < SINGLE_NUM_SIGNIFICAND_BITS) {
+            /* Numbers less than 2^23 with up to 23 significant bits */
+            uInteger >>= SINGLE_NUM_SIGNIFICAND_BITS - nSingleUnbiasedExponent;
+         } else {
+            /* Numbers 2^23 and larger with at most 23 significant bits */
+            uInteger <<= nSingleUnbiasedExponent - SINGLE_NUM_SIGNIFICAND_BITS;
+         }
+         if(uSingle & SINGLE_SIGN_MASK) {
+            /* Cast safe: the exponent check above keeps negative magnitudes below 2^63 */
+            Result.integer.is_signed = -((int64_t)uInteger);
+            Result.type              = IEEE754_ToInt_IS_INT;
+         } else {
+            Result.integer.un_signed = uInteger;
+            Result.type              = IEEE754_ToInt_IS_UINT;
+         }
+      }
+   }
+
+   return Result;
+}
 
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
+
+
+
+/* Public function; see ieee754.h */
+int
+IEEE754_IsNotStandardDoubleNaN(const double d)
+{
+   const uint64_t uBits           = CopyDoubleToUint64(d);
+   const uint64_t uBiasedExponent = (uBits & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
+   /* Cast safe because the mask above bounds the biased exponent */
+   const int64_t  nExponent       = (int64_t)uBiasedExponent - DOUBLE_EXPONENT_BIAS;
+   const uint64_t uSignificand    = uBits & DOUBLE_SIGNIFICAND_MASK;
+
+   if(nExponent != DOUBLE_EXPONENT_INF_OR_NAN) {
+      /* Neither infinity nor a NaN of any kind */
+      return 0;
+   }
+
+   /* Zero significand is infinity; only the quiet bit set is the standard NaN */
+   return (uSignificand != 0 && uSignificand != DOUBLE_QUIET_NAN_BIT) ? 1 : 0;
+}
+
+
+/* Public function; see ieee754.h */
+int
+IEEE754_IsNotStandardSingleNaN(const float f)
+{
+   const uint32_t uBits           = CopyFloatToUint32(f);
+   const uint32_t uBiasedExponent = (uBits & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
+   /* Cast safe because the mask above bounds the biased exponent */
+   const int32_t  nExponent       = (int32_t)uBiasedExponent - SINGLE_EXPONENT_BIAS;
+   const uint32_t uSignificand    = uBits & SINGLE_SIGNIFICAND_MASK;
+
+   if(nExponent != SINGLE_EXPONENT_INF_OR_NAN) {
+      /* Neither infinity nor a NaN of any kind */
+      return 0;
+   }
+
+   /* Zero significand is infinity; only the quiet bit set is the standard NaN */
+   return (uSignificand != 0 && uSignificand != SINGLE_QUIET_NAN_BIT) ? 1 : 0;
+}