Floating point tests and fixes related to #ifdefs

commit: f7c0adb4e92590f6a4fe08c6d4867f3b42890d59 [log] [tgz]
author: Laurence Lundblade <lgl@securitytheory.com> Sat Aug 08 20:20:58 2020 -0700
committer: Laurence Lundblade <lgl@securitytheory.com> Sat Aug 08 20:20:58 2020 -0700
tree: 4a258576c17f9268781f4933928c8800f54b7a95
parent: 440806365e9ab1f679542874b0fe0ae4fae3e0ca [diff]
diff --git a/src/ieee754.c b/src/ieee754.c
index 8b2efa8..216cd00 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c

@@ -400,74 +400,11 @@
 }
 
 
-// Public function; see ieee754.h
-double IEEE754_FloatToDouble(uint32_t uFloat)
-{
-    // Pull out the three parts of the single-precision float.  Do all
-    // the work in 64 bits because that is what the end result is.  It
-    // may give smaller code size and will keep static analyzers
-    // happier.
-    const uint64_t uSingleSignificand      = uFloat & SINGLE_SIGNIFICAND_MASK;
-    const int64_t  nSingleUnBiasedExponent = (int64_t)((uFloat & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
-    const uint64_t uSingleSign             = (uFloat & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
 
-
-    // Make the three parts of hte single-precision number
-    uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
-    if(nSingleUnBiasedExponent == SINGLE_EXPONENT_ZERO) {
-        // 0 or subnormal
-        uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
-        if(uSingleSignificand) {
-            // Subnormal case
-            uDoubleBiasedExponent = -SINGLE_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS + 1;
-            // A single-precision subnormal can always be converted to
-            // a normal double-precision float because the ranges line
-            // up
-            uDoubleSignificand = uSingleSignificand;
-            // Shift bits from right of the decimal to left, reducing
-            // the exponent by 1 each time
-            do {
-                uDoubleSignificand <<= 1;
-                uDoubleBiasedExponent--;
-                // TODO: is this right? Where does 0x400 come from?
-            } while ((uDoubleSignificand & 0x400) == 0);
-            uDoubleSignificand &= SINGLE_SIGNIFICAND_MASK;
-            uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
-        } else {
-            // Just zero
-            uDoubleSignificand = 0;
-        }
-    } else if(nSingleUnBiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
-        // NaN or Inifinity
-        uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
-        if(uSingleSignificand) {
-            // NaN
-            // First preserve the NaN payload from half to single
-            // TODO: check this
-            uDoubleSignificand = uSingleSignificand & ~SINGLE_QUIET_NAN_BIT;
-            if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
-                // Next, set qNaN if needed since half qNaN bit is not copied above
-                uDoubleSignificand |= DOUBLE_QUIET_NAN_BIT;
-            }
-        } else {
-            // Infinity
-            uDoubleSignificand = 0;
-        }
-    } else {
-        // Normal number
-        uDoubleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
-        uDoubleSignificand    = uSingleSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
-    }
-    uDoubleSign = uSingleSign;
-
-
-    // Shift the 3 parts into place as a double-precision
-    const uint64_t uDouble = uDoubleSignificand |
-                            (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
-                            (uDoubleSign << DOUBLE_SIGN_SHIFT);
-    return CopyUint64ToDouble(uDouble);
-}
-
+/*
+ IEEE754_FloatToDouble(uint32_t uFloat) was implemented but is no longer needed.
+ It can be recovered from the git history if it is needed again.
+*/
 
 
 

diff --git a/src/ieee754.h b/src/ieee754.h
index d614825..d37532a 100644
--- a/src/ieee754.h
+++ b/src/ieee754.h

@@ -91,14 +91,6 @@
 double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
 
 
-/*
- Convert float to double-precision without using any
- floating-point HW or compiler-supplied SW.
- This is a loss-less conversion.
- */
-double IEEE754_FloatToDouble(uint32_t ufloat);
-
-
 // Both tags the value and gives the size
 #define IEEE754_UNION_IS_HALF   2
 #define IEEE754_UNION_IS_SINGLE 4

diff --git a/src/qcbor_decode.c b/src/qcbor_decode.c
index b5140c1..280a99a 100644
--- a/src/qcbor_decode.c
+++ b/src/qcbor_decode.c

@@ -797,25 +797,43 @@
 
       case HALF_PREC_FLOAT:
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
+         // Half-precision is returned as a double.
          // The cast to uint16_t is safe because the encoded value
          // was 16 bits. It was widened to 64 bits to be passed in here.
          pDecodedItem->val.dfnum = IEEE754_HalfToDouble((uint16_t)uNumber);
          pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
 #else
-         nReturn = QCBOR_ERR_HALF_PRECISION_UNSUPPORTED;
+         nReturn = QCBOR_ERR_HALF_PRECISION_DISABLED;
 #endif
          break;
       case SINGLE_PREC_FLOAT:
-#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
+         // Single precision is normally returned as a double
+         // because double is widely supported, there is no loss
+         // of precision, it is easier for the caller in most
+         // cases, and it can be converted back to single
+         // precision with no loss.
+         //
          // The cast to uint32_t is safe because the encoded value
          // was 32 bits. It was widened to 64 bits to be passed in here.
-         pDecodedItem->val.dfnum = IEEE754_FloatToDouble((uint32_t)uNumber);
-         pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
+         {
+            const float f = UsefulBufUtil_CopyUint32ToFloat((uint32_t)uNumber);
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+            // In the normal case, use HW to convert float to double.
+            pDecodedItem->val.dfnum = (double)f;
+            pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
 #else
-         pDecodedItem->val.fnum = UsefulBufUtil_CopyUint32ToFloat((uint32_t)uNumber);
-         pDecodedItem->uDataType = QCBOR_TYPE_FLOAT;
+            // Use of float HW is disabled, return as a float.
+            pDecodedItem->val.fnum = f;
+            pDecodedItem->uDataType = QCBOR_TYPE_FLOAT;
+
+            // IEEE754_FloatToDouble() could be used here to return
+            // as a double, but it adds object code and most likely
+            // anyone disabling FLOAT HW use doesn't care about
+            // floats and wants to save object code.
 #endif
+         }
          break;
+
       case DOUBLE_PREC_FLOAT:
          pDecodedItem->val.dfnum = UsefulBufUtil_CopyUint64ToDouble(uNumber);
          pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
@@ -1591,7 +1609,7 @@
           requires floating point conversion to integers and
           comparison which requires either floating point HW
           or a SW library. */
-         nReturn = QCBOR_ERR_FLOAT_DATE_UNSUPPORTED;
+         nReturn = QCBOR_ERR_FLOAT_DATE_DISABLED;
 #endif /* QCBOR_DISABLE_FLOAT_HW_USE */
          break;
 
@@ -3445,7 +3463,7 @@
    uint64_t uResult;
 
    // Take the absolute value of the mantissa and convert to unsigned.
-   // TODO: this should be possible in one intruction
+   // Improvement: this should be possible in one instruction
    uint64_t uMantissa = nMantissa > 0 ? (uint64_t)nMantissa : (uint64_t)-nMantissa;
 
    // Do the exponentiation of the positive mantissa
@@ -3557,25 +3575,7 @@
 }
 
 
-static inline UsefulBufC ConvertIntToBigNum(uint64_t uInt, UsefulBuf Buffer)
-{
-   while((uInt & 0xff00000000000000UL) == 0) {
-      uInt = uInt << 8;
-   };
 
-   UsefulOutBuf UOB;
-
-   UsefulOutBuf_Init(&UOB, Buffer);
-
-   while(uInt) {
-      const uint64_t xx = uInt & 0xff00000000000000UL;
-      UsefulOutBuf_AppendByte(&UOB, (uint8_t)((uInt & 0xff00000000000000UL) >> 56));
-      uInt = uInt << 8;
-      (void)xx;
-   }
-
-   return UsefulOutBuf_OutUBuf(&UOB);
-}
 
 #include "fenv.h"
 
@@ -3595,13 +3595,18 @@
 static QCBORError ConvertInt64(const QCBORItem *pItem, uint32_t uConvertTypes, int64_t *pnValue)
 {
    switch(pItem->uDataType) {
-      // TODO: float when ifdefs are set
+      case QCBOR_TYPE_FLOAT:
       case QCBOR_TYPE_DOUBLE:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
          if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
             // TODO: what about under/overflow here?
             // Invokes the floating-point HW and/or compiler-added libraries
             feclearexcept(FE_ALL_EXCEPT);
-            *pnValue = llround(pItem->val.dfnum);
+            if(pItem->uDataType == QCBOR_TYPE_DOUBLE) {
+               *pnValue = llround(pItem->val.dfnum);
+            } else {
+               *pnValue = lroundf(pItem->val.fnum);
+            }
             if(fetestexcept(FE_INVALID)) {
                // TODO: better error code
                return QCBOR_ERR_CONVERSION_UNDER_OVER_FLOW;
@@ -3609,6 +3614,9 @@
          } else {
             return  QCBOR_ERR_UNEXPECTED_TYPE;
          }
+#else
+         return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
          break;
 
       case QCBOR_TYPE_INT64:
@@ -3707,8 +3715,6 @@
  */
 static QCBORError Int64ConvertAll(const QCBORItem *pItem, uint32_t uConvertTypes, int64_t *pnValue)
 {
-   QCBORError uErr;
-
    switch(pItem->uDataType) {
 
       case QCBOR_TYPE_POSBIGNUM:
@@ -3752,7 +3758,8 @@
 
       case QCBOR_TYPE_DECIMAL_FRACTION_POS_BIGNUM:
          if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
-            int64_t nMantissa;
+            int64_t    nMantissa;
+            QCBORError uErr;
             uErr = ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
             if(uErr) {
                return uErr;
@@ -3768,7 +3775,8 @@
 
       case QCBOR_TYPE_DECIMAL_FRACTION_NEG_BIGNUM:
          if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
-            int64_t nMantissa;
+            int64_t    nMantissa;
+            QCBORError uErr;
             uErr = ConvertNegativeBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
             if(uErr) {
                return uErr;
@@ -3784,7 +3792,8 @@
 
       case QCBOR_TYPE_BIGFLOAT_POS_BIGNUM:
          if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
-            int64_t nMantissa;
+            int64_t    nMantissa;
+            QCBORError uErr;
             uErr = ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
             if(uErr) {
                return uErr;
@@ -3800,7 +3809,8 @@
 
       case QCBOR_TYPE_BIGFLOAT_NEG_BIGNUM:
          if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
-            int64_t nMantissa;
+            int64_t    nMantissa;
+            QCBORError uErr;
             uErr = ConvertNegativeBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
             if(uErr) {
                return uErr;
@@ -3892,9 +3902,11 @@
 static QCBORError ConvertUint64(const QCBORItem *pItem, uint32_t uConvertTypes, uint64_t *puValue)
 {
    switch(pItem->uDataType) {
-           // TODO: type flaot
         case QCBOR_TYPE_DOUBLE:
+        case QCBOR_TYPE_FLOAT:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
            if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
+              // TODO: this code needs work; the QCBOR_TYPE_FLOAT case still reads val.dfnum, not val.fnum
               feclearexcept(FE_ALL_EXCEPT);
               double dRounded = round(pItem->val.dfnum);
               // TODO: over/underflow
@@ -3912,6 +3924,9 @@
            } else {
               return QCBOR_ERR_UNEXPECTED_TYPE;
            }
+#else
+         return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
            break;
 
         case QCBOR_TYPE_INT64:
@@ -4006,8 +4021,6 @@
 */
 static QCBORError Uint64ConvertAll(const QCBORItem *pItem, uint32_t uConvertTypes, uint64_t *puValue)
 {
-   QCBORError uErr;
-
    switch(pItem->uDataType) {
 
       case QCBOR_TYPE_POSBIGNUM:
@@ -4053,7 +4066,8 @@
       case QCBOR_TYPE_DECIMAL_FRACTION_POS_BIGNUM:
          if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
             // TODO: Would be better to convert to unsigned
-            int64_t nMantissa;
+            int64_t    nMantissa;
+            QCBORError uErr;
             uErr = ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
             if(uErr != QCBOR_SUCCESS) {
                return uErr;
@@ -4078,7 +4092,8 @@
       case QCBOR_TYPE_BIGFLOAT_POS_BIGNUM:
          if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
             // TODO: Would be better to convert to unsigned
-            int64_t nMantissa;
+            int64_t    nMantissa;
+            QCBORError uErr;
             uErr =  ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
             if(uErr != QCBOR_SUCCESS) {
                return uErr;
@@ -4176,7 +4191,20 @@
 static QCBORError ConvertDouble(const QCBORItem *pItem, uint32_t uConvertTypes, double *pdValue)
 {
    switch(pItem->uDataType) {
-      // TODO: float when ifdefs are set
+      case QCBOR_TYPE_FLOAT:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+         if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
+            if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
+               *pdValue = (double)pItem->val.fnum;
+            } else {
+               return QCBOR_ERR_UNEXPECTED_TYPE;
+            }
+         }
+#else
+         return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif
+         break;
+
       case QCBOR_TYPE_DOUBLE:
          if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
             if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
@@ -4188,6 +4216,7 @@
          break;
 
       case QCBOR_TYPE_INT64:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
          if(uConvertTypes & QCBOR_CONVERT_TYPE_XINT64) {
             // TODO: how does this work?
             *pdValue = (double)pItem->val.int64;
@@ -4195,15 +4224,22 @@
          } else {
             return QCBOR_ERR_UNEXPECTED_TYPE;
          }
+#else
+         return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
          break;
 
       case QCBOR_TYPE_UINT64:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
          if(uConvertTypes & QCBOR_CONVERT_TYPE_XINT64) {
              *pdValue = (double)pItem->val.uint64;
          } else {
             return QCBOR_ERR_UNEXPECTED_TYPE;
          }
          break;
+#else
+         return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
 
       default:
          return QCBOR_ERR_UNEXPECTED_TYPE;
@@ -4272,7 +4308,7 @@
 }
 
 
-
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
 static double ConvertBigNumToDouble(const UsefulBufC BigNum)
 {
    double dResult;
@@ -4289,15 +4325,17 @@
 
    return dResult;
 }
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+
 
 static QCBORError DoubleConvertAll(const QCBORItem *pItem, uint32_t uConvertTypes, double *pdValue)
 {
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
    /*
    https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html
 
    */
    switch(pItem->uDataType) {
-         // TODO: type float
 
 #ifndef QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA
       case QCBOR_TYPE_DECIMAL_FRACTION:
@@ -4380,6 +4418,14 @@
    }
 
    return QCBOR_SUCCESS;
+
+#else
+   (void)pItem;
+   (void)uConvertTypes;
+   (void)pdValue;
+   return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+
 }
 
 
@@ -4455,6 +4501,27 @@
 
 
 #ifndef QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA
+static inline UsefulBufC ConvertIntToBigNum(uint64_t uInt, UsefulBuf Buffer)
+{  // Appends the bytes of uInt, most-significant first, to a UsefulOutBuf initialized with Buffer.
+   while((uInt & 0xff00000000000000UL) == 0) {  // Left-justify: shift until the top byte is non-zero.
+      uInt = uInt << 8;
+   };  // NOTE(review): if uInt is 0 this loop never terminates, since 0 << 8 is still 0.
+
+   UsefulOutBuf UOB;
+
+   UsefulOutBuf_Init(&UOB, Buffer);
+
+   while(uInt) {  // NOTE(review): ends once the remaining bits are 0, so low-order zero bytes are not appended.
+      const uint64_t xx = uInt & 0xff00000000000000UL;  // Unused; only referenced by the (void) cast below.
+      UsefulOutBuf_AppendByte(&UOB, (uint8_t)((uInt & 0xff00000000000000UL) >> 56));  // Emit the current top byte.
+      uInt = uInt << 8;  // Move the next byte into the top position.
+      (void)xx;
+   }
+
+   return UsefulOutBuf_OutUBuf(&UOB);  // Presumably the bytes appended so far -- confirm against UsefulBuf.h.
+}
+
+
 static QCBORError MantissaAndExponentTypeHandler(QCBORDecodeContext *pMe,
                                                  TagSpecification    TagSpec,
                                                  QCBORItem          *pItem)
commit	f7c0adb4e92590f6a4fe08c6d4867f3b42890d59	[log] [tgz]
author	Laurence Lundblade <lgl@securitytheory.com>	Sat Aug 08 20:20:58 2020 -0700
committer	Laurence Lundblade <lgl@securitytheory.com>	Sat Aug 08 20:20:58 2020 -0700
tree	4a258576c17f9268781f4933928c8800f54b7a95
parent	440806365e9ab1f679542874b0fe0ae4fae3e0ca [diff]