Floating point tests and fixes related to #ifdefs
diff --git a/src/ieee754.c b/src/ieee754.c
index 8b2efa8..216cd00 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c
@@ -400,74 +400,11 @@
}
-// Public function; see ieee754.h
-double IEEE754_FloatToDouble(uint32_t uFloat)
-{
- // Pull out the three parts of the single-precision float. Do all
- // the work in 64 bits because that is what the end result is. It
- // may give smaller code size and will keep static analyzers
- // happier.
- const uint64_t uSingleSignificand = uFloat & SINGLE_SIGNIFICAND_MASK;
- const int64_t nSingleUnBiasedExponent = (int64_t)((uFloat & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
- const uint64_t uSingleSign = (uFloat & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
-
- // Make the three parts of hte single-precision number
- uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
- if(nSingleUnBiasedExponent == SINGLE_EXPONENT_ZERO) {
- // 0 or subnormal
- uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
- if(uSingleSignificand) {
- // Subnormal case
- uDoubleBiasedExponent = -SINGLE_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS + 1;
- // A single-precision subnormal can always be converted to
- // a normal double-precision float because the ranges line
- // up
- uDoubleSignificand = uSingleSignificand;
- // Shift bits from right of the decimal to left, reducing
- // the exponent by 1 each time
- do {
- uDoubleSignificand <<= 1;
- uDoubleBiasedExponent--;
- // TODO: is this right? Where does 0x400 come from?
- } while ((uDoubleSignificand & 0x400) == 0);
- uDoubleSignificand &= SINGLE_SIGNIFICAND_MASK;
- uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
- } else {
- // Just zero
- uDoubleSignificand = 0;
- }
- } else if(nSingleUnBiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
- // NaN or Inifinity
- uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
- if(uSingleSignificand) {
- // NaN
- // First preserve the NaN payload from half to single
- // TODO: check this
- uDoubleSignificand = uSingleSignificand & ~SINGLE_QUIET_NAN_BIT;
- if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
- // Next, set qNaN if needed since half qNaN bit is not copied above
- uDoubleSignificand |= DOUBLE_QUIET_NAN_BIT;
- }
- } else {
- // Infinity
- uDoubleSignificand = 0;
- }
- } else {
- // Normal number
- uDoubleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
- uDoubleSignificand = uSingleSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
- }
- uDoubleSign = uSingleSign;
-
-
- // Shift the 3 parts into place as a double-precision
- const uint64_t uDouble = uDoubleSignificand |
- (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
- (uDoubleSign << DOUBLE_SIGN_SHIFT);
- return CopyUint64ToDouble(uDouble);
-}
-
+/*
+ IEEE754_FloatToDouble(uint32_t uFloat) was created but is not needed. It can be retrieved from
+github history if needed.
+*/
diff --git a/src/ieee754.h b/src/ieee754.h
index d614825..d37532a 100644
--- a/src/ieee754.h
+++ b/src/ieee754.h
@@ -91,14 +91,6 @@
double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
-/*
- Convert float to double-precision without using any
- floating-point HW or compiler-supplied SW.
- This is a loss-less conversion.
- */
-double IEEE754_FloatToDouble(uint32_t ufloat);
-
-
// Both tags the value and gives the size
#define IEEE754_UNION_IS_HALF 2
#define IEEE754_UNION_IS_SINGLE 4
diff --git a/src/qcbor_decode.c b/src/qcbor_decode.c
index b5140c1..280a99a 100644
--- a/src/qcbor_decode.c
+++ b/src/qcbor_decode.c
@@ -797,25 +797,43 @@
case HALF_PREC_FLOAT:
#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
+ // Half-precision is returned as a double.
// The cast to uint16_t is safe because the encoded value
// was 16 bits. It was widened to 64 bits to be passed in here.
pDecodedItem->val.dfnum = IEEE754_HalfToDouble((uint16_t)uNumber);
pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
#else
- nReturn = QCBOR_ERR_HALF_PRECISION_UNSUPPORTED;
+ nReturn = QCBOR_ERR_HALF_PRECISION_DISABLED;
#endif
break;
case SINGLE_PREC_FLOAT:
-#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
+ // Single precision is normally returned as a double
+ // since double is widely supported, there is no loss of
+ // precision, it makes it easy for the caller in
+ // most cases and it can be converted back to single
+ // with no loss of precision
+ //
// The cast to uint32_t is safe because the encoded value
// was 32 bits. It was widened to 64 bits to be passed in here.
- pDecodedItem->val.dfnum = IEEE754_FloatToDouble((uint32_t)uNumber);
- pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
+ {
+ const float f = UsefulBufUtil_CopyUint32ToFloat((uint32_t)uNumber);
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+ // In the normal case, use HW to convert float to double.
+ pDecodedItem->val.dfnum = (double)f;
+ pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
#else
- pDecodedItem->val.fnum = UsefulBufUtil_CopyUint32ToFloat((uint32_t)uNumber);
- pDecodedItem->uDataType = QCBOR_TYPE_FLOAT;
+ // Use of float HW is disabled, return as a float.
+ pDecodedItem->val.fnum = f;
+ pDecodedItem->uDataType = QCBOR_TYPE_FLOAT;
+
+ // IEEE754_FloatToDouble() could be used here to return
+ // as a double, but it adds object code and most likely
+ // anyone disabling FLOAT HW use doesn't care about
+ // floats and wants to save object code.
#endif
+ }
break;
+
case DOUBLE_PREC_FLOAT:
pDecodedItem->val.dfnum = UsefulBufUtil_CopyUint64ToDouble(uNumber);
pDecodedItem->uDataType = QCBOR_TYPE_DOUBLE;
@@ -1591,7 +1609,7 @@
requires floating point conversion to integers and
comparison which requires either floating point HW
or a SW library. */
- nReturn = QCBOR_ERR_FLOAT_DATE_UNSUPPORTED;
+ nReturn = QCBOR_ERR_FLOAT_DATE_DISABLED;
#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
break;
@@ -3445,7 +3463,7 @@
uint64_t uResult;
// Take the absolute value of the mantissa and convert to unsigned.
- // TODO: this should be possible in one intruction
+ // Improvement: this should be possible in one instruction
uint64_t uMantissa = nMantissa > 0 ? (uint64_t)nMantissa : (uint64_t)-nMantissa;
// Do the exponentiation of the positive mantissa
@@ -3557,25 +3575,7 @@
}
-static inline UsefulBufC ConvertIntToBigNum(uint64_t uInt, UsefulBuf Buffer)
-{
- while((uInt & 0xff00000000000000UL) == 0) {
- uInt = uInt << 8;
- };
- UsefulOutBuf UOB;
-
- UsefulOutBuf_Init(&UOB, Buffer);
-
- while(uInt) {
- const uint64_t xx = uInt & 0xff00000000000000UL;
- UsefulOutBuf_AppendByte(&UOB, (uint8_t)((uInt & 0xff00000000000000UL) >> 56));
- uInt = uInt << 8;
- (void)xx;
- }
-
- return UsefulOutBuf_OutUBuf(&UOB);
-}
#include "fenv.h"
@@ -3595,13 +3595,18 @@
static QCBORError ConvertInt64(const QCBORItem *pItem, uint32_t uConvertTypes, int64_t *pnValue)
{
switch(pItem->uDataType) {
- // TODO: float when ifdefs are set
+ case QCBOR_TYPE_FLOAT:
case QCBOR_TYPE_DOUBLE:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
// TODO: what about under/overflow here?
// Invokes the floating-point HW and/or compiler-added libraries
feclearexcept(FE_ALL_EXCEPT);
- *pnValue = llround(pItem->val.dfnum);
+            // long can be only 32 bits wide, so both paths round to long long
+            // to cover the whole int64_t range.
+            *pnValue = pItem->uDataType == QCBOR_TYPE_DOUBLE ?
+                          llround(pItem->val.dfnum) :
+                          llroundf(pItem->val.fnum);
if(fetestexcept(FE_INVALID)) {
// TODO: better error code
return QCBOR_ERR_CONVERSION_UNDER_OVER_FLOW;
@@ -3609,6 +3614,9 @@
} else {
return QCBOR_ERR_UNEXPECTED_TYPE;
}
+#else
+ return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
break;
case QCBOR_TYPE_INT64:
@@ -3707,8 +3715,6 @@
*/
static QCBORError Int64ConvertAll(const QCBORItem *pItem, uint32_t uConvertTypes, int64_t *pnValue)
{
- QCBORError uErr;
-
switch(pItem->uDataType) {
case QCBOR_TYPE_POSBIGNUM:
@@ -3752,7 +3758,8 @@
case QCBOR_TYPE_DECIMAL_FRACTION_POS_BIGNUM:
if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
- int64_t nMantissa;
+ int64_t nMantissa;
+ QCBORError uErr;
uErr = ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
if(uErr) {
return uErr;
@@ -3768,7 +3775,8 @@
case QCBOR_TYPE_DECIMAL_FRACTION_NEG_BIGNUM:
if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
- int64_t nMantissa;
+ int64_t nMantissa;
+ QCBORError uErr;
uErr = ConvertNegativeBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
if(uErr) {
return uErr;
@@ -3784,7 +3792,8 @@
case QCBOR_TYPE_BIGFLOAT_POS_BIGNUM:
if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
- int64_t nMantissa;
+ int64_t nMantissa;
+ QCBORError uErr;
uErr = ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
if(uErr) {
return uErr;
@@ -3800,7 +3809,8 @@
case QCBOR_TYPE_BIGFLOAT_NEG_BIGNUM:
if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
- int64_t nMantissa;
+ int64_t nMantissa;
+ QCBORError uErr;
uErr = ConvertNegativeBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
if(uErr) {
return uErr;
@@ -3892,9 +3902,11 @@
static QCBORError ConvertUint64(const QCBORItem *pItem, uint32_t uConvertTypes, uint64_t *puValue)
{
switch(pItem->uDataType) {
- // TODO: type flaot
case QCBOR_TYPE_DOUBLE:
+ case QCBOR_TYPE_FLOAT:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
+ // TODO: QCBOR_TYPE_FLOAT items also reach this code, but only val.dfnum is read below; a float is stored in val.fnum
feclearexcept(FE_ALL_EXCEPT);
double dRounded = round(pItem->val.dfnum);
// TODO: over/underflow
@@ -3912,6 +3924,9 @@
} else {
return QCBOR_ERR_UNEXPECTED_TYPE;
}
+#else
+ return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
break;
case QCBOR_TYPE_INT64:
@@ -4006,8 +4021,6 @@
*/
static QCBORError Uint64ConvertAll(const QCBORItem *pItem, uint32_t uConvertTypes, uint64_t *puValue)
{
- QCBORError uErr;
-
switch(pItem->uDataType) {
case QCBOR_TYPE_POSBIGNUM:
@@ -4053,7 +4066,8 @@
case QCBOR_TYPE_DECIMAL_FRACTION_POS_BIGNUM:
if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
// TODO: Would be better to convert to unsigned
- int64_t nMantissa;
+ int64_t nMantissa;
+ QCBORError uErr;
uErr = ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
if(uErr != QCBOR_SUCCESS) {
return uErr;
@@ -4078,7 +4092,8 @@
case QCBOR_TYPE_BIGFLOAT_POS_BIGNUM:
if(uConvertTypes & QCBOR_CONVERT_TYPE_DECIMAL_FRACTION) {
// TODO: Would be better to convert to unsigned
- int64_t nMantissa;
+ int64_t nMantissa;
+ QCBORError uErr;
uErr = ConvertPositiveBigNumToSigned(pItem->val.expAndMantissa.Mantissa.bigNum, &nMantissa);
if(uErr != QCBOR_SUCCESS) {
return uErr;
@@ -4176,7 +4191,20 @@
static QCBORError ConvertDouble(const QCBORItem *pItem, uint32_t uConvertTypes, double *pdValue)
{
switch(pItem->uDataType) {
- // TODO: float when ifdefs are set
+      case QCBOR_TYPE_FLOAT:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+         // Widening float to double is exact. Without the FLOAT conversion
+         // flag the item is rejected instead of leaving *pdValue unwritten.
+         if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
+            *pdValue = (double)pItem->val.fnum;
+         } else {
+            return QCBOR_ERR_UNEXPECTED_TYPE;
+         }
+#else
+         return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+         break;
+
case QCBOR_TYPE_DOUBLE:
if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
if(uConvertTypes & QCBOR_CONVERT_TYPE_FLOAT) {
@@ -4188,6 +4216,7 @@
break;
case QCBOR_TYPE_INT64:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
if(uConvertTypes & QCBOR_CONVERT_TYPE_XINT64) {
// TODO: how does this work?
*pdValue = (double)pItem->val.int64;
@@ -4195,15 +4224,22 @@
} else {
return QCBOR_ERR_UNEXPECTED_TYPE;
}
+#else
+ return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
break;
case QCBOR_TYPE_UINT64:
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
if(uConvertTypes & QCBOR_CONVERT_TYPE_XINT64) {
*pdValue = (double)pItem->val.uint64;
} else {
return QCBOR_ERR_UNEXPECTED_TYPE;
}
break;
+#else
+ return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
default:
return QCBOR_ERR_UNEXPECTED_TYPE;
@@ -4272,7 +4308,7 @@
}
-
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
static double ConvertBigNumToDouble(const UsefulBufC BigNum)
{
double dResult;
@@ -4289,15 +4325,17 @@
return dResult;
}
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+
static QCBORError DoubleConvertAll(const QCBORItem *pItem, uint32_t uConvertTypes, double *pdValue)
{
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
/*
https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html
*/
switch(pItem->uDataType) {
- // TODO: type float
#ifndef QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA
case QCBOR_TYPE_DECIMAL_FRACTION:
@@ -4380,6 +4418,14 @@
}
return QCBOR_SUCCESS;
+
+#else
+ (void)pItem;
+ (void)uConvertTypes;
+ (void)pdValue;
+ return QCBOR_ERR_HW_FLOAT_DISABLED;
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+
}
@@ -4455,6 +4501,27 @@
#ifndef QCBOR_CONFIG_DISABLE_EXP_AND_MANTISSA
+// Write uInt to Buffer as a big-endian bignum with no leading zero bytes; for 0 no bytes are appended.
+static inline UsefulBufC ConvertIntToBigNum(uint64_t uInt, UsefulBuf Buffer)
+{
+   UsefulOutBuf UOB;
+   unsigned     uNumBytes = 0;
+
+   UsefulOutBuf_Init(&UOB, Buffer);
+
+   // Count the significant bytes. Shifting right always reaches zero, so an input of 0 terminates.
+   for(uint64_t uTmp = uInt; uTmp != 0; uTmp >>= 8) {
+      uNumBytes++;
+   }
+
+   // Most significant byte first. Counting bytes (not testing for zero) keeps low-order 0x00 bytes.
+   for(; uNumBytes > 0; uNumBytes--) {
+      UsefulOutBuf_AppendByte(&UOB, (uint8_t)(uInt >> (8 * (uNumBytes - 1))));
+   }
+   return UsefulOutBuf_OutUBuf(&UOB);
+}
+
+
static QCBORError MantissaAndExponentTypeHandler(QCBORDecodeContext *pMe,
TagSpecification TagSpec,
QCBORItem *pItem)