Fix preferred serialization of subnormals (#192)
Preferred Serialization now fully supports conversion to/from half, single and double subnormals. This includes NaN payloads.
The tests for floating-point are much better organized and give greater coverage.
IEEE 754 code is better organized and cleaner.
* Fix preferred serialization of subnormals (checkpoint)
* Check point progress
* Preferred float mostly working and tests passing
* added NaN tests
* Fix up ieee754.h; a few other compiler warnings
* decoding NaN payloads fix; rework half-double; tests
* Code tidiness
* indent to 3, not 4
* TODO's are done in other test; code tidy
* test running with float HW use disabled
* Remove / rearrange float tests
* Fix full float ifdef test fan out
* Code tidiness; sort out final TODO's
---------
Co-authored-by: Laurence Lundblade <lgl@securitytheory.com>
diff --git a/QCBOR.xcodeproj/project.pbxproj b/QCBOR.xcodeproj/project.pbxproj
index c1e6cd7..6e69fbd 100644
--- a/QCBOR.xcodeproj/project.pbxproj
+++ b/QCBOR.xcodeproj/project.pbxproj
@@ -150,8 +150,8 @@
0FA9BEB9216DC7AD00BA646B /* qcbor_encode_tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = qcbor_encode_tests.h; path = test/qcbor_encode_tests.h; sourceTree = "<group>"; };
0FA9BEBB216DE31700BA646B /* UsefulBuf_Tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UsefulBuf_Tests.h; path = test/UsefulBuf_Tests.h; sourceTree = "<group>"; };
0FA9BEBC216DE31700BA646B /* UsefulBuf_Tests.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = UsefulBuf_Tests.c; path = test/UsefulBuf_Tests.c; sourceTree = "<group>"; tabWidth = 3; };
- E73B57572161CA680080D658 /* ieee754.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ieee754.h; path = src/ieee754.h; sourceTree = "<group>"; };
- E73B57582161CA690080D658 /* ieee754.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ieee754.c; path = src/ieee754.c; sourceTree = "<group>"; };
+ E73B57572161CA680080D658 /* ieee754.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.h; name = ieee754.h; path = src/ieee754.h; sourceTree = "<group>"; tabWidth = 3; };
+ E73B57582161CA690080D658 /* ieee754.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = ieee754.c; path = src/ieee754.c; sourceTree = "<group>"; tabWidth = 3; };
E73B575A2161CA7C0080D658 /* float_tests.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = float_tests.c; path = test/float_tests.c; sourceTree = "<group>"; tabWidth = 3; };
E73B575B2161CA7C0080D658 /* half_to_double_from_rfc7049.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = half_to_double_from_rfc7049.h; path = test/half_to_double_from_rfc7049.h; sourceTree = "<group>"; };
E73B575C2161CA7C0080D658 /* float_tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = float_tests.h; path = test/float_tests.h; sourceTree = "<group>"; };
diff --git a/src/ieee754.c b/src/ieee754.c
index a8079f8..2d98159 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c
@@ -1,71 +1,63 @@
-/*==============================================================================
- ieee754.c -- floating-point conversion between half, double & single-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
- Copyright (c) 2021, Arm Limited. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 7/23/18
- =============================================================================*/
+/* ==========================================================================
+ * ieee754.c -- floating-point conversion between half, double & single-precision
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 7/23/18
+ * ========================================================================== */
/*
- Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
- QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
+ * Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
+ * QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
*/
#include "qcbor/qcbor_common.h"
#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
#include "ieee754.h"
-#include <string.h> // For memcpy()
+#include <string.h> /* For memcpy() */
/*
- This code is written for clarity and verifiability, not for size, on
- the assumption that the optimizer will do a good job. The LLVM
- optimizer, -Os, does seem to do the job and the resulting object code
- is smaller from combining code for the many different cases (normal,
- subnormal, infinity, zero...) for the conversions. GCC is no where near
- as good.
-
- This code has really long lines and is much easier to read because of
- them. Some coding guidelines prefer 80 column lines (can they not afford
- big displays?). It would make this code much worse even to wrap at 120
- columns.
-
- Dead stripping is also really helpful to get code size down when
- floating-point encoding is not needed. (If this is put in a library
- and linking is against the library, then dead stripping is automatic).
-
- This code works solely using shifts and masks and thus has no
- dependency on any math libraries. It can even work if the CPU doesn't
- have any floating-point support, though that isn't the most useful
- thing to do.
-
- The memcpy() dependency is only for CopyFloatToUint32() and friends
- which only is needed to avoid type punning when converting the actual
- float bits to an unsigned value so the bit shifts and masks can work.
- */
-
-/*
- The references used to write this code:
-
- - IEEE 754-2008, particularly section 3.6 and 6.2.1
-
- - https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
-
- - https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
-
- - https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
-
- - https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
+ * This code has long lines and is easier to read because of
+ * them. Some coding guidelines prefer 80 column lines (can they not
+ * afford big displays?).
+ *
+ * This code works solely using shifts and masks and thus has no
+ * dependency on any math libraries. It can even work if the CPU
+ * doesn't have any floating-point support, though that isn't the most
+ * useful thing to do.
+ *
+ * The memcpy() dependency is only for CopyFloatToUint32() and friends
+ * which only is needed to avoid type punning when converting the
+ * actual float bits to an unsigned value so the bit shifts and masks
+ * can work.
+ *
+ * The references used to write this code:
+ *
+ * IEEE 754-2008, particularly section 3.6 and 6.2.1
+ *
+ * https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
+ *
+ * https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
+ *
+ * https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
+ *
+ * https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
+ *
+ * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
+ * needed. It can be retrieved from github history if needed.
*/
-// ----- Half Precsion -----------
+
+
+/* ----- Half Precision ----------- */
#define HALF_NUM_SIGNIFICAND_BITS (10)
#define HALF_NUM_EXPONENT_BITS (5)
#define HALF_NUM_SIGN_BITS (1)
@@ -74,16 +66,16 @@
#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
-#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits // 0x03ff
+#define HALF_SIGNIFICAND_MASK (0x3ffU) // The lower 10 bits
#define HALF_EXPONENT_MASK (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
-#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // // 0x8000 1 bit of sign
+#define HALF_SIGN_MASK (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
#define HALF_QUIET_NAN_BIT (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
/* Biased Biased Unbiased Use
- 0x00 0 -15 0 and subnormal
- 0x01 1 -14 Smallest normal exponent
- 0x1e 30 15 Largest normal exponent
- 0x1F 31 16 NaN and Infinity */
+ * 0x00 0 -15 0 and subnormal
+ * 0x01 1 -14 Smallest normal exponent
+ * 0x1e 30 15 Largest normal exponent
+ * 0x1F 31 16 NaN and Infinity */
#define HALF_EXPONENT_BIAS (15)
#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
@@ -91,7 +83,7 @@
#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
-// ------ Single-Precision --------
+/* ------ Single-Precision -------- */
#define SINGLE_NUM_SIGNIFICAND_BITS (23)
#define SINGLE_NUM_EXPONENT_BITS (8)
#define SINGLE_NUM_SIGN_BITS (1)
@@ -106,19 +98,19 @@
#define SINGLE_QUIET_NAN_BIT (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))
/* Biased Biased Unbiased Use
- 0x0000 0 -127 0 and subnormal
- 0x0001 1 -126 Smallest normal exponent
- 0x7f 127 0 1
- 0xfe 254 127 Largest normal exponent
- 0xff 255 128 NaN and Infinity */
+ * 0x0000 0 -127 0 and subnormal
+ * 0x0001 1 -126 Smallest normal exponent
+ * 0x7f 127 0 1
+ * 0xfe 254 127 Largest normal exponent
+ * 0xff 255 128 NaN and Infinity */
#define SINGLE_EXPONENT_BIAS (127)
-#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
-#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
-#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) // -127 unbiased
-#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
+#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS)
+#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1)
+#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS)
+#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1)
-// --------- Double-Precision ----------
+/* --------- Double-Precision ---------- */
#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS (11)
#define DOUBLE_NUM_SIGN_BITS (1)
@@ -134,372 +126,518 @@
/* Biased Biased Unbiased Use
- 0x00000000 0 -1023 0 and subnormal
- 0x00000001 1 -1022 Smallest normal exponent
- 0x000007fe 2046 1023 Largest normal exponent
- 0x000007ff 2047 1024 NaN and Infinity */
+ * 0x00000000 0 -1023 0 and subnormal
+ * 0x00000001 1 -1022 Smallest normal exponent
+ * 0x000007fe 2046 1023 Largest normal exponent
+ * 0x000007ff 2047 1024 NaN and Infinity */
#define DOUBLE_EXPONENT_BIAS (1023)
-#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) // unbiased
-#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) // unbiased
-#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) // unbiased
-#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) // unbiased
+#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS)
+#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1)
+#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS)
+#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1)
+
/*
- Convenient functions to avoid type punning, compiler warnings and
- such. The optimizer reduces them to a simple assignment. This is a
- crusty corner of C. It shouldn't be this hard.
-
- These are also in UsefulBuf.h under a different name. They are copied
- here to avoid a dependency on UsefulBuf.h. There is no object code
- size impact because these always optimze down to a simple assignment.
+ * Convenient functions to avoid type punning, compiler warnings and
+ * such. The optimizer reduces them to a simple assignment. This is a
+ * crusty corner of C. It shouldn't be this hard.
+ *
+ * These are also in UsefulBuf.h under a different name. They are copied
+ * here to avoid a dependency on UsefulBuf.h. There is no object code
+ * size impact because these always optimize down to a simple assignment.
*/
-static inline uint32_t CopyFloatToUint32(float f)
+static inline uint32_t
+CopyFloatToUint32(float f)
{
- uint32_t u32;
- memcpy(&u32, &f, sizeof(uint32_t));
- return u32;
+ uint32_t u32;
+ memcpy(&u32, &f, sizeof(uint32_t));
+ return u32;
}
-static inline uint64_t CopyDoubleToUint64(double d)
+static inline uint64_t
+CopyDoubleToUint64(double d)
{
- uint64_t u64;
- memcpy(&u64, &d, sizeof(uint64_t));
- return u64;
+ uint64_t u64;
+ memcpy(&u64, &d, sizeof(uint64_t));
+ return u64;
}
-static inline double CopyUint64ToDouble(uint64_t u64)
+static inline double
+CopyUint64ToDouble(uint64_t u64)
{
- double d;
- memcpy(&d, &u64, sizeof(uint64_t));
- return d;
+ double d;
+ memcpy(&d, &u64, sizeof(uint64_t));
+ return d;
+}
+
+static inline float
+CopyUint32ToSingle(uint32_t u32)
+{
+ float f;
+ memcpy(&f, &u32, sizeof(uint32_t));
+ return f;
}
-// Public function; see ieee754.h
-uint16_t IEEE754_FloatToHalf(float f)
-{
- // Pull the three parts out of the single-precision float
- const uint32_t uSingle = CopyFloatToUint32(f);
- const int32_t nSingleUnbiasedExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
- const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
- const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
- // Now convert the three parts to half-precision.
-
- // All works is done on uint32_t with conversion to uint16_t at
- // the end. This avoids integer promotions that static analyzers
- // complain about and reduces code size.
- uint32_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
-
- if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
- // +/- Infinity and NaNs -- single biased exponent is 0xff
- uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
- if(!uSingleSignificand) {
- // Infinity
- uHalfSignificand = 0;
- } else {
- // Copy the LSBs of the NaN payload that will fit from the
- // single to the half
- uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
- if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
- // It's a qNaN; copy the qNaN bit
- uHalfSignificand |= HALF_QUIET_NAN_BIT;
- } else {
- // It's an sNaN; make sure the significand is not zero
- // so it stays a NaN This is needed because not all
- // significand bits are copied from single
- if(!uHalfSignificand) {
- // Set the LSB. This is what wikipedia shows for
- // sNAN.
- uHalfSignificand |= 0x01;
- }
- }
- }
- } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
- // 0 or a subnormal number -- singled biased exponent is 0
- uHalfBiasedExponent = 0;
- uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
- } else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
- // Exponent is too large to express in half-precision; round
- // up to infinity
- uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
- uHalfSignificand = 0;
- } else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
- // Exponent is too small to express in half-precision normal;
- // make it a half-precision subnormal
- uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
- uHalfSignificand = 0;
- // Could convert some of these values to a half-precision
- // subnormal, but the layer above this will never use it. See
- // layer above. There is code to do this in github history
- // for this file, but it was removed because it was never
- // invoked.
- } else {
- // The normal case, exponent is in range for half-precision
- uHalfBiasedExponent = (uint32_t)(nSingleUnbiasedExponent + HALF_EXPONENT_BIAS);
- uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
- }
- uHalfSign = uSingleSign;
-
- // Put the 3 values in the right place for a half precision
- const uint32_t uHalfPrecision = uHalfSignificand |
- (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
- (uHalfSign << HALF_SIGN_SHIFT);
- // Cast is safe because all the masks and shifts above work to
- // make a half precision value which is only 16 bits.
- return (uint16_t)uHalfPrecision;
-}
-
-
-// Public function; see ieee754.h
-uint16_t IEEE754_DoubleToHalf(double d)
-{
- // Pull the three parts out of the double-precision float
- const uint64_t uDouble = CopyDoubleToUint64(d);
- const int64_t nDoubleUnbiasedExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
- const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
- const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
-
- // Now convert the three parts to half-precision.
-
- // All works is done on uint64_t with conversion to uint16_t at
- // the end. This avoids integer promotions that static analyzers
- // complain about. Other options are for these to be unsigned int
- // or fast_int16_t. Code size doesn't vary much between all these
- // options for 64-bit LLVM, 64-bit GCC and 32-bit Armv7 LLVM.
- uint64_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
-
- if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
- // +/- Infinity and NaNs -- single biased exponent is 0xff
- uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
- if(!uDoubleSignificand) {
- // Infinity
- uHalfSignificand = 0;
- } else {
- // Copy the LSBs of the NaN payload that will fit from the
- // double to the half
- uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
- if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
- // It's a qNaN; copy the qNaN bit
- uHalfSignificand |= HALF_QUIET_NAN_BIT;
- } else {
- // It's an sNaN; make sure the significand is not zero
- // so it stays a NaN This is needed because not all
- // significand bits are copied from single
- if(!uHalfSignificand) {
- // Set the LSB. This is what wikipedia shows for
- // sNAN.
- uHalfSignificand |= 0x01;
- }
- }
- }
- } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
- // 0 or a subnormal number -- double biased exponent is 0
- uHalfBiasedExponent = 0;
- uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
- } else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
- // Exponent is too large to express in half-precision; round
- // up to infinity; TODO, is this really true?
- uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
- uHalfSignificand = 0;
- } else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
- // Exponent is too small to express in half-precision; round
- // down to zero
- uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
- uHalfSignificand = 0;
- // Could convert some of these values to a half-precision
- // subnormal, but the layer above this will never use it. See
- // layer above. There is code to do this in github history
- // for this file, but it was removed because it was never
- // invoked.
- } else {
- // The normal case, exponent is in range for half-precision
- uHalfBiasedExponent = (uint32_t)(nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS);
- uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
- }
- uHalfSign = uDoubleSign;
-
-
- // Put the 3 values in the right place for a half precision
- const uint64_t uHalfPrecision = uHalfSignificand |
- (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
- (uHalfSign << HALF_SIGN_SHIFT);
- // Cast is safe because all the masks and shifts above work to
- // make a half precision value which is only 16 bits.
- return (uint16_t)uHalfPrecision;
-}
-
-
-/*
- EEE754_HalfToFloat() was created but is not needed. It can be retrieved from
- github history if needed.
+/**
+ * @brief Assemble sign, significand and exponent into double precision float.
+ *
+ * @param[in] uDoubleSign 0 if positive, 1 if negative
+ * @param[in] uDoubleSignificand Bits of the significand
+ * @param[in] nDoubleUnBiasedExponent Exponent
+ *
+ * This returns a double-precision float, a binary64
+ * as specified in IEEE754.
*/
-
-
-// Public function; see ieee754.h
-double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
+static double
+IEEE754_AssembleDouble(uint64_t uDoubleSign,
+ uint64_t uDoubleSignificand,
+ int64_t nDoubleUnBiasedExponent)
{
- // Pull out the three parts of the half-precision float. Do all
- // the work in 64 bits because that is what the end result is. It
- // may give smaller code size and will keep static analyzers
- // happier.
- const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
- const int64_t nHalfUnBiasedExponent = (int64_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
- const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+ uint64_t uDoubleBiasedExponent;
+
+ uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
+
+ return CopyUint64ToDouble(uDoubleSignificand |
+ (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
+ (uDoubleSign << DOUBLE_SIGN_SHIFT));
+}
- // Make the three parts of hte single-precision number
- uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
- if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
- // 0 or subnormal
- uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
- if(uHalfSignificand) {
- // Subnormal case
- uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
- // A half-precision subnormal can always be converted to a
- // normal double-precision float because the ranges line
- // up
- uDoubleSignificand = uHalfSignificand;
- // Shift bits from right of the decimal to left, reducing
- // the exponent by 1 each time
- do {
- uDoubleSignificand <<= 1;
- uDoubleBiasedExponent--;
- } while ((uDoubleSignificand & 0x400) == 0);
- uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
- uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+double
+IEEE754_HalfToDouble(uint16_t uHalfPrecision)
+{
+ uint64_t uDoubleSignificand;
+ int64_t nDoubleUnBiasedExponent;
+ double dResult;
+
+ /* Convert the IEEE 754 half-precision bit pattern in
+ * uHalfPrecision to a double. Zero, subnormal, normal, infinity and
+ * NaN each get their own branch below; the half's sign bit is
+ * passed through to the double in every branch.
+ *
+ * Pull out the three parts of the half-precision float. Do all
+ * the work in 64 bits because that is what the end result is. It
+ * may give smaller code size and will keep static analyzers
+ * happier.
+ */
+ const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
+ const uint64_t uHalfBiasedExponent = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
+ const int64_t nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
+ const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+
+ if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
+ /* 0 or subnormal */
+ if(uHalfSignificand) {
+ /* --- SUBNORMAL --- */
+ /* A half-precision subnormal can always be converted to a
+ * normal double-precision float because the ranges line up.
+ * The exponent of a subnormal starts out at the min exponent
+ * for a normal. As the subnormal significand bits are
+ * shifted left to normalize, the exponent is
+ * decremented. Shifting continues until fully normalized,
+ * that is until a 1 reaches the bit just above the 10
+ * half significand bits. The significand is non-zero here
+ * so the loop always terminates.
+ */
+ nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
+ uDoubleSignificand = uHalfSignificand;
+ do {
+ uDoubleSignificand <<= 1;
+ nDoubleUnBiasedExponent--;
+ } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
+ /* A normal has an implied 1 in the most significant
+ * position that a subnormal doesn't. Remove the explicit 1
+ * that the normalization above produced. */
+ uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
+ /* Must shift into place for a double significand */
+ uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
+
+ dResult = IEEE754_AssembleDouble(uHalfSign,
+ uDoubleSignificand,
+ nDoubleUnBiasedExponent);
+ } else {
+ /* --- ZERO --- */
+ dResult = IEEE754_AssembleDouble(uHalfSign,
+ 0,
+ DOUBLE_EXPONENT_ZERO);
+ }
+ } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
+ /* NaN or Infinity */
+ if(uHalfSignificand) {
+ /* --- NaN --- */
+ /* Half-precision payloads always fit into double precision
+ * payloads. They are shifted left the same as a normal
+ * number significand.
+ */
+ uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+ dResult = IEEE754_AssembleDouble(uHalfSign,
+ uDoubleSignificand,
+ DOUBLE_EXPONENT_INF_OR_NAN);
+ } else {
+ /* --- INFINITY --- */
+ dResult = IEEE754_AssembleDouble(uHalfSign,
+ 0,
+ DOUBLE_EXPONENT_INF_OR_NAN);
+ }
+ } else {
+ /* --- NORMAL NUMBER ---
+ * The half's unbiased exponent is used unchanged for the
+ * double; only the significand is shifted left into the wider
+ * double significand field.
+ */
+ uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+ dResult = IEEE754_AssembleDouble(uHalfSign,
+ uDoubleSignificand,
+ nHalfUnBiasedExponent);
+ }
+
+ return dResult;
+}
+
+
+/**
+ * @brief Assemble sign, significand and exponent into half-precision bits.
+ *
+ * @param[in] uHalfSign              0 if positive, 1 if negative
+ * @param[in] uHalfSignificand       Bits of the significand
+ * @param[in] nHalfUnBiasedExponent  Exponent
+ *
+ * This returns the bits for a half-precision float, a binary16 as
+ * specified in IEEE754. The 16 bits are returned in a uint32_t.
+ *
+ * No range checking is done here; the inputs are combined with
+ * shifts and ORs exactly as given.
+ */
+static uint32_t
+IEEE754_AssembleHalf(uint32_t uHalfSign,
+                     uint32_t uHalfSignificand,
+                     int32_t  nHalfUnBiasedExponent)
+{
+   uint32_t uHalfBiasedExponent;
+
+   /* Adding the bias gives the value stored in the exponent field */
+   uHalfBiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
+
+   return uHalfSignificand |
+          (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
+          (uHalfSign << HALF_SIGN_SHIFT);
+}
+
+
+/* Public function; see ieee754.h
+ *
+ * Returns the half-precision bits (uSize is IEEE754_UNION_IS_HALF)
+ * when f can be represented as a half without losing any bits. That
+ * covers zero, infinity, NaNs whose payload fits, half normals and
+ * half subnormals. Otherwise the original single-precision bits are
+ * returned unchanged (uSize is IEEE754_UNION_IS_SINGLE).
+ */
+IEEE754_union
+IEEE754_SingleToHalf(float f)
+{
+   IEEE754_union result;
+   uint32_t      uDroppedBits;
+   int32_t       nExponentDifference;
+   int32_t       nShiftAmount;
+   uint32_t      uHalfSignificand;
+   uint32_t      uFullSignificand;
+
+   /* Pull the three parts out of the single-precision float. Most
+    * work is done with uint32_t which helps avoid integer promotions
+    * and static analyzer complaints.
+    */
+   const uint32_t uSingle                 = CopyFloatToUint32(f);
+   const uint32_t uSingleBiasedExponent   = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
+   const int32_t  nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
+   const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
+   const uint32_t uSingleSign             = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
+
+   if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
+      if(uSingleSignificand == 0) {
+         /* --- IS ZERO --- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                              0,
+                                              HALF_EXPONENT_ZERO);
+      } else {
+         /* --- IS SINGLE SUBNORMAL --- */
+         /* The largest single subnormal is slightly less than the
+          * smallest single normal which is 2^-126 or about
+          * 1.1754943508222875e-38. The smallest half subnormal is
+          * 2^-24 or 5.9604644775390625E-8. There is no overlap so
+          * single subnormals can't be converted to halfs of any sort.
+          */
+         result.uSize  = IEEE754_UNION_IS_SINGLE;
+         result.uValue = uSingle;
+      }
+   } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
+      if(uSingleSignificand == 0) {
+         /* ---- IS INFINITY ---- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
+      } else {
+         /* The NaN can only be converted if no payload bits are lost
+          * per RFC 8949 section 4.1 that defines Preferred
+          * Serialization. Note that Deterministically Encoded CBOR in
+          * section 4.2 allows for some variation of this rule, but at
+          * the moment this implementation is of Preferred
+          * Serialization, not CDE. As of December 2023, we are also
+          * expecting an update to CDE. This code may need to be
+          * updated for CDE.
+          */
+         uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
+         if(uDroppedBits == 0) {
+            /* --- IS CONVERTIBLE NAN --- */
+            uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+            result.uSize  = IEEE754_UNION_IS_HALF;
+            result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                                 uHalfSignificand,
+                                                 HALF_EXPONENT_INF_OR_NAN);
+
+         } else {
+            /* --- IS UNCONVERTIBLE NAN --- */
+            result.uSize  = IEEE754_UNION_IS_SINGLE;
+            result.uValue = uSingle;
+         }
+      }
+   } else {
+      /* ---- REGULAR NUMBER ---- */
+      /* A regular single can be converted to a regular half if the
+       * single's exponent is in the smaller range of a half and if no
+       * precision is lost in the significand.
+       */
+      if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
+         nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
+         (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
+         uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+
+         /* --- CONVERT TO HALF NORMAL --- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                              uHalfSignificand,
+                                              nSingleUnbiasedExponent);
+      } else {
+         /* Unable to convert to a half normal. See if it can be
+          * converted to a half subnormal. To do that, the exponent
+          * must be in range and no precision can be lost in the
+          * significand.
+          *
+          * This is more complicated because the number is not
+          * normalized. The significand must be shifted proportionally
+          * to the exponent and 1 must be added in. See
+          * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+          *
+          * Exponents -15 to -24 map to an extra right shift of 1 to
+          * 10 of the significand. Subnormals are not normalized like
+          * normals meaning they lose precision as the numbers get
+          * smaller. Normals don't lose precision because the exponent
+          * allows all the bits of the significand to be significant.
+          */
+
+         /* --- DO NOT CONVERT --- */
+         /* This is the outcome unless the subnormal check below
+          * succeeds and overwrites it.
+          */
+         result.uSize  = IEEE754_UNION_IS_SINGLE;
+         result.uValue = uSingle;
+
+         /* The exponent range is checked before any shifting so that
+          * nShiftAmount is always between 14 and 23. Shifting a
+          * uint32_t by a negative amount or by 32 or more is
+          * undefined behavior in C, and that is what exponents
+          * outside this range would produce.
+          */
+         if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
+            nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS) {
+            /* The exponent of the largest possible half-precision
+             * subnormal is HALF_EXPONENT_MIN (-14). Exponents larger
+             * than this are normal and handled above. The significand
+             * is shifted right by the distance below that exponent.
+             */
+            nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
+
+            /* In addition to the shift based on the exponent's value,
+             * the single significand has to be shifted right to fit
+             * into a half-precision significand */
+            nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+
+            /* Must add 1 in to the possible significand because there
+             * is an implied 1 for normal values and not for subnormal
+             * values. See equations here:
+             * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+             */
+            uFullSignificand = uSingleSignificand + (1U << SINGLE_NUM_SIGNIFICAND_BITS);
+            uHalfSignificand = uFullSignificand >> nShiftAmount;
+
+            /* If only zero bits get shifted out, this can be
+             * converted to subnormal */
+            if((uHalfSignificand << nShiftAmount) == uFullSignificand) {
+               /* --- CONVERTIBLE TO HALF SUBNORMAL --- */
+               result.uSize  = IEEE754_UNION_IS_HALF;
+               result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                                    uHalfSignificand,
+                                                    HALF_EXPONENT_ZERO);
+            }
+         }
+      }
+   }
+
+   return result;
+}
+
+
+/**
+ * @brief Assemble sign, significand and exponent into single precision float.
+ *
+ * @param[in] uSingleSign 0 if positive, 1 if negative
+ * @param[in] uSingleSignificand Bits of the significand
+ * @param[in] nSingleUnBiasedExponent Exponent
+ *
+ * This returns the bits for a single-precision float, a binary32 as
+ * specified in IEEE754. It is returned as a uint64_t rather than a
+ * uint32_t or a float for convenience of usage.
+ *
+ * SINGLE_EXPONENT_BIAS is added to the unbiased exponent to form the
+ * exponent field; the three parts are then combined with shifts and
+ * ORs. No range checking is done on the inputs.
+ */
+static uint64_t
+IEEE754_AssembleSingle(uint64_t uSingleSign,
+ uint64_t uSingleSignificand,
+ int64_t nSingleUnBiasedExponent)
+{
+ uint64_t uSingleBiasedExponent;
+
+ uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
+
+ return uSingleSignificand |
+ (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
+ (uSingleSign << SINGLE_SIGN_SHIFT);
+}
+
+
+/**
+ * @brief Convert a double-precision float to single-precision.
+ *
+ * @param[in] d The value to convert.
+ *
+ * @returns Either unconverted value or value converted to single-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
+ */
+static IEEE754_union
+IEEE754_DoubleToSingle(double d)
+{
+ IEEE754_union Result;
+ int64_t nExponentDifference;
+ int64_t nShiftAmount;
+ uint64_t uSingleSignificand;
+ uint64_t uDroppedBits;
+
+
+ /* Pull the three parts out of the double-precision float. Most
+ * work is done with uint64_t which helps avoid integer promotions
+ * and static analyzer complaints.
+ */
+ const uint64_t uDouble = CopyDoubleToUint64(d);
+ const uint64_t uDoubleBiasedExponent = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
+ const int64_t nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
+ const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
+ const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
+
+
+ if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
+ if(uDoubleSignificand == 0) {
+ /* --- IS ZERO --- */
+ Result.uSize = IEEE754_UNION_IS_SINGLE;
+ Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+ 0,
+ SINGLE_EXPONENT_ZERO);
} else {
- // Just zero
- uDoubleSignificand = 0;
- }
- } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
- // NaN or Inifinity
- uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
- if(uHalfSignificand) {
- // NaN
- // First preserve the NaN payload from half to single
- uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
- if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
- // Next, set qNaN if needed since half qNaN bit is not
- // copied above
- uDoubleSignificand |= DOUBLE_QUIET_NAN_BIT;
+ /* --- IS DOUBLE SUBNORMAL --- */
+ /* The largest double subnormal is slightly less than the
+ * largest double normal which is 2^-1022 or
+ * 2.2250738585072014e-308. The smallest single subnormal
+ * is 2^-149 or 1.401298464324817e-45. There is no
+ * overlap so double subnormals can't be converted to
+ * singles of any sort.
+ */
+ Result.uSize = IEEE754_UNION_IS_DOUBLE;
+ Result.uValue = uDouble;
+ }
+ } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
+ if(uDoubleSignificand == 0) {
+ /* ---- IS INFINITY ---- */
+ Result.uSize = IEEE754_UNION_IS_SINGLE;
+ Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+ 0,
+ SINGLE_EXPONENT_INF_OR_NAN);
+ } else {
+ /* The NaN can only be converted if no payload bits are
+ * lost per RFC 8949 section 4.1 that defines Preferred
+ * Serialization. Note that Deterministically Encoded CBOR
+ * in section 4.2 allows for some variation of this rule,
+ * but at the moment this implementation is of Preferred
+ * Serialization, not CDE. As of December 2023, we are
+ * also expecting an update to CDE. This code may need to
+ * be updated for CDE.
+ */
+ uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
+ if(uDroppedBits == 0) {
+ /* --- IS CONVERTABLE NAN --- */
+ uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+ Result.uSize = IEEE754_UNION_IS_SINGLE;
+ Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+ uSingleSignificand,
+ SINGLE_EXPONENT_INF_OR_NAN);
+ } else {
+ /* --- IS UNCONVERTABLE NAN --- */
+ Result.uSize = IEEE754_UNION_IS_DOUBLE;
+ Result.uValue = uDouble;
}
+ }
+ } else {
+ /* ---- REGULAR NUMBER ---- */
+ /* A regular double can be converted to a regular single if
+ * the double's exponent is in the smaller range of a single
+ * and if no precision is lost in the significand.
+ */
+ uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
+ if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
+ nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
+ uDroppedBits == 0) {
+ /* --- IS CONVERTABLE TO SINGLE --- */
+ uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+ Result.uSize = IEEE754_UNION_IS_SINGLE;
+ Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+ uSingleSignificand,
+ nDoubleUnbiasedExponent);
} else {
- // Infinity
- uDoubleSignificand = 0;
+ /* Unable to convert to a single normal. See if it can be
+ * converted to a single subnormal. To do that, the
+ * exponent must be in range and no precision can be lost
+ * in the significand.
+ *
+ * This is more complicated because the number is not
+ * normalized. The significand must be shifted
+ * proportionally to the exponent and 1 must be added
+ * in. See
+ * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+ */
+ nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
+ nShiftAmount = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+ uSingleSignificand = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
+
+ if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
+ nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
+ uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
+ /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
+ Result.uSize = IEEE754_UNION_IS_SINGLE;
+ Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+ uSingleSignificand,
+ SINGLE_EXPONENT_ZERO);
+ } else {
+ /* --- CAN NOT BE CONVERTED --- */
+ Result.uSize = IEEE754_UNION_IS_DOUBLE;
+ Result.uValue = uDouble;
+ }
}
- } else {
- // Normal number
- uDoubleBiasedExponent = (uint64_t)(nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
- uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
}
- uDoubleSign = uHalfSign;
-
- // Shift the 3 parts into place as a double-precision
- const uint64_t uDouble = uDoubleSignificand |
- (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
- (uDoubleSign << DOUBLE_SIGN_SHIFT);
- return CopyUint64ToDouble(uDouble);
+ return Result;
}
-
-/*
- IEEE754_FloatToDouble(uint32_t uFloat) was created but is not needed. It can be retrieved from
-github history if needed.
-*/
-
-
-
-// Public function; see ieee754.h
-IEEE754_union IEEE754_FloatToSmallest(float f)
+/* Public function; see ieee754.h */
+IEEE754_union
+IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision)
{
- IEEE754_union result;
+ IEEE754_union result;
- // Pull the neeed two parts out of the single-precision float
- const uint32_t uSingle = CopyFloatToUint32(f);
- const int32_t nSingleExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
- const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
+ result = IEEE754_DoubleToSingle(d);
- // Bit mask that is the significand bits that would be lost when
- // converting from single-precision to half-precision
- const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
+ if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
+ /* Cast to uint32_t is OK, because value was just successfully
+ * converted to single. */
+ float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
+ result = IEEE754_SingleToHalf(uSingle);
+ }
- // Optimizer will re organize so there is only one call to
- // IEEE754_FloatToHalf() in the final code.
- if(uSingle == 0) {
- // Value is 0.0000, not a a subnormal
- result.uSize = IEEE754_UNION_IS_HALF;
- result.uValue = IEEE754_FloatToHalf(f);
- } else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
- // NaN, +/- infinity
- result.uSize = IEEE754_UNION_IS_HALF;
- result.uValue = IEEE754_FloatToHalf(f);
- } else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
- // Normal number in exponent range and precision won't be lost
- result.uSize = IEEE754_UNION_IS_HALF;
- result.uValue = IEEE754_FloatToHalf(f);
- } else {
- // Subnormal, exponent out of range, or precision will be lost
- result.uSize = IEEE754_UNION_IS_SINGLE;
- result.uValue = uSingle;
- }
-
- return result;
+ return result;
}
-// Public function; see ieee754.h
-IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
-{
- IEEE754_union result;
- // Pull the needed two parts out of the double-precision float
- const uint64_t uDouble = CopyDoubleToUint64(d);
- const int64_t nDoubleExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
- const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
+#else /* QCBOR_DISABLE_PREFERRED_FLOAT */
- // Masks to check whether dropped significand bits are zero or not
- const uint64_t uDroppedHalfBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
- const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
-
- // This will not convert to half-precion or single-precision
- // subnormals. Values that could be converted will be output as
- // the double they are or occasionally to a normal single. This
- // could be implemented, but it is more code and would rarely be
- // used and rarely reduce the output size.
-
- // The various cases
- if(d == 0.0) { // Take care of positive and negative zero
- // Value is 0.0000, not a a subnormal
- result.uSize = IEEE754_UNION_IS_HALF;
- result.uValue = IEEE754_DoubleToHalf(d);
- } else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
- // NaN, +/- infinity
- result.uSize = IEEE754_UNION_IS_HALF;
- result.uValue = IEEE754_DoubleToHalf(d);
- } else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedHalfBits))) {
- // Can convert to half without precision loss
- result.uSize = IEEE754_UNION_IS_HALF;
- result.uValue = IEEE754_DoubleToHalf(d);
- } else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
- // Can convert to single without precision loss
- result.uSize = IEEE754_UNION_IS_SINGLE;
- result.uValue = CopyFloatToUint32((float)d);
- } else {
- // Can't convert without precision loss
- result.uSize = IEEE754_UNION_IS_DOUBLE;
- result.uValue = uDouble;
- }
-
- return result;
-}
-
-#else
-
-int x;
+int ieee754_dummy_place_holder;
#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
diff --git a/src/ieee754.h b/src/ieee754.h
index d37532a..863019b 100644
--- a/src/ieee754.h
+++ b/src/ieee754.h
@@ -1,14 +1,14 @@
-/*==============================================================================
- ieee754.c -- floating-point conversion between half, double & single-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 7/23/18
- =============================================================================*/
+/* ==========================================================================
+ * ieee754.h -- Conversion between half, double & single-precision floats
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 7/23/18
+ * ========================================================================== */
#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
@@ -18,130 +18,109 @@
#include <stdint.h>
-
-/*
- General comments
-
- This is a complete in that it handles all conversion cases including
- +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN and NaN
- payloads.
-
- This conforms to IEEE 754-2008, but note that this doesn't specify
- conversions, just the encodings.
-
- NaN payloads are preserved with alignment on the LSB. The qNaN bit is
- handled differently and explicity copied. It is always the MSB of the
- significand. The NaN payload MSBs (except the qNaN bit) are truncated
- when going from double or single to half.
-
- TODO: what does the C cast do with NaN payloads from
- double to single? It probably depends entirely on the
- CPU.
-
- */
-
-/*
- Most simply just explicilty encode the type you want, single or
- double. This works easily everywhere since standard C supports both
- these types and so does qcbor. This encoder also supports half
- precision and there's a few ways to use it to encode floating-point
- numbers in less space.
-
- Without losing precision, you can encode a single or double such that
- the special values of 0, NaN and Infinity encode as half-precision.
- This CBOR decodoer and most others should handle this properly.
-
- If you don't mind losing precision, then you can use half-precision.
- One way to do this is to set up your environment to use
- ___fp_16. Some compilers and CPUs support it even though it is not
- standard C. What is nice about this is that your program will use
- less memory and floating-point operations like multiplying, adding
- and such will be faster.
-
- Another way to make use of half-precision is to represent the values
- in your program as single or double, but encode them in CBOR as
- half-precision. This cuts the size of the encoded messages by 2 or 4,
- but doesn't reduce memory needs or speed because you are still using
- single or double in your code.
-
+/** @file ieee754.h
+ *
+ * This implements floating-point conversion between half, single and
+ * double precision floating-point numbers, in particular conversion to
+ * smaller representation (e.g., double to single) that does not lose
+ * precision for CBOR preferred serialization.
+ *
+ * This implementation works entirely with shifts and masks and does
+ * not require any floating-point HW or library.
+ *
+ * This conforms to IEEE 754-2008, but note that it doesn't specify
+ * conversions, just the encodings.
+ *
+ * This is complete, supporting +/- infinity, +/- zero, subnormals and
+ * NaN payloads. NaN payloads are converted to smaller by dropping the
+ * right most bits if they are zero and shifting to the right. If the
+ * rightmost bits are not zero the conversion is not performed. When
+ * converting from smaller to larger, the payload is shifted left and
+ * zero-padded. This is what is specified by CBOR preferred
+ * serialization and what modern HW conversion instructions do. CBOR
+ * CDE handling for NaN is not clearly specified, but upcoming
+ * documents may clarify this.
+ *
+ * There is no special handling of signaling and quiet NaNs. It probably
+ * isn't necessary to transmit these special NaNs as their purpose is
+ * more for propagating errors up through some calculation. In many
+ * cases the handling of the NaN payload will work for signaling and quiet
+ * NaNs.
+ *
+ * A previous version of this was usable as a general library for
+ * conversion. This version is reduced to what is needed for CBOR.
*/
-
-/*
- Convert single-precision float to half-precision float. Precision
- and NaN payload bits will be lost. Too-large values will round up to
- infinity and too small to zero.
+/**
+ * @brief Convert half-precision float to double-precision float.
+ *
+ * @param[in] uHalfPrecision Half-precision number to convert.
+ *
+ * @returns double-precision value.
+ *
+ * This is a lossless conversion because every half-precision value
+ * can be represented as a double. There is no error condition.
+ *
+ * There is no half-precision type in C, so it is represented here as
+ * a @c uint16_t. The bits of @c uHalfPrecision are as described for
+ * half-precision by IEEE 754.
*/
-uint16_t IEEE754_FloatToHalf(float f);
+double
+IEEE754_HalfToDouble(uint16_t uHalfPrecision);
-/*
- Convert double-precision float to half-precision float. Precision
- and NaN payload bits will be lost. Too-large values will round up to
- infinity and too small to zero.
+/** Holds a floating-point value that could be half, single or
+ * double-precision. The value is in a @c uint64_t that may be copied
+ * to a float or double. Simply casting uValue will usually work but
+ * may generate compiler or static analyzer warnings. Using
+ * UsefulBufUtil_CopyUint64ToDouble() or
+ * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
+ * any extra code).
*/
-uint16_t IEEE754_DoubleToHalf(double d);
-
-
-/*
- Convert half-precision float to double-precision float.
- This is a loss-less conversion.
- */
-double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
-
-
-// Both tags the value and gives the size
-#define IEEE754_UNION_IS_HALF 2
-#define IEEE754_UNION_IS_SINGLE 4
-#define IEEE754_UNION_IS_DOUBLE 8
-
typedef struct {
- uint8_t uSize; // One of IEEE754_IS_xxxx
- uint64_t uValue;
+ enum {IEEE754_UNION_IS_HALF = 2,
+ IEEE754_UNION_IS_SINGLE = 4,
+ IEEE754_UNION_IS_DOUBLE = 8,
+ } uSize; /* Size of uValue */
+ uint64_t uValue;
} IEEE754_union;
-/*
- Converts double-precision to single-precision or half-precision if
- possible without loss of precisions. If not, leaves it as a
- double. Only converts to single-precision unless bAllowHalfPrecision
- is set.
+/**
+ * @brief Convert a double to either single or half-precision.
+ *
+ * @param[in] d The value to convert.
+ * @param[in] bAllowHalfPrecision If true, convert to either half or
+ * single precision.
+ *
+ * @returns Unconverted value, or value converted to single or half-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
*/
-IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
+IEEE754_union
+IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);
-/*
- Converts double-precision to single-precision if possible without
- loss of precision. If not, leaves it as a double.
+
+/**
+ * @brief Convert a single-precision float to half-precision.
+ *
+ * @param[in] f The value to convert.
+ *
+ * @returns Either unconverted value or value converted to half-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
*/
-static inline IEEE754_union IEEE754_DoubleToSmall(double d)
-{
- return IEEE754_DoubleToSmallestInternal(d, 0);
-}
-
-
-/*
- Converts double-precision to single-precision or half-precision if
- possible without loss of precisions. If not, leaves it as a double.
- */
-static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
-{
- return IEEE754_DoubleToSmallestInternal(d, 1);
-}
-
-
-/*
- Converts single-precision to half-precision if possible without loss
- of precision. If not leaves as single-precision.
- */
-IEEE754_union IEEE754_FloatToSmallest(float f);
+IEEE754_union
+IEEE754_SingleToHalf(float f);
#endif /* ieee754_h */
-
#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
-
-
-
-
diff --git a/src/qcbor_encode.c b/src/qcbor_encode.c
index 53df657..f52692a 100644
--- a/src/qcbor_encode.c
+++ b/src/qcbor_encode.c
@@ -768,9 +768,9 @@
void QCBOREncode_AddDouble(QCBOREncodeContext *me, double dNum)
{
#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
- const IEEE754_union uNum = IEEE754_DoubleToSmallest(dNum);
+ const IEEE754_union uNum = IEEE754_DoubleToSmaller(dNum, true);
- QCBOREncode_AddType7(me, uNum.uSize, uNum.uValue);
+ QCBOREncode_AddType7(me, (uint8_t)uNum.uSize, uNum.uValue);
#else /* QCBOR_DISABLE_PREFERRED_FLOAT */
QCBOREncode_AddDoubleNoPreferred(me, dNum);
#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
@@ -794,9 +794,9 @@
void QCBOREncode_AddFloat(QCBOREncodeContext *me, float fNum)
{
#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
- const IEEE754_union uNum = IEEE754_FloatToSmallest(fNum);
+ const IEEE754_union uNum = IEEE754_SingleToHalf(fNum);
- QCBOREncode_AddType7(me, uNum.uSize, uNum.uValue);
+ QCBOREncode_AddType7(me, (uint8_t)uNum.uSize, uNum.uValue);
#else /* QCBOR_DISABLE_PREFERRED_FLOAT */
QCBOREncode_AddFloatNoPreferred(me, fNum);
#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
diff --git a/test/float_tests.c b/test/float_tests.c
index 2bf5fad..1a7ade1 100644
--- a/test/float_tests.c
+++ b/test/float_tests.c
@@ -1,32 +1,33 @@
-/*==============================================================================
- float_tests.c -- tests for float and conversion to/from half-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
- Copyright (c) 2021, Arm Limited. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 9/19/18
- =============================================================================*/
+/* ==========================================================================
+ * float_tests.c -- tests for float and conversion to/from half-precision
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 9/19/18
+ * ========================================================================= */
#include "float_tests.h"
#include "qcbor/qcbor_encode.h"
#include "qcbor/qcbor_decode.h"
#include "qcbor/qcbor_spiffy_decode.h"
-#include <math.h> // For INFINITY and NAN and isnan()
+#include <math.h> /* For INFINITY and NAN and isnan() */
-/* Make a test results code that includes three components
- * Return code is
- * xxxyyyzzz where zz is the error code, yy is the test number and zz is
- * check being performed
+
+/* Make a test results code that includes three components. Return code
+ * is xxxyyyzzz where xxx is the test case, yyy is the test number and
+ * zzz is the error code
*/
-static inline int32_t MakeTestResultCode(uint32_t uTestCase,
- uint32_t uTestNumber,
- QCBORError uErrorCode)
+static inline int32_t
+MakeTestResultCode(uint32_t uTestCase,
+ uint32_t uTestNumber,
+ QCBORError uErrorCode)
{
uint32_t uCode = (uTestCase * 1000000) +
(uTestNumber * 1000) +
@@ -40,585 +41,567 @@
#include "half_to_double_from_rfc7049.h"
-/*
- Half-precision values that are input to test half-precision decoding
+struct DoubleTestCase {
+ double dNumber;
+ double fNumber;
+ UsefulBufC Preferred;
+ UsefulBufC NotPreferred;
+ UsefulBufC CDE;
+ UsefulBufC DCBOR;
+};
- As decoded by http://cbor.me
- {"zero": 0.0,
- "infinitity": Infinity,
- "negative infinitity": -Infinity,
- "NaN": NaN,
- "one": 1.0,
- "one third": 0.333251953125,
- "largest half-precision": 65504.0,
- "too-large half-precision": Infinity,
- "smallest subnormal": 5.960464477539063e-8,
- "smallest normal": 0.00006097555160522461,
- "biggest subnormal": 0.00006103515625,
- "subnormal single": 0.0,
- 3: -2.0,
- 4: NaN,
- 5: NaN,
- 6: NaN,
- 7: NaN}
+/* Boundaries for all destination conversions to test at.
+ *
+ * smallest subnormal single 1.401298464324817e-45 2^^-149
+ * largest subnormal single 1.1754942106924411e-38 2^^-126
+ * smallest normal single 1.1754943508222875e-38
+ * largest single 3.4028234663852886E+38
+ *
+ * smallest subnormal half 5.9604644775390625E-8
+ * largest subnormal half 6.097555160522461E-5
+ * smallest normal half 6.103515625E-5
+ * largest half 65504.0
+ *
+ * Boundaries for origin conversions
+ * smallest subnormal double 5.0e-324 2^^-1074
+ * largest subnormal double 2.2250738585072009e-308
+ * smallest normal double 2.2250738585072014e-308 2^^-1022
+ * largest normal double 1.7976931348623157e308 2^^1024
*/
-static const uint8_t spExpectedHalf[] = {
- 0xB1,
- 0x64,
- 0x7A, 0x65, 0x72, 0x6F,
- 0xF9, 0x00, 0x00, // half-precision 0.000
- 0x6A,
- 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
- 0xF9, 0x7C, 0x00, // Infinity
- 0x73,
- 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x69, 0x6E,
- 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
- 0xF9, 0xFC, 0x00, // -Inifinity
- 0x63,
- 0x4E, 0x61, 0x4E,
- 0xF9, 0x7E, 0x00, // NaN
- 0x63,
- 0x6F, 0x6E, 0x65,
- 0xF9, 0x3C, 0x00, // 1.0
- 0x69,
- 0x6F, 0x6E, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
- 0xF9, 0x35, 0x55, // half-precsion one third 0.333251953125
- 0x76,
- 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61, 0x6C,
- 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
- 0xF9, 0x7B, 0xFF, // largest half-precision 65504.0
- 0x78, 0x18,
- 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x68,
- 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69,
- 0x6F, 0x6E,
- 0xF9, 0x7C, 0x00, // Infinity
- 0x72,
- 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75,
- 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
- 0xF9, 0x00, 0x01, // Smallest half-precision subnormal 0.000000059604645
- 0x71,
- 0x62, 0x69, 0x67, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62,
- 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
- 0xF9, 0x03, 0xFF, // Largest half-precision subnormal 0.0000609755516
- 0x6F,
- 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x6F,
- 0x72, 0x6D, 0x61, 0x6C,
- 0xF9, 0x04, 0x00, // Smallest half-precision normal 0.000061988
- 0x70,
- 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x73,
- 0x69, 0x6E, 0x67, 0x6C, 0x65,
- 0xF9, 0x00, 0x00,
- 0x03,
- 0xF9, 0xC0, 0x00, // -2
- 0x04,
- 0xF9, 0x7E, 0x00, // qNaN
- 0x05,
- 0xF9, 0x7C, 0x01, // sNaN
- 0x06,
- 0xF9, 0x7E, 0x0F, // qNaN with payload 0x0f
- 0x07,
- 0xF9, 0x7C, 0x0F, // sNaN with payload 0x0f
+
+/* Always four lines per test case so shell scripts can process into
+ * other formats. CDE and DCBOR standards are not complete yet,
+ * encodings are a guess. C string literals are used because they
+ * are the shortest notation. They are used __with a length__ . Null
+ * termination doesn't work because there are zero bytes.
+ */
+static const struct DoubleTestCase DoubleTestCases[] = {
+ /* Zero */
+ {0.0, 0.0f,
+ {"\xF9\x00\x00", 3}, {"\xFB\x00\x00\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x00\x00", 3}, {"\xF9\x00\x00", 3}},
+
+ /* Negative Zero */
+ {-0.0, -0.0f,
+ {"\xF9\x80\x00", 3}, {"\xFB\x80\x00\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x80\x00", 3}, {"\xF9\x80\x00", 3}},
+
+ /* NaN */
+ {NAN, NAN,
+ {"\xF9\x7E\x00", 3}, {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x7E\x00", 3}, {"\xF9\x7E\x00", 3}},
+
+ /* Infinity */
+ {INFINITY, INFINITY,
+ {"\xF9\x7C\x00", 3}, {"\xFB\x7F\xF0\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x7C\x00", 3}, {"\xF9\x7C\x00", 3}},
+
+ /* Negative Infinity */
+ {-INFINITY, -INFINITY,
+ {"\xF9\xFC\x00", 3}, {"\xFB\xFF\xF0\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\xFC\x00", 3}, {"\xF9\xFC\x00", 3}},
+
+ /* 1.0 */
+ {1.0, 1.0f,
+ {"\xF9\x3C\x00", 3}, {"\xFB\x3F\xF0\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x3C\x00", 3}, {"\xF9\x3C\x00", 3}},
+
+ /* -2.0 -- a negative number that is not zero */
+ {-2.0, -2.0f,
+ {"\xF9\xC0\x00", 3}, {"\xFB\xC0\x00\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\xC0\x00", 3}, {"\xF9\x3C\x00", 3}},
+
+ /* 1/3 */
+ {0.333251953125, 0.333251953125f,
+ {"\xF9\x35\x55", 3}, {"\xFB\x3F\xD5\x54\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x35\x55", 3}, {"\xF9\x35\x55", 3}},
+
+ /* 5.9604644775390625E-8 -- smallest half-precision subnormal */
+ {5.9604644775390625E-8, 0.0f,
+ {"\xF9\x00\x01", 3}, {"\xFB\x3E\x70\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x00\x01", 3}, {"\xF9\x00\x01", 3}},
+
+ /* 3.0517578125E-5 -- a half-precision subnormal */
+ {3.0517578125E-5, 0.0f,
+ {"\xF9\x02\x00", 3}, {"\xFB\x3F\x00\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x02\x00", 3}, {"\xF9\x02\x00", 3}},
+
+ /* 6.097555160522461E-5 -- largest half-precision subnormal */
+ {6.097555160522461E-5, 0.0f,
+ {"\xF9\x03\xFF", 3}, {"\xFB\x3F\x0F\xF8\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x03\xFF", 3}, {"\xF9\04\00", 3}},
+
+ /* 6.103515625E-5 -- smallest possible half-precision normal */
+ {6.103515625E-5, 0.0f,
+ {"\xF9\04\00", 3}, {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\04\00", 3}, {"\xF9\04\00", 3}},
+
+ /* 6.1035156250000014E-5 -- slightly larger than smallest half-precision normal */
+ {6.1035156250000014E-5, 6.1035156250000014E-5f,
+ {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9},
+ {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9}},
+
+ /* 6.1035156249999993E-5 -- slightly smaller than smallest half-precision normal */
+ {6.1035156249999993E-5, 0.0f,
+ {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+ {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+ /* 65504.0 -- largest possible half-precision */
+ {65504.0, 0.0f,
+ {"\xF9\x7B\xFF", 3}, {"\xFB\x40\xEF\xFC\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x7B\xFF", 3}, {"\xF9\x7B\xFF", 3}},
+
+ /* 65504.1 -- exponent too large and too much precision to convert */
+ {65504.1, 0.0f,
+ {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9}, {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9},
+ {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9}, {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9}},
+
+ /* 65536.0 -- exponent too large but not too much precision for single */
+ {65536.0, 65536.0f,
+ {"\xFA\x47\x80\x00\x00", 5}, {"\xFB\x40\xF0\x00\x00\x00\x00\x00\x00", 9},
+ {"\xFA\x47\x80\x00\x00", 5}, {"\xFA\x47\x80\x00\x00", 5}},
+
+ /* 1.401298464324817e-45 -- smallest single subnormal */
+ {1.401298464324817e-45, 1.40129846E-45f,
+ {"\xFA\x00\x00\x00\x01", 5}, {"\xFB\x36\xA0\x00\x00\x00\x00\x00\x00", 9},
+ {"\xFA\x00\x00\x00\x01", 5}, {"\xFA\x00\x00\x00\x01", 5}},
+
+ /* 5.8774717541114375E-39 -- slightly smaller than the smallest
+ * single normal */
+ {5.8774717541114375E-39, 5.87747175E-39f,
+ {"\xFA\x00\x40\x00\x00", 5}, {"\xFB\x38\x00\x00\x00\x00\x00\x00\x00", 9},
+ {"\xFA\x00\x40\x00\x00", 5}, {"\xFA\x00\x40\x00\x00", 5}},
+
+ /* 1.1754942106924411e-38 -- largest single subnormal */
+ {1.1754942106924411E-38, 1.17549421E-38f,
+ {"\xFA\x00\x7f\xff\xff", 5}, {"\xFB\x38\x0f\xff\xff\xC0\x00\x00\x00", 9},
+ {"\xFA\x00\x7f\xff\xff", 5}, {"\xFA\x00\x7f\xff\xff", 5} },
+
+ /* 1.1754943508222874E-38 -- slightly smaller than smallest single normal */
+ {1.1754943508222874E-38, 0.0f,
+ {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9}, {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9},
+ {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9}, {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9}},
+
+ /* 1.1754943508222875e-38 -- smallest single normal */
+ {1.1754943508222875e-38, 1.17549435E-38f,
+ {"\xFA\x00\x80\x00\x00", 5}, {"\xFB\x38\x10\x00\x00\x00\x00\x00\x00", 9},
+ {"\xFA\x00\x80\x00\x00", 5}, {"\xFA\x00\x80\x00\x00", 5}},
+
+ /* 1.1754943508222878e-38 -- slightly bigger than smallest single normal */
+ {1.1754943508222878e-38, 0.0f,
+ {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9},
+ {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9}},
+
+ /* 16777216 -- converts to single without loss */
+ {16777216, 16777216,
+ {"\xFA\x4B\x80\x00\x00", 5}, {"\xFB\x41\x70\x00\x00\x00\x00\x00\x00", 9},
+ {"\xFA\x4B\x80\x00\x00", 5}, {"\xFA\x4B\x80\x00\x00", 5}},
+
+ /* 16777217 -- one more than above and fails conversion to single */
+ {16777217, 16777216,
+ {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9}, {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9},
+ {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9}, {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9}},
+
+ /* 3.4028234663852886E+38 -- largest possible single normal */
+ {3.4028234663852886E+38, 3.40282347E+38f,
+ {"\xFA\x7F\x7F\xFF\xFF", 5}, {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x00", 9},
+ {"\xFA\x7F\x7F\xFF\xFF", 5}, {"\xFA\x7F\x7F\xFF\xFF", 5}},
+
+ /* 3.402823466385289E+38 -- slightly larger than largest possible single */
+ {3.402823466385289E+38, 0.0f,
+ {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9}, {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9},
+ {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9}, {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9}},
+
+ /* 3.402823669209385e+38 -- exponent larger by one than largest possible single */
+ {3.402823669209385e+38, 0.0f,
+ {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9},
+ {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9}},
+
+ /* 5.0e-324 -- smallest double subnormal */
+ {5.0e-324, 0.0f,
+ {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9},
+ {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9}},
+
+ /* 2.2250738585072009E-308 -- largest double subnormal */
+ {2.2250738585072009e-308, 0.0f,
+ {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+ {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+ /* 2.2250738585072014e-308 -- smallest double normal */
+ {2.2250738585072014e-308, 0.0f,
+ {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9},
+ {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9}},
+
+ /* 1.7976931348623157E308 -- largest double normal */
+ {1.7976931348623157e308, 0.0f,
+ {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+ {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+ /* List terminator */
+ {0.0, 0.0f, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0} }
};
-inline static bool CheckDouble(double d, uint64_t u)
+struct NaNTestCase {
+ uint64_t uDouble;
+ uint32_t uSingle;
+ UsefulBufC Preferred;
+ UsefulBufC NotPreferred;
+ UsefulBufC CDE;
+ UsefulBufC DCBOR;
+};
+
+/* Always four lines per test case so shell scripts can process into
+ * other formats. CDE and DCBOR standards are not complete yet,
+ * encodings are a guess. C string literals are used because they
+ * are the shortest notation. They are used __with a length__ . Null
+ * termination doesn't work because there are zero bytes.
+ */
+static const struct NaNTestCase NaNTestCases[] = {
+
+ /* Payload with most significant bit set, a qNaN by most implementations */
+ {0x7ff8000000000000, 0x00000000,
+ {"\xF9\x7E\x00", 3}, {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x7E\x00", 3}, {"\xF9\x7E\x00", 3}},
+
+ /* Payload with single rightmost bit set */
+ {0x7ff8000000000001, 0x00000000,
+ {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x01", 9},
+ {"\xF9\x7E\x00", 3}, {"\xF9\x7E\x00", 3}},
+
+ /* Payload with 10 leftmost bits set -- converts to half */
+ {0x7ffffc0000000000, 0x00000000,
+ {"\xF9\x7F\xFF", 3}, {"\xFB\x7F\xFF\xFC\x00\x00\x00\x00\x00", 9},
+ {"\xF9\x7E\x00", 3}, {"\xF9\x7E\x00", 3}},
+
+ /* Payload with 10 rightmost bits set -- cannot convert to half */
+ {0x7ff80000000003ff, 0x00000000,
+ {"\xFB\x7F\xF8\x00\x00\x00\x00\x03\xFF", 9}, {"\xFB\x7F\xF8\x00\x00\x00\x00\x03\xFF", 9},
+ {"\xF9\x7E\x00", 3}, {"\xF9\x7E\x00", 3}},
+
+ /* Payload with 23 leftmost bits set -- converts to a single */
+ {0x7ffFFFFFE0000000, 0x7fffffff,
+ {"\xFA\x7F\xFF\xFF\xFF", 5}, {"\xFB\x7F\xFF\xFF\xFF\xE0\x00\x00\x00", 9},
+ {"\xF9\x7E\x00", 3}, {"\xF9\x7E\x00", 3}},
+
+ /* Payload with 24 leftmost bits set -- fails to convert to a single */
+ {0x7ffFFFFFF0000000, 0x00000000,
+ {"\xFB\x7F\xFF\xFF\xFF\xF0\x00\x00\x00", 9}, {"\xFB\x7F\xFF\xFF\xFF\xF0\x00\x00\x00", 9},
+ {"\xF9\x7E\x00", 3}, {"\xF9\x7E\x00", 3}},
+
+ /* Payload with all bits set */
+ {0x7fffffffffffffff, 0x00000000,
+ {"\xFB\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+ {"\xF9\x7E\x00", 3}, {"\xFB\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+ /* List terminator */
+ {0, 0, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0} }
+};
+
+
+
+/* Public function. See float_tests.h
+ *
+ * This is the main test of floating-point encoding / decoding. It is
+ * data-driven by the above tables. It works better than tests below that
+ * it mostly replaces because it tests one number at a time, rather than
+ * putting them all in a map. It is much easier to debug test failures
+ * and to add new tests. */
+int32_t
+FloatValuesTests(void)
{
- return UsefulBufUtil_CopyDoubleToUint64(d) != u;
-}
+ unsigned int uTestIndex;
+ const struct DoubleTestCase *pTestCase;
+ const struct NaNTestCase *pNaNTestCase;
+ MakeUsefulBufOnStack( TestOutBuffer, 20);
+ UsefulBufC TestOutput;
+ QCBOREncodeContext EnCtx;
+ QCBORError uErr;
+ QCBORDecodeContext DCtx;
+ QCBORItem Item;
+ uint64_t uDecoded;
+#ifdef QCBOR_DISABLE_FLOAT_HW_USE
+ uint32_t uDecoded2;
+#endif
+ /* Test a variety of doubles */
+ for(uTestIndex = 0; DoubleTestCases[uTestIndex].Preferred.len != 0; uTestIndex++) {
+ pTestCase = &DoubleTestCases[uTestIndex];
-int32_t HalfPrecisionDecodeBasicTests(void)
-{
- UsefulBufC HalfPrecision = UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedHalf);
+ // if(pTestCase->dNumber == 1.1754943508222874E-38) {
+ if(uTestIndex == 19) {
+ uErr = 99; /* For setting break points for particular tests */
+ }
- QCBORDecodeContext DC;
- QCBORDecode_Init(&DC, HalfPrecision, 0);
+ /* Number Encode of Preferred */
+ QCBOREncode_Init(&EnCtx, TestOutBuffer);
+ QCBOREncode_AddDouble(&EnCtx, pTestCase->dNumber);
+ uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
- QCBORItem Item;
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex, 1, uErr);;
+ }
+ if(UsefulBuf_Compare(TestOutput, pTestCase->Preferred)) {
+ return MakeTestResultCode(uTestIndex, 1, 200);
+ }
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_MAP) {
- return -1;
- }
+ /* Number Encode of Not Preferred */
+ QCBOREncode_Init(&EnCtx, TestOutBuffer);
+ QCBOREncode_AddDoubleNoPreferred(&EnCtx, pTestCase->dNumber);
+ uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0) {
- return -2;
- }
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex, 2, uErr);;
+ }
+ if(UsefulBuf_Compare(TestOutput, pTestCase->NotPreferred)) {
+ return MakeTestResultCode(uTestIndex, 2, 200);
+ }
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != INFINITY) {
- return -3;
- }
-
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != -INFINITY) {
- return -4;
- }
-
- // TODO: NAN-related is this really converting right? It is carrying
- // payload, but this confuses things.
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || !isnan(Item.val.dfnum)) {
- return -5;
- }
-
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 1.0) {
- return -6;
- }
-
- // Approximately 1/3
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.333251953125) {
- return -7;
- }
-
- // Largest half-precision
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 65504.0) {
- return -8;
- }
-
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != INFINITY) {
- return -9;
- }
-
- // Smallest half-precision subnormal
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.00000005960464477539063) {
- return -10;
- }
-
- // Largest half-precision subnormal
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.00006097555160522461) {
- return -11;
- }
-
- // Smallest half-precision normal
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.00006103515625) {
- return -12;
- }
-
- // half-precision zero
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0) {
- return -13;
- }
-
- // negative 2
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != -2.0) {
- return -14;
- }
-
- // TODO: NAN-related double check these four tests
- QCBORDecode_GetNext(&DC, &Item); // qNaN
- if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
- CheckDouble(Item.val.dfnum, 0x7ff8000000000000ULL)) {
- return -15;
- }
- QCBORDecode_GetNext(&DC, &Item); // sNaN
- if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
- CheckDouble(Item.val.dfnum, 0x7ff0000000000001ULL)) {
- return -16;
- }
- QCBORDecode_GetNext(&DC, &Item); // qNaN with payload 0x0f
- if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
- CheckDouble(Item.val.dfnum, 0x7ff800000000000fULL)) {
- return -17;
- }
- QCBORDecode_GetNext(&DC, &Item); // sNaN with payload 0x0f
- if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
- CheckDouble(Item.val.dfnum, 0x7ff000000000000fULL)) {
- return -18;
- }
-
- if(QCBORDecode_Finish(&DC)) {
- return -19;
- }
-
- return 0;
-}
-
-
-
-
-int32_t HalfPrecisionAgainstRFCCodeTest(void)
-{
- for(uint32_t uHalfP = 0; uHalfP < 0xffff; uHalfP += 60) {
- unsigned char x[2];
- x[1] = (uint8_t)(uHalfP & 0xff);
- x[0] = (uint8_t)(uHalfP >> 8); // uHalfP is always less than 0xffff
- double d = decode_half(x);
-
- // Contruct the CBOR for the half-precision float by hand
- UsefulBuf_MAKE_STACK_UB(__xx, 3);
- UsefulOutBuf UOB;
- UsefulOutBuf_Init(&UOB, __xx);
-
- const uint8_t uHalfPrecInitialByte = (uint8_t)(HALF_PREC_FLOAT + (CBOR_MAJOR_TYPE_SIMPLE << 5)); // 0xf9
- UsefulOutBuf_AppendByte(&UOB, uHalfPrecInitialByte); // The initial byte for a half-precision float
- UsefulOutBuf_AppendUint16(&UOB, (uint16_t)uHalfP);
-
- // Now parse the hand-constructed CBOR. This will invoke the
- // conversion to a float
- QCBORDecodeContext DC;
- QCBORDecode_Init(&DC, UsefulOutBuf_OutUBuf(&UOB), 0);
-
- QCBORItem Item;
-
- QCBORDecode_GetNext(&DC, &Item);
- if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
- return -1;
- }
-
- //printf("%04x QCBOR:%15.15f RFC: %15.15f (%8x)\n",
- // uHalfP, Item.val.fnum, d , UsefulBufUtil_CopyFloatToUint32(d));
-
- if(isnan(d)) {
- // The RFC code uses the native instructions which may or may not
- // handle sNaN, qNaN and NaN payloads correctly. This test just
- // makes sure it is a NaN and doesn't worry about the type of NaN
+ /* Number Decode of Preferred */
+ QCBORDecode_Init(&DCtx, pTestCase->Preferred, 0);
+ uErr = QCBORDecode_GetNext(&DCtx, &Item);
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex, 3, uErr);;
+ }
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+ if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+ return MakeTestResultCode(uTestIndex, 4, 0);
+ }
+ if(isnan(pTestCase->dNumber)) {
+ if(!isnan(Item.val.dfnum)) {
+ return MakeTestResultCode(uTestIndex, 5, 0);
+ }
+ } else {
+ if(Item.val.dfnum != pTestCase->dNumber) {
+ return MakeTestResultCode(uTestIndex, 6, 0);
+ }
+ }
+#else /* QCBOR_DISABLE_FLOAT_HW_USE */
+ /* When QCBOR_DISABLE_FLOAT_HW_USE is set, single-precision is not
+ * converted to double when decoding, so test differently. len == 5
+ * indicates single-precision in the encoded CBOR. */
+ if(pTestCase->Preferred.len == 5) {
+ if(Item.uDataType != QCBOR_TYPE_FLOAT) {
+ return MakeTestResultCode(uTestIndex, 4, 0);
+ }
+ if(isnan(pTestCase->dNumber)) {
+ if(!isnan(Item.val.fnum)) {
+ return MakeTestResultCode(uTestIndex, 5, 0);
+ }
+ } else {
+ if(Item.val.fnum != pTestCase->fNumber) {
+ return MakeTestResultCode(uTestIndex, 6, 0);
+ }
+ }
+ } else {
+ if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+ return MakeTestResultCode(uTestIndex, 4, 0);
+ }
+ if(isnan(pTestCase->dNumber)) {
if(!isnan(Item.val.dfnum)) {
- return -3;
+ return MakeTestResultCode(uTestIndex, 5, 0);
}
- } else {
- if(Item.val.dfnum != d) {
- return -2;
+ } else {
+ if(Item.val.dfnum != pTestCase->dNumber) {
+ return MakeTestResultCode(uTestIndex, 6, 0);
}
- }
- }
- return 0;
-}
+ }
+ }
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+ /* Number Decode of Not Preferred */
+ QCBORDecode_Init(&DCtx, pTestCase->NotPreferred, 0);
+ uErr = QCBORDecode_GetNext(&DCtx, &Item);
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex, 7, uErr);;
+ }
+ if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+ return MakeTestResultCode(uTestIndex, 8, 0);
+ }
+ if(isnan(pTestCase->dNumber)) {
+ if(!isnan(Item.val.dfnum)) {
+ return MakeTestResultCode(uTestIndex, 9, 0);
+ }
+ } else {
+ if(Item.val.dfnum != pTestCase->dNumber) {
+ return MakeTestResultCode(uTestIndex, 10, 0);
+ }
+ }
-/*
- Expected output from preferred serialization of some of floating-point numbers
-{"zero": 0.0,
- "negative zero": -0.0,
- "infinitity": Infinity,
- "negative infinitity": -Infinity,
- "NaN": NaN,
- "one": 1.0,
- "one third": 0.333251953125,
- "largest half-precision": 65504.0,
- "largest half-precision point one": 65504.1,
- "too-large half-precision": 65536.0,
- "smallest half subnormal": 5.960464477539063e-8,
- "smallest half normal": 0.00006103515625,
- "smallest half normal plus": 0.00006103515625000001,
- "smallest normal minus": 0.000030517578125,
- "largest single": 3.4028234663852886e+38,
- "largest single plus": 6.805646932770577e+38,
- "smallest single": 1.1754943508222875e-38,
- "smallest single plus": 1.1754943508222878e-38,
- "smallest single minus": 1.1754943508222874e-38,
- "smallest single minus more": 5.877471754111438e-39,
- 3: -2.0, "single precision": 16777216.0,
- "single with precision loss": 16777217.0,
- 1: "fin"}
- */
-static const uint8_t spExpectedSmallest[] = {
- 0xB8, 0x1A,
- 0x64, 0x7A, 0x65, 0x72, 0x6F,
- 0xF9, 0x00, 0x00,
-
- 0x6D, 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x7A,
- 0x65, 0x72, 0x6F,
- 0xF9, 0x80, 0x00,
-
- 0x6A, 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
- 0xF9, 0x7C, 0x00,
-
- 0x73, 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x69,
- 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
- 0xF9, 0xFC, 0x00,
-
- 0x63, 0x4E, 0x61, 0x4E,
- 0xF9, 0x7E, 0x00,
-
- 0x63, 0x6F, 0x6E, 0x65,
- 0xF9, 0x3C, 0x00,
-
- 0x69, 0x6F, 0x6E, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
- 0xF9, 0x35, 0x55,
-
- 0x76, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61,
- 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69,
- 0x6F, 0x6E,
- 0xF9, 0x7B, 0xFF,
-
- 0x78, 0x20, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68,
- 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73,
- 0x69, 0x6F, 0x6E, 0x20, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x20,
- 0x6F, 0x6E, 0x65,
- 0xFB, 0x40, 0xEF, 0xFC, 0x03, 0x33, 0x33, 0x33, 0x33,
-
- 0x78, 0x18, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65,
- 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63,
- 0x69, 0x73, 0x69, 0x6F, 0x6E,
- 0xFA, 0x47, 0x80, 0x00, 0x00,
-
- 0x77, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74,
- 0x20, 0x68, 0x61, 0x6C, 0x66, 0x20, 0x73, 0x75, 0x62, 0x6E,
- 0x6F, 0x72, 0x6D, 0x61, 0x6C,
- 0xFA, 0x33, 0x80, 0x00, 0x00,
-
- 0x74, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x68,
- 0x61, 0x6C, 0x66, 0x20, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
- 0xF9, 0x04, 0x00,
-
- 0x78, 0x19, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20,
- 0x68, 0x61, 0x6C, 0x66, 0x20, 0x6E, 0x6F, 0x72, 0x6D, 0x61,
- 0x6C, 0x20, 0x70, 0x6C, 0x75, 0x73,
- 0xFB, 0x3F, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-
- 0x75, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E,
- 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x6D, 0x69, 0x6E,
- 0x75, 0x73,
- 0xFB, 0x3F, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-
- 0x75, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E,
- 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x6D, 0x69, 0x6E, 0x75,
- 0x73,
- 0xFA, 0x38, 0x00, 0x00, 0x00,
-
- 0x6E, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x69,
- 0x6E, 0x67, 0x6C, 0x65,
- 0xFA, 0x7F, 0x7F, 0xFF, 0xFF,
-
- 0x73, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x69,
- 0x6E,0x67, 0x6C, 0x65, 0x20, 0x70, 0x6C, 0x75, 0x73,
- 0xFB, 0x47, 0xEF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x01,
-
- 0x73, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x69,
- 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x70, 0x6C, 0x75, 0x73,
- 0xFB, 0x47, 0xFF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x00,
-
- 0x6F, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73,
- 0x69, 0x6E, 0x67, 0x6C, 0x65,
- 0xFA, 0x00, 0x80, 0x00, 0x00,
-
- 0x74, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73,
- 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x70, 0x6C, 0x75, 0x73,
- 0xFB, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-
- 0x75, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73,
- 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x6D, 0x69, 0x6E, 0x75,
- 0x73,
- 0xFB, 0x38, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-
- 0x78, 0x1A, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20,
- 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x6D, 0x69, 0x6E,
- 0x75, 0x73, 0x20, 0x6D, 0x6F, 0x72, 0x65,
- 0xFB, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
- 0x03,
- 0xF9, 0xC0, 0x00,
-
- 0x70, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x70, 0x72, 0x65,
- 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
- 0xFA, 0x4B, 0x80, 0x00, 0x00,
-
- 0x78, 0x1A, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x77, 0x69,
- 0x74, 0x68, 0x20, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69,
- 0x6F, 0x6E, 0x20, 0x6C, 0x6F, 0x73, 0x73,
- 0xFB, 0x41, 0x70, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-
- 0x01,
- 0x63, 0x66, 0x69, 0x6E
-};
-
-
-/*
- Makes a double from a uint64_t by copying the bits, not
- by converting the value.
- */
-#define MAKE_DOUBLE(x) UsefulBufUtil_CopyUint64ToDouble(x)
-
-
-int32_t DoubleAsSmallestTest(void)
-{
- UsefulBuf_MAKE_STACK_UB(EncodedHalfsMem, sizeof(spExpectedSmallest));
-
- QCBOREncodeContext EC;
- QCBOREncode_Init(&EC, EncodedHalfsMem);
- QCBOREncode_OpenMap(&EC);
-
- // Many of these are from
- // https://en.wikipedia.org/wiki/Half-precision_floating-point_format
- // and
- // https://en.wikipedia.org/wiki/Single-precision_floating-point_format
-
- // F9 0000 # primitive(0)
- QCBOREncode_AddDoubleToMap(&EC, "zero", 0.00);
-
- // F9 8000 # primitive(0)
- QCBOREncode_AddDoubleToMap(&EC, "negative zero", -0.00);
-
- // F9 7C00 # primitive(31744)
- QCBOREncode_AddDoubleToMap(&EC, "infinitity", INFINITY);
-
- // F9 FC00 # primitive(64512)
- QCBOREncode_AddDoubleToMap(&EC, "negative infinitity", -INFINITY);
-
- // F9 7E00 # primitive(32256)
- QCBOREncode_AddDoubleToMap(&EC, "NaN", NAN);
-
- // TODO: test a few NaN variants
-
- // F9 3C00 # primitive(15360)
- QCBOREncode_AddDoubleToMap(&EC, "one", 1.0);
-
- // F9 3555 # primitive(13653)
- QCBOREncode_AddDoubleToMap(&EC, "one third", 0.333251953125);
-
- // 65504.0, converts to the large possible half-precision.
- // 0xF9, 0x7B, 0xFF,
- QCBOREncode_AddDoubleToMap(&EC, "largest half-precision", 65504.0);
-
- // 65504.1, the double that has both to large an exponent and too
- // much precision, so no conversion.
- // 0xFB, 0x40, 0xEF, 0xFC, 0x03, 0x33, 0x33, 0x33, 0x33,
- QCBOREncode_AddDoubleToMap(&EC, "largest half-precision point one", 65504.1);
-
- // 65536.0 has an exponent of 16, which is larger than 15, the
- // largest half-precision exponent. It is the exponent, not
- // precision loss that prevents conversion to half. It does convert
- // to single precision.
- // 0xFA, 0x47, 0x80, 0x00, 0x00,
- QCBOREncode_AddDoubleToMap(&EC, "too-large half-precision", 65536.0);
-
- // 5.9604644775390625E-8, the smallest possible half-precision
- // subnormal, digitis are lost converting to half, but not
- // when converting to a single
- // 0xFA, 0x33, 0x80, 0x00, 0x00,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest half subnormal",
- MAKE_DOUBLE(0x3e70000000000000));
-
- // 0.00006103515625, the double value that converts to the smallest
- // possible half-precision normal. which is what should appear in
- // the output.
- // 0xF9, 0x04, 0x00,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest half normal",
- MAKE_DOUBLE(0x3f10000000000000));
-
- // 0.000061035156250000014 ,the double value that is a tiny bit
- // greater than smallest possible half-precision normal. It will be
- // output as a double because converting it will reduce precision.
- // 0xFB, 0x3F, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest half normal plus",
- MAKE_DOUBLE(0x3f10000000000001));
-
- // 0.000061035156249999993, the double value that is a tiny bit
- // smaller than the smallest half-precision normal. This will fail
- // to convert to a half-precision because both the exponent is too
- // small and the precision is too large for a half-precision.
- // 0xFB, 0x3F, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest normal minus",
- MAKE_DOUBLE(0x3f0fffffffffffff));
-
- // 0.000030517578125, the double value that is too small to fit
- // into a half-precision because the exponent won't fit, not
- // because precision would be lost. (This would fit into a
- // half-precision subnormal, but there is no converstion to
- // that). This ends up encoded as a single-precision.
- // 0xFA, 0x38, 0x00, 0x00, 0x00,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest normal minus",
- MAKE_DOUBLE(0x3f00000000000000));
-
- // 3.4028234664e38, the value that converts to the largest possible
- // single-precision.
- // 0xFA, 0x7F, 0x7F, 0xFF, 0xFF,
- QCBOREncode_AddDoubleToMap(&EC,
- "largest single",
- MAKE_DOUBLE(0x47efffffe0000000));
-
- // 3.402823466385289E38, sightly larger than the largest possible
- // possible precision. Conversion fails because precision would be
- // lost.
- // 0xFB, 0x47, 0xEF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x01,
- QCBOREncode_AddDoubleToMap(&EC,
- "largest single plus",
- MAKE_DOUBLE(0x47efffffe0000001));
-
- // 6.8056469327705772E38, slightly more larger than the largers
- // possible single precision. Conversion fails because exponent is
- // too large.
- // 0xFB, 0x47, 0xFF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x00,
- QCBOREncode_AddDoubleToMap(&EC,
- "largest single plus",
- MAKE_DOUBLE(0x47ffffffe0000000));
-
- // 1.1754943508222875E-38, The double value that converts to the
- // smallest possible single-precision normal
- // 0xFA, 0x00, 0x80, 0x00, 0x00,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest single",
- MAKE_DOUBLE(0x3810000000000000));
-
- // 1.1754943508222878E-38, double value that is slightly larger
- // than the smallest single-precision normal. Conversion fails
- // because of precision
- // 0xFB, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest single plus",
- MAKE_DOUBLE(0x3810000000000001));
-
- // 1.1754943508222874E-38, slightly smaller than the smallest
- // single-precision normal. Conversion fails because of precision
- // 0xFB, 0x38, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest single minus",
- MAKE_DOUBLE(0x380fffffffffffff));
-
- // 5.8774717541114375E-39, slightly smaller than the smallest
- // single-precision normal. Conversion fails because the exponent
- // is too small.
- // 0xFB, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- QCBOREncode_AddDoubleToMap(&EC,
- "smallest single minus more",
- MAKE_DOUBLE(0x3800000000000000));
-
- // Just -2, which converts to a negative half-precision
- // F9 C000 # primitive(49152)
- QCBOREncode_AddDoubleToMapN(&EC, 3, -2.0);
-
- // 16777216, No precision loss converting to single
- // FA 4B800000 # primitive(1266679808)
- QCBOREncode_AddDoubleToMap(&EC, "single precision", 16777216);
-
- // 16777217, One more than above. Too much precision for a single
- // so no conversion.
- // 0xFB, 0x41, 0x70, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
- QCBOREncode_AddDoubleToMap(&EC, "single with precision loss", 16777217);
-
- // Just a convenient marker when cutting and pasting encoded CBOR
- QCBOREncode_AddSZStringToMapN(&EC, 1, "fin");
-
- QCBOREncode_CloseMap(&EC);
-
- UsefulBufC EncodedHalfs;
- QCBORError uErr = QCBOREncode_Finish(&EC, &EncodedHalfs);
- if(uErr) {
- return -1;
}
- if(UsefulBuf_Compare(EncodedHalfs, UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedSmallest))) {
- return -3;
+ /* Test a variety of NaNs with payloads */
+ for(uTestIndex = 0; NaNTestCases[uTestIndex].Preferred.len != 0; uTestIndex++) {
+ pNaNTestCase = &NaNTestCases[uTestIndex];
+
+
+ if(uTestIndex == 4) {
+ uErr = 99; /* For setting break points for particular tests */
+ }
+
+ /* NaN Encode of Preferred */
+ QCBOREncode_Init(&EnCtx, TestOutBuffer);
+ QCBOREncode_AddDouble(&EnCtx, UsefulBufUtil_CopyUint64ToDouble(pNaNTestCase->uDouble));
+ uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex+100, 10, uErr);;
+ }
+ if(UsefulBuf_Compare(TestOutput, pNaNTestCase->Preferred)) {
+ return MakeTestResultCode(uTestIndex+100, 10, 200);
+ }
+
+#ifdef QCBOR_COMPARE_TO_HW_NAN_CONVERSION
+ {
+ /* This test is off by default. Its purpose is to check
+ * QCBOR's mask-n-shift implementation against the HW/CPU
+ * instructions that do conversion between double and single.
+ * It is off because it is only used on occasion to verify
+ * QCBOR and because it is suspected that some HW/CPU does
+ * not implement this correctly. NaN payloads are an obscure
+ * feature. */
+ float f;
+ double d, d2;
+
+ d = UsefulBufUtil_CopyUint64ToDouble(pNaNTestCase->uNumber);
+
+ /* Cast the double to a single and then back to a double and
+ * see if they are equal. If so, then the NaN payload doesn't
+ * have any bits that are lost when converting to single and
+ * it can be safely converted.
+ *
+ * This test can't be done for half-precision because it is
+ * not widely supported.
+ */
+ f = (float)d;
+ d2 = (double)f;
+
+ /* The length of encoded doubles is 9, singles 5 and halves
+ * 3. If there are NaN payload bits that can't be converted,
+ * then the length must be 9.
+ */
+ if((uint64_t)d != (uint64_t)d2 && pNaNTestCase->Preferred.len != 9) {
+ /* QCBOR conversion not the same as HW conversion */
+ return MakeTestResultCode(uTestIndex, 9, 200);
+ }
+ }
+#endif /* QCBOR_COMPARE_TO_HW_NAN_CONVERSION */
+
+
+ /* NaN Encode of Not Preferred */
+ QCBOREncode_Init(&EnCtx, TestOutBuffer);
+ QCBOREncode_AddDoubleNoPreferred(&EnCtx, UsefulBufUtil_CopyUint64ToDouble(pNaNTestCase->uDouble));
+ uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex+100, 11, uErr);;
+ }
+ if(UsefulBuf_Compare(TestOutput, pNaNTestCase->NotPreferred)) {
+ return MakeTestResultCode(uTestIndex+100, 11, 200);
+ }
+
+ /* NaN Decode of Preferred */
+ QCBORDecode_Init(&DCtx, pNaNTestCase->Preferred, 0);
+ uErr = QCBORDecode_GetNext(&DCtx, &Item);
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex+100, 12, uErr);
+ }
+
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+
+ uDecoded = UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum);
+ if(uDecoded != pNaNTestCase->uDouble) {
+ return MakeTestResultCode(uTestIndex+100, 12, 200);
+ }
+#else /* QCBOR_DISABLE_FLOAT_HW_USE */
+ if(pNaNTestCase->Preferred.len == 5) {
+ if(Item.uDataType != QCBOR_TYPE_FLOAT) {
+ return MakeTestResultCode(uTestIndex, 4, 0);
+ }
+
+ uDecoded2 = UsefulBufUtil_CopyFloatToUint32(Item.val.fnum);
+
+ if(uDecoded2 != pNaNTestCase->uSingle) {
+ return MakeTestResultCode(uTestIndex, 4, 0);
+ }
+ } else {
+ if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+ return MakeTestResultCode(uTestIndex, 4, 0);
+ }
+ uDecoded = UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum);
+ if(uDecoded != pNaNTestCase->uDouble) {
+ return MakeTestResultCode(uTestIndex+100, 12, 200);
+ }
+ }
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+
+ /* NaN Decode of Not Preferred */
+ QCBORDecode_Init(&DCtx, pNaNTestCase->NotPreferred, 0);
+ uErr = QCBORDecode_GetNext(&DCtx, &Item);
+ if(uErr != QCBOR_SUCCESS) {
+ return MakeTestResultCode(uTestIndex+100, 13, uErr);
+ }
+ uDecoded = UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum);
+ if(uDecoded != pNaNTestCase->uDouble) {
+ return MakeTestResultCode(uTestIndex+100, 13, 200);
+ }
}
return 0;
}
+
+
+
+/* Public function. See float_tests.h */
+int32_t
+HalfPrecisionAgainstRFCCodeTest(void)
+{
+ QCBORItem Item;
+ QCBORDecodeContext DC;
+ unsigned char pbHalfBytes[2];
+ uint8_t uHalfPrecInitialByte;
+ double d;
+ UsefulBuf_MAKE_STACK_UB(EncodedBytes, 3);
+ UsefulOutBuf UOB;
+ uint32_t uHalfP;
+
+
+ for(uHalfP = 0; uHalfP < 0xffff; uHalfP += 60) {
+ pbHalfBytes[1] = (uint8_t)(uHalfP & 0xff);
+ pbHalfBytes[0] = (uint8_t)(uHalfP >> 8); /* uHalfP is always less than 0xffff */
+ d = decode_half(pbHalfBytes);
+
+ /* Construct the CBOR for the half-precision float by hand */
+ UsefulOutBuf_Init(&UOB, EncodedBytes);
+
+ uHalfPrecInitialByte = (uint8_t)(HALF_PREC_FLOAT + (CBOR_MAJOR_TYPE_SIMPLE << 5)); /* 0xf9 */
+ UsefulOutBuf_AppendByte(&UOB, uHalfPrecInitialByte); /* initial byte */
+ UsefulOutBuf_AppendUint16(&UOB, (uint16_t)uHalfP); /* argument */
+
+ /* Now parse the hand-constructed CBOR. This will invoke the
+ * conversion to a float
+ */
+ QCBORDecode_Init(&DC, UsefulOutBuf_OutUBuf(&UOB), 0);
+ QCBORDecode_GetNext(&DC, &Item);
+ if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+ return -1;
+ }
+
+ if(isnan(d)) {
+ /* The RFC code uses the native instructions which may or may not
+ * handle sNaN, qNaN and NaN payloads correctly. This test just
+ * makes sure it is a NaN and doesn't worry about the type of NaN
+ */
+ if(!isnan(Item.val.dfnum)) {
+ return -3;
+ }
+ } else {
+ if(Item.val.dfnum != d) {
+ return -2;
+ }
+ }
+ }
+ return 0;
+}
+
#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
@@ -700,17 +683,27 @@
0x18, 0x6A,
0xFA, 0x00, 0x00, 0x00, 0x00};
-int32_t GeneralFloatEncodeTests(void)
+
+/* Public function. See float_tests.h */
+int32_t
+GeneralFloatEncodeTests(void)
{
+ /* See FloatValuesTests() for tests that really cover lots of float values.
+ * Add new tests for new values or decode modes there.
+ * This test is primarily to cover all the float encode methods. */
+
+ UsefulBufC Encoded;
UsefulBufC ExpectedFloats;
+ QCBORError uErr;
+
#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
UsefulBuf_MAKE_STACK_UB(OutBuffer, sizeof(spExpectedFloats));
ExpectedFloats = UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedFloats);
- (void)spExpectedFloatsNoHalf; // Avoid unused variable error
+ (void)spExpectedFloatsNoHalf; /* Avoid unused variable error */
#else
UsefulBuf_MAKE_STACK_UB(OutBuffer, sizeof(spExpectedFloatsNoHalf));
ExpectedFloats = UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedFloatsNoHalf);
- (void)spExpectedFloats; // Avoid unused variable error
+ (void)spExpectedFloats; /* Avoid unused variable error */
#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
QCBOREncodeContext EC;
@@ -744,8 +737,7 @@
QCBOREncode_CloseMap(&EC);
QCBOREncode_CloseArray(&EC);
- UsefulBufC Encoded;
- QCBORError uErr = QCBOREncode_Finish(&EC, &Encoded);
+ uErr = QCBOREncode_Finish(&EC, &Encoded);
if(uErr) {
return -1;
}
@@ -757,25 +749,15 @@
return 0;
}
-
-/* returns 0 if equivalent, non-zero if not equivalent */
-static int CHECK_EXPECTED_DOUBLE(double val, double expected)
-{
- double diff = val - expected;
-
- diff = fabs(diff);
-
- if(diff > 0.000001) {
- return 1;
- } else {
- return 0;
- }
-}
#endif /* USEFULBUF_DISABLE_ALL_FLOAT */
-int32_t GeneralFloatDecodeTests(void)
+/* Public function. See float_tests.h */
+int32_t
+GeneralFloatDecodeTests(void)
{
+ /* See FloatValuesTests() for tests that really cover lots of float values */
+
QCBORItem Item;
QCBORError uErr;
QCBORDecodeContext DC;
@@ -872,10 +854,10 @@
#ifndef USEFULBUF_DISABLE_ALL_FLOAT
#ifndef QCBOR_DISABLE_FLOAT_HW_USE
|| Item.uDataType != QCBOR_TYPE_DOUBLE
- || CHECK_EXPECTED_DOUBLE(3.14, Item.val.dfnum)
+ || 3.1400001049041748 != Item.val.dfnum
#else /* QCBOR_DISABLE_FLOAT_HW_USE */
|| Item.uDataType != QCBOR_TYPE_FLOAT
- || CHECK_EXPECTED_DOUBLE(3.14, Item.val.fnum)
+ || 3.140000f != Item.val.fnum
#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
#else /* USEFULBUF_DISABLE_ALL_FLOAT */
|| Item.uDataType != QCBOR_TYPE_NONE
@@ -893,7 +875,7 @@
|| Item.val.dfnum != 0.0
#else /* QCBOR_DISABLE_FLOAT_HW_USE */
|| Item.uDataType != QCBOR_TYPE_FLOAT
- || Item.val.fnum != 0.0
+ || Item.val.fnum != 0.0f
#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
#else /* USEFULBUF_DISABLE_ALL_FLOAT */
|| Item.uDataType != QCBOR_TYPE_NONE
diff --git a/test/float_tests.h b/test/float_tests.h
index 54daa3f..427aa76 100644
--- a/test/float_tests.h
+++ b/test/float_tests.h
@@ -1,7 +1,7 @@
/*==============================================================================
- float_tests.h -- tests for float and conversion to/from half-precision
+ float_tests.h -- tests for floats and conversion to/from half-precision
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
+ Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
@@ -17,22 +17,35 @@
#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
-int32_t HalfPrecisionDecodeBasicTests(void);
-
-int32_t DoubleAsSmallestTest(void);
-
+/* This tests a large number of half-precision values
+ * in the conversion to/from half/double against
+ * the sample code in the CBOR RFC. */
int32_t HalfPrecisionAgainstRFCCodeTest(void);
#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
+
/*
- This calls each and every method for encoding
- floating-point numbers.
+ * This tests floating point encoding, decoding
+ * and conversion for lots of different values.
+ * It covers Preferred Serialization processing
+ * of floating point. Its focus is on the numbers
+ * not the encode/decode functions.
+ */
+int32_t FloatValuesTests(void);
+
+
+/*
+ * This calls each and every method for encoding
+ * floating-point numbers.
*/
int32_t GeneralFloatEncodeTests(void);
+
/*
- Tests basic float decoding.
+ * Tests float decoding, including error codes in scenarios
+ * where various float features are disabled. This also
+ * tests decoding using spiffy decode methods.
*/
int32_t GeneralFloatDecodeTests(void);
diff --git a/test/qcbor_encode_tests.c b/test/qcbor_encode_tests.c
index 5c59fe1..546252a 100644
--- a/test/qcbor_encode_tests.c
+++ b/test/qcbor_encode_tests.c
@@ -73,11 +73,11 @@
static int UsefulBuf_Compare_Print(UsefulBufC U1, UsefulBufC U2) {
size_t i;
for(i = 0; i < U1.len; i++) {
- if(((uint8_t *)U1.ptr)[i] != ((uint8_t *)U2.ptr)[i]) {
+ if(((const uint8_t *)U1.ptr)[i] != ((const uint8_t *)U2.ptr)[i]) {
printf("Position: %u Actual: 0x%x Expected: 0x%x\n",
(uint32_t)i,
- ((uint8_t *)U1.ptr)[i],
- ((uint8_t *)U2.ptr)[i]);
+ ((const uint8_t *)U1.ptr)[i],
+ ((const uint8_t *)U2.ptr)[i]);
return 1;
}
}
diff --git a/test/run_tests.c b/test/run_tests.c
index f2baaf1..34495ab 100644
--- a/test/run_tests.c
+++ b/test/run_tests.c
@@ -118,12 +118,11 @@
TEST_ENTRY(SetUpAllocatorTest),
TEST_ENTRY(CBORTestIssue134),
#endif /* #ifndef QCBOR_DISABLE_INDEFINITE_LENGTH_STRINGS */
-#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
- TEST_ENTRY(HalfPrecisionDecodeBasicTests),
- TEST_ENTRY(DoubleAsSmallestTest),
- TEST_ENTRY(HalfPrecisionAgainstRFCCodeTest),
-#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
#ifndef USEFULBUF_DISABLE_ALL_FLOAT
+#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
+ TEST_ENTRY(HalfPrecisionAgainstRFCCodeTest),
+ TEST_ENTRY(FloatValuesTests),
+#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
TEST_ENTRY(GeneralFloatEncodeTests),
TEST_ENTRY(GeneralFloatDecodeTests),
#endif /* USEFULBUF_DISABLE_ALL_FLOAT */