Fix preferred serialization of subnormals (#192)

Preferred Serialization now fully supports conversion to/from half, single and double subnormals. This includes NaN payloads. The tests for floating-point are much better organized and give greater coverage. IEEE 754 code is better organized and cleaner.

* Fix preferred serialization of subnormals (checkpoint)
* Check point progress
* Preferred float mostly working and tests passing
* added NaN tests
* Fix up ieee754.h; a few other compiler warnings
* decoding NaN payloads fix; rework half-double; tests
* Code tidiness
* indent to 3, not 4
* TODOs are done in other test; code tidy
* test running with float HW use disabled
* Remove / rearrange float tests
* Fix full float ifdef test fan out
* Code tidiness; sort out final TODOs

---------

Co-authored-by: Laurence Lundblade <lgl@securitytheory.com>

commit: 83dbf5cf9e7ca98040e51a1cbdb63b9cd0db3d20 [log] [tgz]
author: Laurence Lundblade <laurencelundblade@users.noreply.github.com> Sun Jan 07 19:17:52 2024 -0700
committer: GitHub <noreply@github.com> Sun Jan 07 19:17:52 2024 -0700
tree: 1854edb3c8568dd7588d04173310e4df8ff9ac43
parent: c5f45e494ad680be93b067c89ac0c9a53e41f226 [diff]
diff --git a/QCBOR.xcodeproj/project.pbxproj b/QCBOR.xcodeproj/project.pbxproj
index c1e6cd7..6e69fbd 100644
--- a/QCBOR.xcodeproj/project.pbxproj
+++ b/QCBOR.xcodeproj/project.pbxproj

@@ -150,8 +150,8 @@
 		0FA9BEB9216DC7AD00BA646B /* qcbor_encode_tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = qcbor_encode_tests.h; path = test/qcbor_encode_tests.h; sourceTree = "<group>"; };
 		0FA9BEBB216DE31700BA646B /* UsefulBuf_Tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UsefulBuf_Tests.h; path = test/UsefulBuf_Tests.h; sourceTree = "<group>"; };
 		0FA9BEBC216DE31700BA646B /* UsefulBuf_Tests.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = UsefulBuf_Tests.c; path = test/UsefulBuf_Tests.c; sourceTree = "<group>"; tabWidth = 3; };
-		E73B57572161CA680080D658 /* ieee754.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ieee754.h; path = src/ieee754.h; sourceTree = "<group>"; };
-		E73B57582161CA690080D658 /* ieee754.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ieee754.c; path = src/ieee754.c; sourceTree = "<group>"; };
+		E73B57572161CA680080D658 /* ieee754.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.h; name = ieee754.h; path = src/ieee754.h; sourceTree = "<group>"; tabWidth = 3; };
+		E73B57582161CA690080D658 /* ieee754.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = ieee754.c; path = src/ieee754.c; sourceTree = "<group>"; tabWidth = 3; };
 		E73B575A2161CA7C0080D658 /* float_tests.c */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 3; lastKnownFileType = sourcecode.c.c; name = float_tests.c; path = test/float_tests.c; sourceTree = "<group>"; tabWidth = 3; };
 		E73B575B2161CA7C0080D658 /* half_to_double_from_rfc7049.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = half_to_double_from_rfc7049.h; path = test/half_to_double_from_rfc7049.h; sourceTree = "<group>"; };
 		E73B575C2161CA7C0080D658 /* float_tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = float_tests.h; path = test/float_tests.h; sourceTree = "<group>"; };

diff --git a/src/ieee754.c b/src/ieee754.c
index a8079f8..2d98159 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c

@@ -1,71 +1,63 @@
-/*==============================================================================
- ieee754.c -- floating-point conversion between half, double & single-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
- Copyright (c) 2021, Arm Limited. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 7/23/18
- =============================================================================*/
+/* ==========================================================================
+ * ieee754.c -- floating-point conversion between half, double & single-precision
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 7/23/18
+ * ========================================================================== */
 
 /*
- Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
- QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
+ * Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
+ * QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
  */
 #include "qcbor/qcbor_common.h"
 
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
 
 #include "ieee754.h"
-#include <string.h> // For memcpy()
+#include <string.h> /* For memcpy() */
 
 
 /*
- This code is written for clarity and verifiability, not for size, on
- the assumption that the optimizer will do a good job. The LLVM
- optimizer, -Os, does seem to do the job and the resulting object code
- is smaller from combining code for the many different cases (normal,
- subnormal, infinity, zero...) for the conversions. GCC is no where near
- as good.
-
- This code has really long lines and is much easier to read because of
- them. Some coding guidelines prefer 80 column lines (can they not afford
- big displays?). It would make this code much worse even to wrap at 120
- columns.
-
- Dead stripping is also really helpful to get code size down when
- floating-point encoding is not needed. (If this is put in a library
- and linking is against the library, then dead stripping is automatic).
-
- This code works solely using shifts and masks and thus has no
- dependency on any math libraries. It can even work if the CPU doesn't
- have any floating-point support, though that isn't the most useful
- thing to do.
-
- The memcpy() dependency is only for CopyFloatToUint32() and friends
- which only is needed to avoid type punning when converting the actual
- float bits to an unsigned value so the bit shifts and masks can work.
- */
-
-/*
- The references used to write this code:
-
- - IEEE 754-2008, particularly section 3.6 and 6.2.1
-
- - https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
-
- - https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
-
- - https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
-
- - https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
+ * This code has long lines and is easier to read because of
+ * them. Some coding guidelines prefer 80 column lines (can they not
+ * afford big displays?).
+ *
+ * This code works solely using shifts and masks and thus has no
+ * dependency on any math libraries. It can even work if the CPU
+ * doesn't have any floating-point support, though that isn't the most
+ * useful thing to do.
+ *
+ * The memcpy() dependency is only for CopyFloatToUint32() and friends
+ * which only is needed to avoid type punning when converting the
+ * actual float bits to an unsigned value so the bit shifts and masks
+ * can work.
+ *
+ * The references used to write this code:
+ *
+ *  IEEE 754-2008, particularly section 3.6 and 6.2.1
+ *
+ *  https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
+ *
+ *  https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
+ *
+ *  https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
+ *
+ *  https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
+ *
+ * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
+ * needed. It can be retrieved from github history if needed.
  */
 
 
-// ----- Half Precsion -----------
+
+
+/* ----- Half Precision ----------- */
 #define HALF_NUM_SIGNIFICAND_BITS (10)
 #define HALF_NUM_EXPONENT_BITS    (5)
 #define HALF_NUM_SIGN_BITS        (1)
@@ -74,16 +66,16 @@
 #define HALF_EXPONENT_SHIFT       (HALF_NUM_SIGNIFICAND_BITS)
 #define HALF_SIGN_SHIFT           (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
 
-#define HALF_SIGNIFICAND_MASK     (0x3ffU) // The lower 10 bits  // 0x03ff
+#define HALF_SIGNIFICAND_MASK     (0x3ffU) // The lower 10 bits
 #define HALF_EXPONENT_MASK        (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
-#define HALF_SIGN_MASK            (0x01U << HALF_SIGN_SHIFT) //  // 0x8000 1 bit of sign
+#define HALF_SIGN_MASK            (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
 #define HALF_QUIET_NAN_BIT        (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
 
 /* Biased    Biased    Unbiased   Use
-    0x00       0        -15       0 and subnormal
-    0x01       1        -14       Smallest normal exponent
-    0x1e      30         15       Largest normal exponent
-    0x1F      31         16       NaN and Infinity  */
+ *  0x00       0        -15       0 and subnormal
+ *  0x01       1        -14       Smallest normal exponent
+ *  0x1e      30         15       Largest normal exponent
+ *  0x1F      31         16       NaN and Infinity  */
 #define HALF_EXPONENT_BIAS        (15)
 #define HALF_EXPONENT_MAX         (HALF_EXPONENT_BIAS)    //  15 Unbiased
 #define HALF_EXPONENT_MIN         (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
@@ -91,7 +83,7 @@
 #define HALF_EXPONENT_INF_OR_NAN  (HALF_EXPONENT_BIAS+1)  //  16 Unbiased
 
 
-// ------ Single-Precision --------
+/* ------ Single-Precision -------- */
 #define SINGLE_NUM_SIGNIFICAND_BITS (23)
 #define SINGLE_NUM_EXPONENT_BITS    (8)
 #define SINGLE_NUM_SIGN_BITS        (1)
@@ -106,19 +98,19 @@
 #define SINGLE_QUIET_NAN_BIT        (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))
 
 /* Biased  Biased   Unbiased  Use
-    0x0000     0     -127      0 and subnormal
-    0x0001     1     -126      Smallest normal exponent
-    0x7f     127        0      1
-    0xfe     254      127      Largest normal exponent
-    0xff     255      128      NaN and Infinity  */
+ *  0x0000     0     -127      0 and subnormal
+ *  0x0001     1     -126      Smallest normal exponent
+ *  0x7f     127        0      1
+ *  0xfe     254      127      Largest normal exponent
+ *  0xff     255      128      NaN and Infinity  */
 #define SINGLE_EXPONENT_BIAS        (127)
-#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)    //  127 unbiased
-#define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
-#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)   // -127 unbiased
-#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)  //  128 unbiased
+#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)
+#define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1)
+#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)
+#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)
 
 
-// --------- Double-Precision ----------
+/* --------- Double-Precision ---------- */
 #define DOUBLE_NUM_SIGNIFICAND_BITS (52)
 #define DOUBLE_NUM_EXPONENT_BITS    (11)
 #define DOUBLE_NUM_SIGN_BITS        (1)
@@ -134,372 +126,518 @@
 
 
 /* Biased      Biased   Unbiased  Use
-   0x00000000     0     -1023     0 and subnormal
-   0x00000001     1     -1022     Smallest normal exponent
-   0x000007fe  2046      1023     Largest normal exponent
-   0x000007ff  2047      1024     NaN and Infinity  */
+ * 0x00000000     0     -1023     0 and subnormal
+ * 0x00000001     1     -1022     Smallest normal exponent
+ * 0x000007fe  2046      1023     Largest normal exponent
+ * 0x000007ff  2047      1024     NaN and Infinity  */
 #define DOUBLE_EXPONENT_BIAS        (1023)
-#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)    // unbiased
-#define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1) // unbiased
-#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)   // unbiased
-#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)  // unbiased
+#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)
+#define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1)
+#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)
+#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)
+
 
 
 
 /*
- Convenient functions to avoid type punning, compiler warnings and
- such. The optimizer reduces them to a simple assignment.  This is a
- crusty corner of C. It shouldn't be this hard.
-
- These are also in UsefulBuf.h under a different name. They are copied
- here to avoid a dependency on UsefulBuf.h. There is no object code
- size impact because these always optimze down to a simple assignment.
+ * Convenient functions to avoid type punning, compiler warnings and
+ * such. The optimizer reduces them to a simple assignment. This is a
+ * crusty corner of C. It shouldn't be this hard.
+ *
+ * These are also in UsefulBuf.h under a different name. They are copied
+ * here to avoid a dependency on UsefulBuf.h. There is no object code
+ * size impact because these always optimize down to a simple assignment.
  */
-static inline uint32_t CopyFloatToUint32(float f)
+static inline uint32_t
+CopyFloatToUint32(float f)
 {
-    uint32_t u32;
-    memcpy(&u32, &f, sizeof(uint32_t));
-    return u32;
+   uint32_t u32;
+   memcpy(&u32, &f, sizeof(uint32_t));
+   return u32;
 }
 
-static inline uint64_t CopyDoubleToUint64(double d)
+static inline uint64_t
+CopyDoubleToUint64(double d)
 {
-    uint64_t u64;
-    memcpy(&u64, &d, sizeof(uint64_t));
-    return u64;
+   uint64_t u64;
+   memcpy(&u64, &d, sizeof(uint64_t));
+   return u64;
 }
 
-static inline double CopyUint64ToDouble(uint64_t u64)
+static inline double
+CopyUint64ToDouble(uint64_t u64)
 {
-    double d;
-    memcpy(&d, &u64, sizeof(uint64_t));
-    return d;
+   double d;
+   memcpy(&d, &u64, sizeof(uint64_t));
+   return d;
+}
+
+static inline float
+CopyUint32ToSingle(uint32_t u32)
+{
+   float f;
+   memcpy(&f, &u32, sizeof(uint32_t));
+   return f;
 }
 
 
-// Public function; see ieee754.h
-uint16_t IEEE754_FloatToHalf(float f)
-{
-    // Pull the three parts out of the single-precision float
-    const uint32_t uSingle = CopyFloatToUint32(f);
-    const int32_t  nSingleUnbiasedExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
-    const uint32_t uSingleSign             = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
-    const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
 
 
-    // Now convert the three parts to half-precision.
-
-    // All works is done on uint32_t with conversion to uint16_t at
-    // the end.  This avoids integer promotions that static analyzers
-    // complain about and reduces code size.
-    uint32_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
-
-    if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
-        // +/- Infinity and NaNs -- single biased exponent is 0xff
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        if(!uSingleSignificand) {
-            // Infinity
-            uHalfSignificand = 0;
-        } else {
-            // Copy the LSBs of the NaN payload that will fit from the
-            // single to the half
-            uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
-            if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
-                // It's a qNaN; copy the qNaN bit
-                uHalfSignificand |= HALF_QUIET_NAN_BIT;
-            } else {
-                // It's an sNaN; make sure the significand is not zero
-                // so it stays a NaN This is needed because not all
-                // significand bits are copied from single
-                if(!uHalfSignificand) {
-                    // Set the LSB. This is what wikipedia shows for
-                    // sNAN.
-                    uHalfSignificand |= 0x01;
-                }
-            }
-        }
-    } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
-        // 0 or a subnormal number -- singled biased exponent is 0
-        uHalfBiasedExponent = 0;
-        uHalfSignificand    = 0; // Any subnormal single will be too small to express as a half precision
-    } else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
-        // Exponent is too large to express in half-precision; round
-        // up to infinity
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        uHalfSignificand    = 0;
-    } else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
-        // Exponent is too small to express in half-precision normal;
-        // make it a half-precision subnormal
-        uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
-        uHalfSignificand    = 0;
-        // Could convert some of these values to a half-precision
-        // subnormal, but the layer above this will never use it. See
-        // layer above.  There is code to do this in github history
-        // for this file, but it was removed because it was never
-        // invoked.
-    } else {
-        // The normal case, exponent is in range for half-precision
-        uHalfBiasedExponent = (uint32_t)(nSingleUnbiasedExponent + HALF_EXPONENT_BIAS);
-        uHalfSignificand    = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
-    }
-    uHalfSign = uSingleSign;
-
-    // Put the 3 values in the right place for a half precision
-    const uint32_t uHalfPrecision =  uHalfSignificand |
-                                    (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
-                                    (uHalfSign << HALF_SIGN_SHIFT);
-    // Cast is safe because all the masks and shifts above work to
-    // make a half precision value which is only 16 bits.
-    return (uint16_t)uHalfPrecision;
-}
-
-
-// Public function; see ieee754.h
-uint16_t IEEE754_DoubleToHalf(double d)
-{
-    // Pull the three parts out of the double-precision float
-    const uint64_t uDouble = CopyDoubleToUint64(d);
-    const int64_t  nDoubleUnbiasedExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
-    const uint64_t uDoubleSign             = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
-    const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;
-
-    // Now convert the three parts to half-precision.
-
-    // All works is done on uint64_t with conversion to uint16_t at
-    // the end.  This avoids integer promotions that static analyzers
-    // complain about.  Other options are for these to be unsigned int
-    // or fast_int16_t. Code size doesn't vary much between all these
-    // options for 64-bit LLVM, 64-bit GCC and 32-bit Armv7 LLVM.
-    uint64_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
-
-    if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
-        // +/- Infinity and NaNs -- single biased exponent is 0xff
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        if(!uDoubleSignificand) {
-            // Infinity
-            uHalfSignificand = 0;
-        } else {
-            // Copy the LSBs of the NaN payload that will fit from the
-            // double to the half
-            uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
-            if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
-                // It's a qNaN; copy the qNaN bit
-                uHalfSignificand |= HALF_QUIET_NAN_BIT;
-            } else {
-                // It's an sNaN; make sure the significand is not zero
-                // so it stays a NaN This is needed because not all
-                // significand bits are copied from single
-                if(!uHalfSignificand) {
-                    // Set the LSB. This is what wikipedia shows for
-                    // sNAN.
-                    uHalfSignificand |= 0x01;
-                }
-            }
-        }
-    } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
-        // 0 or a subnormal number -- double biased exponent is 0
-        uHalfBiasedExponent = 0;
-        uHalfSignificand    = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
-    } else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
-        // Exponent is too large to express in half-precision; round
-        // up to infinity; TODO, is this really true?
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        uHalfSignificand    = 0;
-    } else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
-        // Exponent is too small to express in half-precision; round
-        // down to zero
-        uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
-        uHalfSignificand = 0;
-        // Could convert some of these values to a half-precision
-        // subnormal, but the layer above this will never use it. See
-        // layer above.  There is code to do this in github history
-        // for this file, but it was removed because it was never
-        // invoked.
-    } else {
-        // The normal case, exponent is in range for half-precision
-        uHalfBiasedExponent = (uint32_t)(nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS);
-        uHalfSignificand    = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
-    }
-    uHalfSign = uDoubleSign;
-
-
-    // Put the 3 values in the right place for a half precision
-    const uint64_t uHalfPrecision =  uHalfSignificand |
-                                    (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
-                                    (uHalfSign << HALF_SIGN_SHIFT);
-    // Cast is safe because all the masks and shifts above work to
-    // make a half precision value which is only 16 bits.
-    return (uint16_t)uHalfPrecision;
-}
-
-
-/*
-  EEE754_HalfToFloat() was created but is not needed. It can be retrieved from
-  github history if needed.
+/**
+ * @brief Assemble sign, significand and exponent into double precision float.
+ *
+ * @param[in] uDoubleSign              0 if positive, 1 if negative
+ * @param[in] uDoubleSignificand       Bits of the significand
+ * @param[in] nDoubleUnBiasedExponent  Unbiased exponent
+ *
+ * This returns a double-precision float, a binary64
+ * as specified in IEEE754.
+ */
-
-
-// Public function; see ieee754.h
-double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
+static double
+IEEE754_AssembleDouble(uint64_t uDoubleSign,
+                       uint64_t uDoubleSignificand,
+                       int64_t  nDoubleUnBiasedExponent)
 {
-    // Pull out the three parts of the half-precision float.  Do all
-    // the work in 64 bits because that is what the end result is.  It
-    // may give smaller code size and will keep static analyzers
-    // happier.
-    const uint64_t uHalfSignificand      = uHalfPrecision & HALF_SIGNIFICAND_MASK;
-    const int64_t  nHalfUnBiasedExponent = (int64_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
-    const uint64_t uHalfSign             = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+   uint64_t uDoubleBiasedExponent;
+
+   uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
+
+   return CopyUint64ToDouble(uDoubleSignificand |
+                             (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
+                             (uDoubleSign << DOUBLE_SIGN_SHIFT));
+}
 
 
-    // Make the three parts of hte single-precision number
-    uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
-    if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
-        // 0 or subnormal
-        uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
-        if(uHalfSignificand) {
-            // Subnormal case
-            uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
-            // A half-precision subnormal can always be converted to a
-            // normal double-precision float because the ranges line
-            // up
-            uDoubleSignificand = uHalfSignificand;
-            // Shift bits from right of the decimal to left, reducing
-            // the exponent by 1 each time
-            do {
-                uDoubleSignificand <<= 1;
-                uDoubleBiasedExponent--;
-            } while ((uDoubleSignificand & 0x400) == 0);
-            uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
-            uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+double
+IEEE754_HalfToDouble(uint16_t uHalfPrecision)
+{
+   uint64_t uDoubleSignificand;
+   int64_t  nDoubleUnBiasedExponent;
+   double   dResult;
+
+   /* Pull out the three parts of the half-precision float.  Do all
+    * the work in 64 bits because that is what the end result is.  It
+    * may give smaller code size and will keep static analyzers
+    * happier.
+    */
+   const uint64_t uHalfSignificand      = uHalfPrecision & HALF_SIGNIFICAND_MASK;
+   const uint64_t uHalfBiasedExponent   = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
+   const int64_t  nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
+   const uint64_t uHalfSign             = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+
+   if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
+      /* 0 or subnormal */
+      if(uHalfSignificand) {
+         /* --- SUBNORMAL --- */
+         /* A half-precision subnormal can always be converted to a
+          * normal double-precision float because the ranges line up.
+          * The exponent of a subnormal starts out at the min exponent
+          * for a normal. As the subnormal significand bits are
+          * shifted left to normalize, the exponent is
+          * decremented. Shifting continues until fully normalized.
+          */
+          nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
+          uDoubleSignificand      = uHalfSignificand;
+          do {
+             uDoubleSignificand <<= 1;
+             nDoubleUnBiasedExponent--;
+          } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
+          /* A normal has an implied 1 in the most significant
+           * position that a subnormal doesn't. */
+          uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
+          /* Must shift into place for a double significand */
+          uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
+
+          dResult = IEEE754_AssembleDouble(uHalfSign,
+                                           uDoubleSignificand,
+                                           nDoubleUnBiasedExponent);
+      } else {
+         /* --- ZERO --- */
+         dResult = IEEE754_AssembleDouble(uHalfSign,
+                                          0,
+                                          DOUBLE_EXPONENT_ZERO);
+      }
+   } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
+      /* NaN or Infinity */
+      if(uHalfSignificand) {
+         /* --- NaN --- */
+         /* Half-precision payloads always fit into double precision
+          * payloads. They are shifted left the same as a normal
+          * number significand.
+          */
+         uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+         dResult = IEEE754_AssembleDouble(uHalfSign,
+                                          uDoubleSignificand,
+                                          DOUBLE_EXPONENT_INF_OR_NAN);
+      } else {
+         /* --- INFINITY --- */
+         dResult = IEEE754_AssembleDouble(uHalfSign,
+                                          0,
+                                          DOUBLE_EXPONENT_INF_OR_NAN);
+      }
+   } else {
+      /* --- NORMAL NUMBER --- */
+      uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+      dResult = IEEE754_AssembleDouble(uHalfSign,
+                                       uDoubleSignificand,
+                                       nHalfUnBiasedExponent);
+   }
+
+   return dResult;
+}
+
+
+/**
+ * @brief Assemble sign, significand and exponent into half precision float.
+ *
+ * @param[in] uHalfSign              0 if positive, 1 if negative
+ * @param[in] uHalfSignificand       Bits of the significand
+ * @param[in] nHalfUnBiasedExponent  Unbiased exponent
+ *
+ * This returns the bits for a half-precision float, a binary16 as
+ * specified in IEEE754. It is returned as a uint32_t rather than a
+ * uint16_t for convenience of usage.
+ */
+static uint32_t
+IEEE754_AssembleHalf(uint32_t uHalfSign,
+                     uint32_t uHalfSignificand,
+                     int32_t nHalfUnBiasedExponent)
+{
+   uint32_t uHalfUnbiasedExponent;
+
+   uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
+
+   return uHalfSignificand |
+          (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
+          (uHalfSign << HALF_SIGN_SHIFT);
+}
+
+
+/*  Public function; see ieee754.h */
+IEEE754_union
+IEEE754_SingleToHalf(float f)
+{
+   IEEE754_union result;
+   uint32_t      uDroppedBits;
+   int32_t       nExponentDifference;
+   int32_t       nShiftAmount;
+   uint32_t      uHalfSignificand;
+
+   /* Pull the three parts out of the single-precision float. Most work
+    * is done with uint32_t which helps avoid integer promotions and
+    * static analyzer complaints.
+    */
+   const uint32_t uSingle                 = CopyFloatToUint32(f);
+   const uint32_t uSingleBiasedExponent   = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
+   const int32_t  nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
+   const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
+   const uint32_t uSingleSign             = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
+
+   if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
+      if(uSingleSignificand == 0) {
+         /* --- IS ZERO --- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                              0,
+                                              HALF_EXPONENT_ZERO);
+      } else {
+         /* --- IS SINGLE SUBNORMAL --- */
+         /* The largest single subnormal is slightly less than the
+          * smallest single normal which is 2^-126 or
+          * 1.1754943508222875e-38.  The smallest half subnormal is
+          * 2^-24 or 5.9604644775390625E-8.  There is no overlap so
+          * single subnormals can't be converted to halfs of any sort.
+          */
+         result.uSize   = IEEE754_UNION_IS_SINGLE;
+         result.uValue  = uSingle;
+      }
+   } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
+      if(uSingleSignificand == 0) {
+         /* ---- IS INFINITY ---- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
+      } else {
+         /* The NaN can only be converted if no payload bits are lost
+          * per RFC 8949 section 4.1 that defines Preferred
+          * Serialization. Note that Deterministically Encoded CBOR in
+          * section 4.2 allows for some variation of this rule, but at
+          * the moment this implementation is of Preferred
+          * Serialization, not CDE. As of December 2023, we are also
+          * expecting an update to CDE. This code may need to be
+          * updated for CDE.
+          */
+         uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
+         if(uDroppedBits == 0) {
+            /* --- IS CONVERTABLE NAN --- */
+            uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+            result.uSize  = IEEE754_UNION_IS_HALF;
+            result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                                 uHalfSignificand,
+                                                 HALF_EXPONENT_INF_OR_NAN);
+
+         } else {
+            /* --- IS UNCONVERTABLE NAN --- */
+            result.uSize   = IEEE754_UNION_IS_SINGLE;
+            result.uValue  = uSingle;
+         }
+      }
+   } else {
+      /* ---- REGULAR NUMBER ---- */
+      /* A regular single can be converted to a regular half if the
+       * single's exponent is in the smaller range of a half and if no
+       * precision is lost in the significand.
+       */
+      if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
+         nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
+        (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
+         uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+
+         /* --- CONVERT TO HALF NORMAL --- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                              uHalfSignificand,
+                                              nSingleUnbiasedExponent);
+      } else {
+         /* Unable to convert to a half normal. See if it can be
+          * converted to a half subnormal. To do that, the exponent
+          * must be in range and no precision can be lost in the
+          * significand.
+          *
+          * This is more complicated because the number is not
+          * normalized.  The significand must be shifted proportionally
+          * to the exponent and 1 must be added in.  See
+          * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+          *
+          * Exponents -14 to -24 map to a shift of 0 to 10 of the
+          * significand.  The largest value of a half subnormal has an
+          * exponent of -14. Subnormals are not normalized like
+          * normals meaning they lose precision as the numbers get
+          * smaller. Normals don't lose precision because the exponent
+          * allows all the bits of the significand to be significant.
+          */
+         /* The exponent of the largest possible half-precision
+          * subnormal is HALF_EXPONENT_MIN (-14).  Exponents larger
+          * than this are normal and handled above. We're going to
+          * shift the significand right by at least this amount.
+          */
+         nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
+
+         /* In addition to the shift based on the exponent's value,
+          * the single significand has to be shifted right to fit into
+          * a half-precision significand */
+         nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+
+         /* Must add 1 in to the possible significand because there is
+          * an implied 1 for normal values and not for subnormal
+          * values. See equations here:
+          * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+          */
+         uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
+
+         /* If only zero bits get shifted out, this can be converted
+          * to subnormal */
+         if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
+            nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
+            uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
+            /* --- CONVERTABLE TO HALF SUBNORMAL --- */
+            result.uSize  = IEEE754_UNION_IS_HALF;
+            result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                                 uHalfSignificand,
+                                                 HALF_EXPONENT_ZERO);
+         } else {
+            /* --- DO NOT CONVERT --- */
+            result.uSize   = IEEE754_UNION_IS_SINGLE;
+            result.uValue  = uSingle;
+         }
+      }
+   }
+
+   return result;
+}
+
+
+/**
+ * @brief Assemble sign, significand and exponent into single precision float.
+ *
+ * @param[in] uSingleSign              0 if positive, 1 if negative
+ * @param[in] uSingleSignificand       Bits of the significand
+ * @param[in] nSingleUnBiasedExponent  Exponent
+ *
+ * This returns the bits for a single-precision float, a binary32 as
+ * specified in IEEE754. It is returned as a uint64_t rather than a
+ * uint32_t or a float for convenience of usage.
+ */
+static uint64_t
+IEEE754_AssembleSingle(uint64_t uSingleSign,
+                       uint64_t uSingleSignificand,
+                       int64_t  nSingleUnBiasedExponent)
+{
+   uint64_t uSingleBiasedExponent;
+
+   uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
+
+   return uSingleSignificand |
+          (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
+          (uSingleSign << SINGLE_SIGN_SHIFT);
+}
+
+
+/**
+ * @brief Convert a double-precision float to single-precision.
+ *
+ * @param[in] d  The value to convert.
+ *
+ * @returns Either unconverted value or value converted to single-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
+ */
+static IEEE754_union
+IEEE754_DoubleToSingle(double d)
+{
+   IEEE754_union Result;
+   int64_t       nExponentDifference;
+   int64_t       nShiftAmount;
+   uint64_t      uSingleSignificand;
+   uint64_t      uDroppedBits;
+
+
+   /* Pull the three parts out of the double-precision float. Most
+    * work is done with uint64_t which helps avoid integer promotions
+    * and static analyzer complaints.
+    */
+   const uint64_t uDouble                 = CopyDoubleToUint64(d);
+   const uint64_t uDoubleBiasedExponent   = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
+   const int64_t  nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
+   const uint64_t uDoubleSign             = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
+   const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;
+
+
+    if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
+        if(uDoubleSignificand == 0) {
+            /* --- IS ZERO --- */
+            Result.uSize  = IEEE754_UNION_IS_SINGLE;
+            Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                   0,
+                                                   SINGLE_EXPONENT_ZERO);
         } else {
-            // Just zero
-            uDoubleSignificand = 0;
-        }
-    } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
-        // NaN or Inifinity
-        uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
-        if(uHalfSignificand) {
-            // NaN
-            // First preserve the NaN payload from half to single
-            uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
-            if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
-                // Next, set qNaN if needed since half qNaN bit is not
-                // copied above
-                uDoubleSignificand |= DOUBLE_QUIET_NAN_BIT;
+            /* --- IS DOUBLE SUBNORMAL --- */
+            /* The largest double subnormal is slightly less than the
+             * smallest double normal which is 2^-1022 or
+             * 2.2250738585072014e-308.  The smallest single subnormal
+             * is 2^-149 or 1.401298464324817e-45.  There is no
+             * overlap so double subnormals can't be converted to
+             * singles of any sort.
+             */
+            Result.uSize   = IEEE754_UNION_IS_DOUBLE;
+            Result.uValue  = uDouble;
+         }
+    } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
+         if(uDoubleSignificand == 0) {
+             /* ---- IS INFINITY ---- */
+             Result.uSize  = IEEE754_UNION_IS_SINGLE;
+             Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                    0,
+                                                    SINGLE_EXPONENT_INF_OR_NAN);
+         } else {
+             /* The NaN can only be converted if no payload bits are
+              * lost per RFC 8949 section 4.1 that defines Preferred
+              * Serialization. Note that Deterministically Encoded CBOR
+              * in section 4.2 allows for some variation of this rule,
+              * but at the moment this implementation is of Preferred
+              * Serialization, not CDE. As of December 2023, we are
+              * also expecting an update to CDE. This code may need to
+              * be updated for CDE.
+              */
+             uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
+             if(uDroppedBits == 0) {
+                /* --- IS CONVERTABLE NAN --- */
+                uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+                Result.uSize  = IEEE754_UNION_IS_SINGLE;
+                Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                       uSingleSignificand,
+                                                       SINGLE_EXPONENT_INF_OR_NAN);
+            } else {
+               /* --- IS UNCONVERTABLE NAN --- */
+               Result.uSize   = IEEE754_UNION_IS_DOUBLE;
+               Result.uValue  = uDouble;
             }
+         }
+    } else {
+        /* ---- REGULAR NUMBER ---- */
+        /* A regular double can be converted to a regular single if
+         * the double's exponent is in the smaller range of a single
+         * and if no precision is lost in the significand.
+         */
+        uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
+        if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
+           nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
+           uDroppedBits == 0) {
+            /* --- IS CONVERTABLE TO SINGLE --- */
+            uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+            Result.uSize  = IEEE754_UNION_IS_SINGLE;
+            Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                   uSingleSignificand,
+                                                   nDoubleUnbiasedExponent);
         } else {
-            // Infinity
-            uDoubleSignificand = 0;
+            /* Unable to convert to a single normal. See if it can be
+             * converted to a single subnormal. To do that, the
+             * exponent must be in range and no precision can be lost
+             * in the significand.
+             *
+             * This is more complicated because the number is not
+             * normalized.  The significand must be shifted
+             * proportionally to the exponent and 1 must be added
+             * in. See
+             * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+             */
+            nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
+            nShiftAmount        = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+            uSingleSignificand  = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
+
+            if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
+               nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
+               uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
+               /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
+               Result.uSize  = IEEE754_UNION_IS_SINGLE;
+               Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                      uSingleSignificand,
+                                                      SINGLE_EXPONENT_ZERO);
+            } else {
+               /* --- CAN NOT BE CONVERTED --- */
+               Result.uSize   = IEEE754_UNION_IS_DOUBLE;
+               Result.uValue  = uDouble;
+            }
         }
-    } else {
-        // Normal number
-        uDoubleBiasedExponent = (uint64_t)(nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
-        uDoubleSignificand    = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
     }
-    uDoubleSign = uHalfSign;
 
-
-    // Shift the 3 parts into place as a double-precision
-    const uint64_t uDouble = uDoubleSignificand |
-                            (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
-                            (uDoubleSign << DOUBLE_SIGN_SHIFT);
-    return CopyUint64ToDouble(uDouble);
+    return Result;
 }
 
 
-
-/*
- IEEE754_FloatToDouble(uint32_t uFloat) was created but is not needed. It can be retrieved from
-github history if needed.
-*/
-
-
-
-// Public function; see ieee754.h
-IEEE754_union IEEE754_FloatToSmallest(float f)
+/* Public function; see ieee754.h */
+IEEE754_union
+IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision)
 {
-    IEEE754_union result;
+   IEEE754_union result;
 
-    // Pull the neeed two parts out of the single-precision float
-    const uint32_t uSingle = CopyFloatToUint32(f);
-    const int32_t  nSingleExponent    = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
-    const uint32_t uSingleSignificand =   uSingle & SINGLE_SIGNIFICAND_MASK;
+   result = IEEE754_DoubleToSingle(d);
 
-    // Bit mask that is the significand bits that would be lost when
-    // converting from single-precision to half-precision
-    const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
+   if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
+      /* Cast to uint32_t is OK, because value was just successfully
+       * converted to single. */
+      float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
+      result = IEEE754_SingleToHalf(uSingle);
+   }
 
-    // Optimizer will re organize so there is only one call to
-    // IEEE754_FloatToHalf() in the final code.
-    if(uSingle == 0) {
-        // Value is 0.0000, not a a subnormal
-        result.uSize = IEEE754_UNION_IS_HALF;
-        result.uValue  = IEEE754_FloatToHalf(f);
-    } else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
-        // NaN, +/- infinity
-        result.uSize = IEEE754_UNION_IS_HALF;
-        result.uValue  = IEEE754_FloatToHalf(f);
-    } else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
-        // Normal number in exponent range and precision won't be lost
-        result.uSize = IEEE754_UNION_IS_HALF;
-        result.uValue  = IEEE754_FloatToHalf(f);
-    } else {
-        // Subnormal, exponent out of range, or precision will be lost
-        result.uSize = IEEE754_UNION_IS_SINGLE;
-        result.uValue  = uSingle;
-    }
-
-    return result;
+   return result;
 }
 
-// Public function; see ieee754.h
-IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
-{
-    IEEE754_union result;
 
-    // Pull the needed two parts out of the double-precision float
-    const uint64_t uDouble = CopyDoubleToUint64(d);
-    const int64_t  nDoubleExponent     = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
-    const uint64_t uDoubleSignificand  = uDouble & DOUBLE_SIGNIFICAND_MASK;
+#else /* QCBOR_DISABLE_PREFERRED_FLOAT */
 
-    // Masks to check whether dropped significand bits are zero or not
-    const uint64_t uDroppedHalfBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
-    const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
-
-    // This will not convert to half-precion or single-precision
-    // subnormals.  Values that could be converted will be output as
-    // the double they are or occasionally to a normal single.  This
-    // could be implemented, but it is more code and would rarely be
-    // used and rarely reduce the output size.
-
-    // The various cases
-    if(d == 0.0) { // Take care of positive and negative zero
-        // Value is 0.0000, not a a subnormal
-        result.uSize  = IEEE754_UNION_IS_HALF;
-        result.uValue = IEEE754_DoubleToHalf(d);
-    } else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
-        // NaN, +/- infinity
-        result.uSize  = IEEE754_UNION_IS_HALF;
-        result.uValue = IEEE754_DoubleToHalf(d);
-    } else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedHalfBits))) {
-        // Can convert to half without precision loss
-        result.uSize  = IEEE754_UNION_IS_HALF;
-        result.uValue = IEEE754_DoubleToHalf(d);
-    } else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
-        // Can convert to single without precision loss
-        result.uSize  = IEEE754_UNION_IS_SINGLE;
-        result.uValue = CopyFloatToUint32((float)d);
-    } else {
-        // Can't convert without precision loss
-        result.uSize  = IEEE754_UNION_IS_DOUBLE;
-        result.uValue = uDouble;
-    }
-
-    return result;
-}
-
-#else
-
-int x;
+int ieee754_dummy_place_holder;
 
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */

diff --git a/src/ieee754.h b/src/ieee754.h
index d37532a..863019b 100644
--- a/src/ieee754.h
+++ b/src/ieee754.h

@@ -1,14 +1,14 @@
-/*==============================================================================
- ieee754.c -- floating-point conversion between half, double & single-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 7/23/18
- =============================================================================*/
+/* ==========================================================================
+ * ieee754.h -- Conversion between half, double & single-precision floats
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 7/23/18
+ * ========================================================================== */
 
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
 
@@ -18,130 +18,109 @@
 #include <stdint.h>
 
 
-
-/*
- General comments
-
- This is a complete in that it handles all conversion cases including
- +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN and NaN
- payloads.
-
- This conforms to IEEE 754-2008, but note that this doesn't specify
- conversions, just the encodings.
-
- NaN payloads are preserved with alignment on the LSB. The qNaN bit is
- handled differently and explicity copied. It is always the MSB of the
- significand. The NaN payload MSBs (except the qNaN bit) are truncated
- when going from double or single to half.
-
- TODO: what does the C cast do with NaN payloads from
- double to single? It probably depends entirely on the
- CPU.
-
- */
-
-/*
- Most simply just explicilty encode the type you want, single or
- double.  This works easily everywhere since standard C supports both
- these types and so does qcbor.  This encoder also supports half
- precision and there's a few ways to use it to encode floating-point
- numbers in less space.
-
- Without losing precision, you can encode a single or double such that
- the special values of 0, NaN and Infinity encode as half-precision.
- This CBOR decodoer and most others should handle this properly.
-
- If you don't mind losing precision, then you can use half-precision.
- One way to do this is to set up your environment to use
- ___fp_16. Some compilers and CPUs support it even though it is not
- standard C. What is nice about this is that your program will use
- less memory and floating-point operations like multiplying, adding
- and such will be faster.
-
- Another way to make use of half-precision is to represent the values
- in your program as single or double, but encode them in CBOR as
- half-precision. This cuts the size of the encoded messages by 2 or 4,
- but doesn't reduce memory needs or speed because you are still using
- single or double in your code.
-
+/** @file ieee754.h
+ *
+ * This implements floating-point conversion between half, single and
+ * double precision floating-point numbers, in particular conversion to
+ * smaller representation (e.g., double to single) that does not lose
+ * precision for CBOR preferred serialization.
+ *
+ * This implementation works entirely with shifts and masks and does
+ * not require any floating-point HW or library.
+ *
+ * This conforms to IEEE 754-2008, but note that it doesn't specify
+ * conversions, just the encodings.
+ *
+ * This is complete, supporting +/- infinity, +/- zero, subnormals and
+ * NaN payloads. NaN payloads are converted to smaller by dropping the
+ * right most bits if they are zero and shifting to the right. If the
+ * rightmost bits are not zero the conversion is not performed. When
+ * converting from smaller to larger, the payload is shifted left and
+ * zero-padded. This is what is specified by CBOR preferred
+ * serialization and what modern HW conversion instructions do. CBOR
+ * CDE handling for NaN is not clearly specified, but upcoming
+ * documents may clarify this.
+ *
+ * There is no special handling of signaling and quiet NaNs. It probably
+ * isn't necessary to transmit these special NaNs as their purpose is
+ * more for propagating errors up through some calculation. In many
+ * cases the handling of the NaN payload will work for signaling and
+ * quiet NaNs.
+ *
+ * A previous version of this was usable as a general library for
+ * conversion. This version is reduced to what is needed for CBOR.
  */
 
 
-
-/*
- Convert single-precision float to half-precision float.  Precision
- and NaN payload bits will be lost. Too-large values will round up to
- infinity and too small to zero.
+/**
+ * @brief Convert half-precision float to double-precision float.
+ *
+ * @param[in] uHalfPrecision   Half-precision number to convert.
+ *
+ * @returns Double-precision value.
+ *
+ * This is a lossless conversion because every half-precision value
+ * can be represented as a double. There is no error condition.
+ *
+ * There is no half-precision type in C, so it is represented here as
+ * a @c uint16_t. The bits of @c uHalfPrecision are as described for
+ * half-precision by IEEE 754.
  */
-uint16_t IEEE754_FloatToHalf(float f);
+double
+IEEE754_HalfToDouble(uint16_t uHalfPrecision);
 
 
-/*
- Convert double-precision float to half-precision float.  Precision
- and NaN payload bits will be lost. Too-large values will round up to
- infinity and too small to zero.
+/** Holds a floating-point value that could be half, single or
+ * double-precision.  The value is in a @c uint64_t that may be copied
+ * to a float or double.  Simply casting uValue will usually work but
+ * may generate compiler or static analyzer warnings. Using
+ * UsefulBufUtil_CopyUint64ToDouble() or
+ * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
+ * any extra code).
  */
-uint16_t IEEE754_DoubleToHalf(double d);
-
-
-/*
- Convert half-precision float to double-precision float.
- This is a loss-less conversion.
- */
-double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
-
-
-// Both tags the value and gives the size
-#define IEEE754_UNION_IS_HALF   2
-#define IEEE754_UNION_IS_SINGLE 4
-#define IEEE754_UNION_IS_DOUBLE 8
-
 typedef struct {
-    uint8_t uSize;  // One of IEEE754_IS_xxxx
-    uint64_t uValue;
+   enum {IEEE754_UNION_IS_HALF   = 2,
+         IEEE754_UNION_IS_SINGLE = 4,
+         IEEE754_UNION_IS_DOUBLE = 8,
+   } uSize; /* Size of uValue */
+   uint64_t uValue;
 } IEEE754_union;
 
 
-/*
- Converts double-precision to single-precision or half-precision if
- possible without loss of precisions. If not, leaves it as a
- double. Only converts to single-precision unless bAllowHalfPrecision
- is set.
+/**
+ * @brief Convert a double to either single or half-precision.
+ *
+ * @param[in] d                    The value to convert.
+ * @param[in] bAllowHalfPrecision  If true, convert to either half or
+ *                                 single precision.
+ *
+ * @returns Unconverted value, or value converted to single or half-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
  */
-IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
+IEEE754_union
+IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);
 
-/*
- Converts double-precision to single-precision if possible without
- loss of precision. If not, leaves it as a double.
+
+/**
+ * @brief Convert a single-precision float to half-precision.
+ *
+ * @param[in] f  The value to convert.
+ *
+ * @returns Either unconverted value or value converted to half-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
  */
-static inline IEEE754_union IEEE754_DoubleToSmall(double d)
-{
-    return IEEE754_DoubleToSmallestInternal(d, 0);
-}
-
-
-/*
- Converts double-precision to single-precision or half-precision if
- possible without loss of precisions. If not, leaves it as a double.
- */
-static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
-{
-    return IEEE754_DoubleToSmallestInternal(d, 1);
-}
-
-
-/*
- Converts single-precision to half-precision if possible without loss
- of precision. If not leaves as single-precision.
- */
-IEEE754_union IEEE754_FloatToSmallest(float f);
+IEEE754_union
+IEEE754_SingleToHalf(float f);
 
 
 #endif /* ieee754_h */
 
-
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
-
-
-
-

diff --git a/src/qcbor_encode.c b/src/qcbor_encode.c
index 53df657..f52692a 100644
--- a/src/qcbor_encode.c
+++ b/src/qcbor_encode.c

@@ -768,9 +768,9 @@
 void QCBOREncode_AddDouble(QCBOREncodeContext *me, double dNum)
 {
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
-   const IEEE754_union uNum = IEEE754_DoubleToSmallest(dNum);
+   const IEEE754_union uNum = IEEE754_DoubleToSmaller(dNum, true);
 
-   QCBOREncode_AddType7(me, uNum.uSize, uNum.uValue);
+   QCBOREncode_AddType7(me, (uint8_t)uNum.uSize, uNum.uValue);
 #else /* QCBOR_DISABLE_PREFERRED_FLOAT */
    QCBOREncode_AddDoubleNoPreferred(me, dNum);
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
@@ -794,9 +794,9 @@
 void QCBOREncode_AddFloat(QCBOREncodeContext *me, float fNum)
 {
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
-   const IEEE754_union uNum = IEEE754_FloatToSmallest(fNum);
+   const IEEE754_union uNum = IEEE754_SingleToHalf(fNum);
 
-   QCBOREncode_AddType7(me, uNum.uSize, uNum.uValue);
+   QCBOREncode_AddType7(me, (uint8_t)uNum.uSize, uNum.uValue);
 #else /* QCBOR_DISABLE_PREFERRED_FLOAT */
    QCBOREncode_AddFloatNoPreferred(me, fNum);
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */

diff --git a/test/float_tests.c b/test/float_tests.c
index 2bf5fad..1a7ade1 100644
--- a/test/float_tests.c
+++ b/test/float_tests.c

@@ -1,32 +1,33 @@
-/*==============================================================================
- float_tests.c -- tests for float and conversion to/from half-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
- Copyright (c) 2021, Arm Limited. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 9/19/18
- =============================================================================*/
+/* ==========================================================================
+ * float_tests.c -- tests for float and conversion to/from half-precision
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 9/19/18
+ * ========================================================================= */
 
 
 #include "float_tests.h"
 #include "qcbor/qcbor_encode.h"
 #include "qcbor/qcbor_decode.h"
 #include "qcbor/qcbor_spiffy_decode.h"
-#include <math.h> // For INFINITY and NAN and isnan()
+#include <math.h> /* For INFINITY and NAN and isnan() */
 
 
-/* Make a test results code that includes three components
- * Return code is
- * xxxyyyzzz where zz is the error code, yy is the test number and zz is
- * check being performed
+
+/* Make a test results code that includes three components. Return code
+ * is xxxyyyzzz where xxx is the test case, yyy is the test number and
+ * zzz is the error code.
  */
-static inline int32_t MakeTestResultCode(uint32_t   uTestCase,
-                                         uint32_t   uTestNumber,
-                                         QCBORError uErrorCode)
+static inline int32_t
+MakeTestResultCode(uint32_t   uTestCase,
+                   uint32_t   uTestNumber,
+                   QCBORError uErrorCode)
 {
    uint32_t uCode = (uTestCase * 1000000) +
                     (uTestNumber * 1000) +
@@ -40,585 +41,567 @@
 #include "half_to_double_from_rfc7049.h"
 
 
-/*
- Half-precision values that are input to test half-precision decoding
+struct DoubleTestCase {
+   double      dNumber;
+   double      fNumber;
+   UsefulBufC  Preferred;
+   UsefulBufC  NotPreferred;
+   UsefulBufC  CDE;
+   UsefulBufC  DCBOR;
+};
 
- As decoded by http://cbor.me
- {"zero": 0.0,
- "infinitity": Infinity,
- "negative infinitity": -Infinity,
- "NaN": NaN,
- "one": 1.0,
- "one third": 0.333251953125,
- "largest half-precision": 65504.0,
- "too-large half-precision": Infinity,
- "smallest subnormal": 5.960464477539063e-8,
- "smallest normal": 0.00006097555160522461,
- "biggest subnormal": 0.00006103515625,
- "subnormal single": 0.0,
- 3: -2.0,
- 4: NaN,
- 5: NaN,
- 6: NaN,
- 7: NaN}
+/* Boundaries for all destination conversions to test at.
+ *
+ * smallest subnormal single  1.401298464324817e-45   2^^-149
+ * largest subnormal single   1.1754942106924411e-38  2^^-126
+ * smallest normal single     1.1754943508222875e-38
+ * largest single             3.4028234663852886E+38
+ *
+ * smallest subnormal half   5.9604644775390625E-8
+ * largest subnormal half    6.097555160522461E-5
+ * smallest normal half      6.103515625E-5
+ * largest half              65504.0
+ *
+ * Boundaries for origin conversions
+ * smallest subnormal double 5.0e-324  2^^-1074
+ * largest subnormal double 2.2250738585072009e-308
+ * smallest normal double 2.2250738585072014e-308  2^^-1022
+ * largest normal double 1.7976931348623157e308 (just under 2^^1024)
  */
-static const uint8_t spExpectedHalf[] = {
-    0xB1,
-        0x64,
-            0x7A, 0x65, 0x72, 0x6F,
-        0xF9, 0x00, 0x00, // half-precision 0.000
-        0x6A,
-            0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
-        0xF9, 0x7C, 0x00, // Infinity
-        0x73,
-            0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x69, 0x6E,
-            0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
-        0xF9, 0xFC, 0x00, // -Inifinity
-        0x63,
-            0x4E, 0x61, 0x4E,
-        0xF9, 0x7E, 0x00, // NaN
-        0x63,
-            0x6F, 0x6E, 0x65,
-        0xF9, 0x3C, 0x00, // 1.0
-        0x69,
-            0x6F, 0x6E, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
-        0xF9, 0x35, 0x55, // half-precsion one third 0.333251953125
-        0x76,
-            0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61, 0x6C,
-            0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
-        0xF9, 0x7B, 0xFF, // largest half-precision 65504.0
-        0x78, 0x18,
-            0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x68,
-            0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69,
-            0x6F, 0x6E,
-        0xF9, 0x7C, 0x00, // Infinity
-        0x72,
-            0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75,
-            0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
-        0xF9, 0x00, 0x01, // Smallest half-precision subnormal 0.000000059604645
-        0x71,
-            0x62, 0x69, 0x67, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62,
-            0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
-        0xF9, 0x03, 0xFF, // Largest half-precision subnormal 0.0000609755516
-        0x6F,
-            0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x6F,
-            0x72, 0x6D, 0x61, 0x6C,
-        0xF9, 0x04, 0x00,  // Smallest half-precision normal 0.000061988
-        0x70,
-            0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x73,
-            0x69, 0x6E, 0x67, 0x6C, 0x65,
-        0xF9, 0x00, 0x00,
-        0x03,
-        0xF9, 0xC0, 0x00,    // -2
-        0x04,
-        0xF9, 0x7E, 0x00,    // qNaN
-        0x05,
-        0xF9, 0x7C, 0x01,    // sNaN
-        0x06,
-        0xF9, 0x7E, 0x0F,    // qNaN with payload 0x0f
-        0x07,
-        0xF9, 0x7C, 0x0F,    // sNaN with payload 0x0f
+
+/* Always four lines per test case so shell scripts can process into
+ * other formats.  CDE and DCBOR standards are not complete yet,
+ * encodings are a guess.  C string literals are used because they
+ * are the shortest notation. They are used __with a length__ . Null
+ * termination doesn't work because there are zero bytes.
+ */
+static const struct DoubleTestCase DoubleTestCases[] =  {
+   /* Zero */
+   {0.0,                                         0.0f,
+    {"\xF9\x00\x00", 3},                         {"\xFB\x00\x00\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x00\x00", 3},                         {"\xF9\x00\x00", 3}},
+
+   /* Negative Zero */
+   {-0.0,                                        -0.0f,
+    {"\xF9\x80\x00", 3},                         {"\xFB\x80\x00\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x80\x00", 3},                         {"\xF9\x80\x00", 3}},
+
+   /* NaN */
+   {NAN,                                         NAN,
+    {"\xF9\x7E\x00", 3},                         {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xF9\x7E\x00", 3}},
+
+   /* Infinity */
+   {INFINITY,                                    INFINITY,
+    {"\xF9\x7C\x00", 3},                         {"\xFB\x7F\xF0\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x7C\x00", 3},                         {"\xF9\x7C\x00", 3}},
+
+   /* Negative Infinity */
+   {-INFINITY,                                   -INFINITY,
+    {"\xF9\xFC\x00", 3},                         {"\xFB\xFF\xF0\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\xFC\x00", 3},                         {"\xF9\xFC\x00", 3}},
+
+   /* 1.0 */
+   {1.0,                                         1.0f,
+    {"\xF9\x3C\x00", 3},                         {"\xFB\x3F\xF0\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x3C\x00", 3},                         {"\xF9\x3C\x00", 3}},
+
+   /* -2.0 -- a negative number that is not zero */
+   {-2.0,                                        -2.0f,
+    {"\xF9\xC0\x00", 3},                         {"\xFB\xC0\x00\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\xC0\x00", 3},                         {"\xF9\x3C\x00", 3}},
+
+   /* 1/3 */
+   {0.333251953125,                              0.333251953125f,
+    {"\xF9\x35\x55", 3},                         {"\xFB\x3F\xD5\x54\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x35\x55", 3},                         {"\xF9\x35\x55", 3}},
+
+   /* 5.9604644775390625E-8 -- smallest half-precision subnormal */
+   {5.9604644775390625E-8,                       0.0f,
+    {"\xF9\x00\x01", 3},                         {"\xFB\x3E\x70\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x00\x01", 3},                         {"\xF9\x00\x01", 3}},
+
+   /* 3.0517578125E-5 -- a half-precision subnormal */
+   {3.0517578125E-5,                             0.0f,
+    {"\xF9\x02\x00", 3},                         {"\xFB\x3F\x00\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x02\x00", 3},                         {"\xF9\x02\x00", 3}},
+
+   /* 6.097555160522461E-5 -- largest half-precision subnormal */
+   {6.097555160522461E-5,                        0.0f,
+    {"\xF9\x03\xFF", 3},                         {"\xFB\x3F\x0F\xF8\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x03\xFF", 3},                         {"\xF9\04\00", 3}},
+
+   /* 6.103515625E-5 -- smallest possible half-precision normal */
+   {6.103515625E-5,                              0.0f,
+    {"\xF9\04\00", 3},                           {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\04\00", 3},                           {"\xF9\04\00", 3}},
+
+   /* 6.1035156250000014E-5 -- slightly larger than smallest half-precision normal */
+   {6.1035156250000014E-5,                       6.1035156250000014E-5f,
+    {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9},
+    {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x3F\x10\x00\x00\x00\x00\x00\x01", 9}},
+
+   /* 6.1035156249999993E-5 -- slightly smaller than smallest half-precision normal */
+   {6.1035156249999993E-5,  0.0f,
+    {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+    {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x3F\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+   /* 65504.0 -- largest possible half-precision */
+   {65504.0,                                     0.0f,
+    {"\xF9\x7B\xFF", 3},                         {"\xFB\x40\xEF\xFC\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x7B\xFF", 3},                         {"\xF9\x7B\xFF", 3}},
+
+   /* 65504.1 -- exponent too large and too much precision to convert */
+   {65504.1,                                     0.0f,
+    {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9}, {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9},
+    {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9}, {"\xFB\x40\xEF\xFC\x03\x33\x33\x33\x33", 9}},
+
+    /* 65536.0 -- exponent too large but not too much precision for single */
+   {65536.0,                                     65536.0f,
+    {"\xFA\x47\x80\x00\x00", 5},                 {"\xFB\x40\xF0\x00\x00\x00\x00\x00\x00", 9},
+    {"\xFA\x47\x80\x00\x00", 5},                 {"\xFA\x47\x80\x00\x00", 5}},
+
+   /* 1.401298464324817e-45 -- smallest single subnormal */
+   {1.401298464324817e-45,                       1.40129846E-45f,
+    {"\xFA\x00\x00\x00\x01", 5},                 {"\xFB\x36\xA0\x00\x00\x00\x00\x00\x00", 9},
+    {"\xFA\x00\x00\x00\x01", 5},                 {"\xFA\x00\x00\x00\x01", 5}},
+
+   /* 5.8774717541114375E-39 -- a single subnormal, half of the smallest
+    * single normal */
+   {5.8774717541114375E-39,                      5.87747175E-39f,
+    {"\xFA\x00\x40\x00\x00", 5},                 {"\xFB\x38\x00\x00\x00\x00\x00\x00\x00", 9},
+    {"\xFA\x00\x40\x00\x00", 5},                 {"\xFA\x00\x40\x00\x00", 5}},
+
+   /* 1.1754942106924411e-38 -- largest single subnormal */
+   {1.1754942106924411E-38,                      1.17549421E-38f,
+    {"\xFA\x00\x7f\xff\xff", 5},                 {"\xFB\x38\x0f\xff\xff\xC0\x00\x00\x00", 9},
+    {"\xFA\x00\x7f\xff\xff", 5},                 {"\xFA\x00\x7f\xff\xff", 5} },
+
+   /* 1.1754943508222874E-38 -- slightly smaller than smallest single normal */
+   {1.1754943508222874E-38,                      0.0f,
+    {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9}, {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9},
+    {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9}, {"\xFB\x38\x0f\xff\xff\xff\xff\xff\xff", 9}},
+
+   /* 1.1754943508222875e-38 -- smallest single normal */
+   {1.1754943508222875e-38,                      1.17549435E-38f,
+    {"\xFA\x00\x80\x00\x00", 5},                 {"\xFB\x38\x10\x00\x00\x00\x00\x00\x00", 9},
+    {"\xFA\x00\x80\x00\x00", 5},                 {"\xFA\x00\x80\x00\x00", 5}},
+
+   /* 1.1754943508222878e-38 -- slightly bigger than smallest single normal */
+   {1.1754943508222878e-38,                      0.0f,
+    {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9},
+    {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x38\x10\x00\x00\x00\x00\x00\x01", 9}},
+
+   /* 16777216 -- converts to single without loss */
+   {16777216,                                    16777216,
+    {"\xFA\x4B\x80\x00\x00", 5},                 {"\xFB\x41\x70\x00\x00\x00\x00\x00\x00", 9},
+    {"\xFA\x4B\x80\x00\x00", 5},                 {"\xFA\x4B\x80\x00\x00", 5}},
+
+   /* 16777217 -- one more than above and fails conversion to single */
+   {16777217,                                    16777216,
+    {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9}, {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9},
+    {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9}, {"\xFB\x41\x70\x00\x00\x10\x00\x00\x00", 9}},
+
+   /* 3.4028234663852886E+38 -- largest possible single normal */
+   {3.4028234663852886E+38,                      3.40282347E+38f,
+    {"\xFA\x7F\x7F\xFF\xFF", 5},                 {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x00", 9},
+    {"\xFA\x7F\x7F\xFF\xFF", 5},                 {"\xFA\x7F\x7F\xFF\xFF", 5}},
+
+   /* 3.402823466385289E+38 -- slightly larger than largest possible single */
+   {3.402823466385289E+38,                       0.0f,
+    {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9}, {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9},
+    {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9}, {"\xFB\x47\xEF\xFF\xFF\xE0\x00\x00\x01", 9}},
+
+   /* 3.402823669209385e+38 -- exponent larger by one than largest possible single */
+   {3.402823669209385e+38,                       0.0f,
+    {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9},
+    {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x47\xF0\x00\x00\x00\x00\x00\x00", 9}},
+
+   /* 5.0e-324 -- smallest double subnormal */
+   {5.0e-324,                                    0.0f,
+    {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9},
+    {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x00\x00\x00\x00\x00\x00\x00\x01", 9}},
+
+   /* 2.2250738585072009E-308 -- largest double subnormal */
+   {2.2250738585072009e-308,                     0.0f,
+    {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+    {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x00\x0F\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+   /* 2.2250738585072014e-308 -- smallest double normal */
+   {2.2250738585072014e-308,                     0.0f,
+    {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9},
+    {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9}, {"\xFB\x00\x10\x00\x00\x00\x00\x00\x00", 9}},
+
+   /* 1.7976931348623157E308 -- largest double normal */
+   {1.7976931348623157e308,                      0.0f,
+    {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+    {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x7F\xEF\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+   /* List terminator */
+   {0.0, 0.0f, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0} }
 };
 
 
-inline static bool CheckDouble(double d, uint64_t u)
+struct NaNTestCase {
+   uint64_t    uDouble;
+   uint32_t    uSingle;
+   UsefulBufC  Preferred;
+   UsefulBufC  NotPreferred;
+   UsefulBufC  CDE;
+   UsefulBufC  DCBOR;
+};
+
+/* Always four lines per test case so shell scripts can process into
+ * other formats. CDE and DCBOR standards are not complete yet,
+ * encodings are a guess. C string literals are used because they
+ * are the shortest notation. They are used __with a length__ . Null
+ * termination doesn't work because there are zero bytes.
+ */
+static const struct NaNTestCase NaNTestCases[] =  {
+
+   /* Payload with most significant bit set, a qNaN by most implementations */
+   {0x7ff8000000000000,                          0x00000000,
+    {"\xF9\x7E\x00", 3},                         {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xF9\x7E\x00", 3}},
+
+   /* Payload with single rightmost bit set */
+   {0x7ff8000000000001,                          0x00000000,
+    {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x01", 9}, {"\xFB\x7F\xF8\x00\x00\x00\x00\x00\x01", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xF9\x7E\x00", 3}},
+
+   /* Payload with 10 leftmost bits set -- converts to half */
+   {0x7ffffc0000000000,                          0x00000000,
+    {"\xF9\x7F\xFF", 3},                         {"\xFB\x7F\xFF\xFC\x00\x00\x00\x00\x00", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xF9\x7E\x00", 3}},
+
+   /* Payload with 10 rightmost bits set -- cannot convert to half */
+   {0x7ff80000000003ff,                          0x00000000,
+    {"\xFB\x7F\xF8\x00\x00\x00\x00\x03\xFF", 9}, {"\xFB\x7F\xF8\x00\x00\x00\x00\x03\xFF", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xF9\x7E\x00", 3}},
+
+   /* Payload with 23 leftmost bits set -- converts to a single */
+   {0x7ffFFFFFE0000000,                          0x7fffffff,
+    {"\xFA\x7F\xFF\xFF\xFF", 5},                 {"\xFB\x7F\xFF\xFF\xFF\xE0\x00\x00\x00", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xF9\x7E\x00", 3}},
+
+   /* Payload with 24 leftmost bits set -- fails to convert to a single */
+   {0x7ffFFFFFF0000000,                          0x00000000,
+    {"\xFB\x7F\xFF\xFF\xFF\xF0\x00\x00\x00", 9}, {"\xFB\x7F\xFF\xFF\xFF\xF0\x00\x00\x00", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xF9\x7E\x00", 3}},
+
+   /* Payload with all bits set */
+   {0x7fffffffffffffff,                          0x00000000,
+    {"\xFB\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 9}, {"\xFB\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 9},
+    {"\xF9\x7E\x00", 3},                         {"\xFB\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 9}},
+
+   /* List terminator */
+   {0, 0, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0} }
+};
+
+
+
+/* Public function. See float_tests.h
+ *
+ * This is the main test of floating-point encoding / decoding. It is
+ * data-driven by the above tables. It works better than tests below that
+ * it mostly replaces because it tests one number at a time, rather than
+ * putting them all in a map. It is much easier to debug test failures
+ * and to add new tests. */
+int32_t
+FloatValuesTests(void)
 {
-   return UsefulBufUtil_CopyDoubleToUint64(d) != u;
-}
+   unsigned int                 uTestIndex;
+   const struct DoubleTestCase *pTestCase;
+   const struct NaNTestCase    *pNaNTestCase;
+   MakeUsefulBufOnStack(        TestOutBuffer, 20);
+   UsefulBufC                   TestOutput;
+   QCBOREncodeContext           EnCtx;
+   QCBORError                   uErr;
+   QCBORDecodeContext           DCtx;
+   QCBORItem                    Item;
+   uint64_t                     uDecoded;
+#ifdef QCBOR_DISABLE_FLOAT_HW_USE
+   uint32_t                     uDecoded2;
+#endif
 
+   /* Test a variety of doubles */
+   for(uTestIndex = 0; DoubleTestCases[uTestIndex].Preferred.len != 0; uTestIndex++) {
+      pTestCase = &DoubleTestCases[uTestIndex];
 
-int32_t HalfPrecisionDecodeBasicTests(void)
-{
-   UsefulBufC HalfPrecision = UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedHalf);
+     // if(pTestCase->dNumber == 1.1754943508222874E-38) {
+         if(uTestIndex == 19) {
+         uErr = 99; /* For setting break points for particular tests */
+      }
 
-   QCBORDecodeContext DC;
-   QCBORDecode_Init(&DC, HalfPrecision, 0);
+      /* Number Encode of Preferred */
+      QCBOREncode_Init(&EnCtx, TestOutBuffer);
+      QCBOREncode_AddDouble(&EnCtx, pTestCase->dNumber);
+      uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
 
-   QCBORItem Item;
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex, 1, uErr);;
+      }
+      if(UsefulBuf_Compare(TestOutput, pTestCase->Preferred)) {
+         return MakeTestResultCode(uTestIndex, 1, 200);
+      }
 
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_MAP) {
-      return -1;
-   }
+      /* Number Encode of Not Preferred */
+      QCBOREncode_Init(&EnCtx, TestOutBuffer);
+      QCBOREncode_AddDoubleNoPreferred(&EnCtx, pTestCase->dNumber);
+      uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
 
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0) {
-      return -2;
-   }
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex, 2, uErr);;
+      }
+      if(UsefulBuf_Compare(TestOutput, pTestCase->NotPreferred)) {
+         return MakeTestResultCode(uTestIndex, 2, 200);
+      }
 
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != INFINITY) {
-      return -3;
-   }
-
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != -INFINITY) {
-      return -4;
-   }
-
-   // TODO: NAN-related is this really converting right? It is carrying
-   // payload, but this confuses things.
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || !isnan(Item.val.dfnum)) {
-      return -5;
-   }
-
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 1.0) {
-      return -6;
-   }
-
-   // Approximately 1/3
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.333251953125) {
-      return -7;
-   }
-
-   // Largest half-precision
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 65504.0) {
-      return -8;
-   }
-
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != INFINITY) {
-      return -9;
-   }
-
-   // Smallest half-precision subnormal
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.00000005960464477539063) {
-      return -10;
-   }
-
-   // Largest half-precision subnormal
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.00006097555160522461) {
-      return -11;
-   }
-
-   // Smallest half-precision normal
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.00006103515625) {
-      return -12;
-   }
-
-   // half-precision zero
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != 0.0) {
-      return -13;
-   }
-
-   // negative 2
-   QCBORDecode_GetNext(&DC, &Item);
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE || Item.val.dfnum != -2.0) {
-      return -14;
-   }
-
-   // TODO: NAN-related double check these four tests
-   QCBORDecode_GetNext(&DC, &Item); // qNaN
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
-      CheckDouble(Item.val.dfnum, 0x7ff8000000000000ULL)) {
-      return -15;
-   }
-   QCBORDecode_GetNext(&DC, &Item); // sNaN
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
-      CheckDouble(Item.val.dfnum, 0x7ff0000000000001ULL)) {
-      return -16;
-   }
-   QCBORDecode_GetNext(&DC, &Item); // qNaN with payload 0x0f
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
-      CheckDouble(Item.val.dfnum, 0x7ff800000000000fULL)) {
-      return -17;
-   }
-   QCBORDecode_GetNext(&DC, &Item); // sNaN with payload 0x0f
-   if(Item.uDataType != QCBOR_TYPE_DOUBLE ||
-      CheckDouble(Item.val.dfnum, 0x7ff000000000000fULL)) {
-      return -18;
-   }
-
-   if(QCBORDecode_Finish(&DC)) {
-      return -19;
-   }
-
-   return 0;
-}
-
-
-
-
-int32_t HalfPrecisionAgainstRFCCodeTest(void)
-{
-    for(uint32_t uHalfP = 0; uHalfP < 0xffff; uHalfP += 60) {
-        unsigned char x[2];
-        x[1] = (uint8_t)(uHalfP & 0xff);
-        x[0] = (uint8_t)(uHalfP >> 8); // uHalfP is always less than 0xffff
-        double d = decode_half(x);
-
-        // Contruct the CBOR for the half-precision float by hand
-        UsefulBuf_MAKE_STACK_UB(__xx, 3);
-        UsefulOutBuf UOB;
-        UsefulOutBuf_Init(&UOB, __xx);
-
-        const uint8_t uHalfPrecInitialByte = (uint8_t)(HALF_PREC_FLOAT + (CBOR_MAJOR_TYPE_SIMPLE << 5)); // 0xf9
-        UsefulOutBuf_AppendByte(&UOB, uHalfPrecInitialByte); // The initial byte for a half-precision float
-        UsefulOutBuf_AppendUint16(&UOB, (uint16_t)uHalfP);
-
-        // Now parse the hand-constructed CBOR. This will invoke the
-        // conversion to a float
-        QCBORDecodeContext DC;
-        QCBORDecode_Init(&DC, UsefulOutBuf_OutUBuf(&UOB), 0);
-
-        QCBORItem Item;
-
-        QCBORDecode_GetNext(&DC, &Item);
-        if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
-            return -1;
-        }
-
-        //printf("%04x  QCBOR:%15.15f  RFC: %15.15f (%8x)\n",
-        //       uHalfP, Item.val.fnum, d , UsefulBufUtil_CopyFloatToUint32(d));
-
-        if(isnan(d)) {
-            // The RFC code uses the native instructions which may or may not
-            // handle sNaN, qNaN and NaN payloads correctly. This test just
-            // makes sure it is a NaN and doesn't worry about the type of NaN
+      /* Number Decode of Preferred */
+      QCBORDecode_Init(&DCtx, pTestCase->Preferred, 0);
+      uErr = QCBORDecode_GetNext(&DCtx, &Item);
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex, 3, uErr);;
+      }
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+      if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+         return MakeTestResultCode(uTestIndex, 4, 0);
+      }
+      if(isnan(pTestCase->dNumber)) {
+         if(!isnan(Item.val.dfnum)) {
+            return MakeTestResultCode(uTestIndex, 5, 0);
+         }
+      } else {
+         if(Item.val.dfnum != pTestCase->dNumber) {
+            return MakeTestResultCode(uTestIndex, 6, 0);
+         }
+      }
+#else /* QCBOR_DISABLE_FLOAT_HW_USE */
+      /* When QCBOR_DISABLE_FLOAT_HW_USE is set, single-precision is not
+       * converted to double when decoding, so test differently. len == 5
+       * indicates single-precision in the encoded CBOR. */
+      if(pTestCase->Preferred.len == 5) {
+         if(Item.uDataType != QCBOR_TYPE_FLOAT) {
+            return MakeTestResultCode(uTestIndex, 4, 0);
+         }
+         if(isnan(pTestCase->dNumber)) {
+            if(!isnan(Item.val.fnum)) {
+               return MakeTestResultCode(uTestIndex, 5, 0);
+            }
+         } else {
+            if(Item.val.fnum != pTestCase->fNumber) {
+               return MakeTestResultCode(uTestIndex, 6, 0);
+            }
+         }
+      } else {
+         if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+            return MakeTestResultCode(uTestIndex, 4, 0);
+         }
+         if(isnan(pTestCase->dNumber)) {
             if(!isnan(Item.val.dfnum)) {
-                return -3;
+               return MakeTestResultCode(uTestIndex, 5, 0);
             }
-        } else {
-            if(Item.val.dfnum != d) {
-                return -2;
+         } else {
+            if(Item.val.dfnum != pTestCase->dNumber) {
+               return MakeTestResultCode(uTestIndex, 6, 0);
             }
-        }
-    }
-    return 0;
-}
+         }
+      }
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
 
+      /* Number Decode of Not Preferred */
+      QCBORDecode_Init(&DCtx, pTestCase->NotPreferred, 0);
+      uErr = QCBORDecode_GetNext(&DCtx, &Item);
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex, 7, uErr);;
+      }
+      if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+         return MakeTestResultCode(uTestIndex, 8, 0);
+      }
+      if(isnan(pTestCase->dNumber)) {
+         if(!isnan(Item.val.dfnum)) {
+            return MakeTestResultCode(uTestIndex, 9, 0);
+         }
+      } else {
+         if(Item.val.dfnum != pTestCase->dNumber) {
+            return MakeTestResultCode(uTestIndex, 10, 0);
+         }
+      }
 
-/*
- Expected output from preferred serialization of some of floating-point numbers
-{"zero": 0.0,
- "negative zero": -0.0,
- "infinitity": Infinity,
- "negative infinitity": -Infinity,
- "NaN": NaN,
- "one": 1.0,
- "one third": 0.333251953125,
- "largest half-precision": 65504.0,
- "largest half-precision point one": 65504.1,
- "too-large half-precision": 65536.0,
- "smallest half subnormal": 5.960464477539063e-8,
- "smallest half normal": 0.00006103515625,
- "smallest half normal plus": 0.00006103515625000001,
- "smallest normal minus": 0.000030517578125,
- "largest single": 3.4028234663852886e+38,
- "largest single plus": 6.805646932770577e+38,
- "smallest single": 1.1754943508222875e-38,
- "smallest single plus": 1.1754943508222878e-38,
- "smallest single minus": 1.1754943508222874e-38,
- "smallest single minus more": 5.877471754111438e-39,
- 3: -2.0, "single precision": 16777216.0,
- "single with precision loss": 16777217.0,
- 1: "fin"}
- */
-static const uint8_t spExpectedSmallest[] = {
-   0xB8, 0x1A,
-      0x64, 0x7A, 0x65, 0x72, 0x6F,
-      0xF9, 0x00, 0x00,
-
-      0x6D, 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x7A,
-         0x65, 0x72, 0x6F,
-      0xF9, 0x80, 0x00,
-
-      0x6A, 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
-      0xF9, 0x7C, 0x00,
-
-      0x73, 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x69,
-         0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
-      0xF9, 0xFC, 0x00,
-
-      0x63, 0x4E, 0x61, 0x4E,
-      0xF9, 0x7E, 0x00,
-
-      0x63, 0x6F, 0x6E, 0x65,
-      0xF9, 0x3C, 0x00,
-
-      0x69, 0x6F, 0x6E, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
-      0xF9, 0x35, 0x55,
-
-      0x76, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61,
-         0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69,
-         0x6F, 0x6E,
-      0xF9, 0x7B, 0xFF,
-
-      0x78, 0x20, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68,
-         0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73,
-         0x69, 0x6F, 0x6E, 0x20, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x20,
-         0x6F, 0x6E, 0x65,
-      0xFB, 0x40, 0xEF, 0xFC, 0x03, 0x33, 0x33, 0x33, 0x33,
-
-      0x78, 0x18, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65,
-         0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63,
-         0x69, 0x73, 0x69, 0x6F, 0x6E,
-      0xFA, 0x47, 0x80, 0x00, 0x00,
-
-      0x77, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74,
-         0x20, 0x68, 0x61, 0x6C, 0x66, 0x20, 0x73, 0x75, 0x62, 0x6E,
-         0x6F, 0x72, 0x6D, 0x61, 0x6C,
-      0xFA, 0x33, 0x80, 0x00, 0x00,
-
-      0x74, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x68,
-         0x61, 0x6C, 0x66, 0x20, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
-      0xF9, 0x04, 0x00,
-
-      0x78, 0x19, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20,
-         0x68, 0x61, 0x6C, 0x66, 0x20, 0x6E, 0x6F, 0x72, 0x6D, 0x61,
-         0x6C, 0x20, 0x70, 0x6C, 0x75, 0x73,
-      0xFB, 0x3F, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-
-      0x75, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E,
-         0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x6D, 0x69, 0x6E,
-         0x75, 0x73,
-      0xFB, 0x3F, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-
-      0x75, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E,
-         0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x6D, 0x69, 0x6E, 0x75,
-         0x73,
-      0xFA, 0x38, 0x00, 0x00, 0x00,
-
-      0x6E, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x69,
-         0x6E, 0x67, 0x6C, 0x65,
-      0xFA, 0x7F, 0x7F, 0xFF, 0xFF,
-
-      0x73, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x69,
-         0x6E,0x67, 0x6C, 0x65, 0x20, 0x70, 0x6C, 0x75, 0x73,
-      0xFB, 0x47, 0xEF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x01,
-
-      0x73, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x69,
-         0x6E, 0x67, 0x6C, 0x65, 0x20, 0x70, 0x6C, 0x75, 0x73,
-      0xFB, 0x47, 0xFF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x00,
-
-      0x6F, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73,
-         0x69, 0x6E, 0x67, 0x6C, 0x65,
-      0xFA, 0x00, 0x80, 0x00, 0x00,
-
-      0x74, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73,
-         0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x70, 0x6C, 0x75, 0x73,
-      0xFB, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-
-      0x75, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73,
-         0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x6D, 0x69, 0x6E, 0x75,
-         0x73,
-      0xFB, 0x38, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-
-      0x78, 0x1A, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20,
-         0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x6D, 0x69, 0x6E,
-         0x75, 0x73, 0x20, 0x6D, 0x6F, 0x72, 0x65,
-      0xFB, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
-      0x03,
-      0xF9, 0xC0, 0x00,
-
-      0x70, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x70, 0x72, 0x65,
-         0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
-      0xFA, 0x4B, 0x80, 0x00, 0x00,
-
-      0x78, 0x1A, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x77, 0x69,
-         0x74, 0x68, 0x20, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69,
-         0x6F, 0x6E, 0x20, 0x6C, 0x6F, 0x73, 0x73,
-      0xFB, 0x41, 0x70, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-
-      0x01,
-      0x63, 0x66, 0x69, 0x6E
-};
-
-
-/*
- Makes a double from a uint64_t by copying the bits, not
- by converting the value.
- */
-#define MAKE_DOUBLE(x) UsefulBufUtil_CopyUint64ToDouble(x)
-
-
-int32_t DoubleAsSmallestTest(void)
-{
-   UsefulBuf_MAKE_STACK_UB(EncodedHalfsMem, sizeof(spExpectedSmallest));
-
-   QCBOREncodeContext EC;
-   QCBOREncode_Init(&EC, EncodedHalfsMem);
-   QCBOREncode_OpenMap(&EC);
-
-   // Many of these are from
-   // https://en.wikipedia.org/wiki/Half-precision_floating-point_format
-   // and
-   // https://en.wikipedia.org/wiki/Single-precision_floating-point_format
-
-   // F9 0000                              # primitive(0)
-   QCBOREncode_AddDoubleToMap(&EC, "zero", 0.00);
-
-   // F9 8000                              # primitive(0)
-   QCBOREncode_AddDoubleToMap(&EC, "negative zero", -0.00);
-
-   // F9 7C00                              # primitive(31744)
-   QCBOREncode_AddDoubleToMap(&EC, "infinitity", INFINITY);
-
-   // F9 FC00                              # primitive(64512)
-   QCBOREncode_AddDoubleToMap(&EC, "negative infinitity", -INFINITY);
-
-   // F9 7E00                              # primitive(32256)
-   QCBOREncode_AddDoubleToMap(&EC, "NaN", NAN);
-
-   // TODO: test a few NaN variants
-
-   // F9 3C00                              # primitive(15360)
-   QCBOREncode_AddDoubleToMap(&EC, "one", 1.0);
-
-   // F9 3555                              # primitive(13653)
-   QCBOREncode_AddDoubleToMap(&EC, "one third", 0.333251953125);
-
-   // 65504.0, converts to the large possible half-precision.
-   // 0xF9, 0x7B, 0xFF,
-   QCBOREncode_AddDoubleToMap(&EC, "largest half-precision", 65504.0);
-
-   // 65504.1, the double that has both to large an exponent and too
-   // much precision, so no conversion.
-   // 0xFB, 0x40, 0xEF, 0xFC, 0x03, 0x33, 0x33, 0x33, 0x33,
-   QCBOREncode_AddDoubleToMap(&EC, "largest half-precision point one", 65504.1);
-
-   // 65536.0 has an exponent of 16, which is larger than 15, the
-   // largest half-precision exponent. It is the exponent, not
-   // precision loss that prevents conversion to half. It does convert
-   // to single precision.
-   // 0xFA, 0x47, 0x80, 0x00, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC, "too-large half-precision", 65536.0);
-
-   // 5.9604644775390625E-8, the smallest possible half-precision
-   // subnormal, digitis are lost converting to half, but not
-   // when converting to a single
-   // 0xFA, 0x33, 0x80, 0x00, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest half subnormal",
-                              MAKE_DOUBLE(0x3e70000000000000));
-
-   // 0.00006103515625, the double value that converts to the smallest
-   // possible half-precision normal.  which is what should appear in
-   // the output.
-   // 0xF9, 0x04, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest half normal",
-                              MAKE_DOUBLE(0x3f10000000000000));
-
-   // 0.000061035156250000014 ,the double value that is a tiny bit
-   // greater than smallest possible half-precision normal. It will be
-   // output as a double because converting it will reduce precision.
-   // 0xFB, 0x3F, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest half normal plus",
-                              MAKE_DOUBLE(0x3f10000000000001));
-
-   // 0.000061035156249999993, the double value that is a tiny bit
-   // smaller than the smallest half-precision normal. This will fail
-   // to convert to a half-precision because both the exponent is too
-   // small and the precision is too large for a half-precision.
-   // 0xFB, 0x3F, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest normal minus",
-                              MAKE_DOUBLE(0x3f0fffffffffffff));
-
-   // 0.000030517578125, the double value that is too small to fit
-   // into a half-precision because the exponent won't fit, not
-   // because precision would be lost. (This would fit into a
-   // half-precision subnormal, but there is no converstion to
-   // that). This ends up encoded as a single-precision.
-   // 0xFA, 0x38, 0x00, 0x00, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest normal minus",
-                              MAKE_DOUBLE(0x3f00000000000000));
-
-   // 3.4028234664e38, the value that converts to the largest possible
-   // single-precision.
-   // 0xFA, 0x7F, 0x7F, 0xFF, 0xFF,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "largest single",
-                              MAKE_DOUBLE(0x47efffffe0000000));
-
-   // 3.402823466385289E38, sightly larger than the largest possible
-   // possible precision.  Conversion fails because precision would be
-   // lost.
-   // 0xFB, 0x47, 0xEF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x01,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "largest single plus",
-                              MAKE_DOUBLE(0x47efffffe0000001));
-
-   // 6.8056469327705772E38, slightly more larger than the largers
-   // possible single precision.  Conversion fails because exponent is
-   // too large.
-   // 0xFB, 0x47, 0xFF, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "largest single plus",
-                              MAKE_DOUBLE(0x47ffffffe0000000));
-
-   // 1.1754943508222875E-38, The double value that converts to the
-   // smallest possible single-precision normal
-   // 0xFA, 0x00, 0x80, 0x00, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest single",
-                              MAKE_DOUBLE(0x3810000000000000));
-
-   // 1.1754943508222878E-38, double value that is slightly larger
-   // than the smallest single-precision normal. Conversion fails
-   // because of precision
-   // 0xFB, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest single plus",
-                              MAKE_DOUBLE(0x3810000000000001));
-
-   // 1.1754943508222874E-38, slightly smaller than the smallest
-   // single-precision normal.  Conversion fails because of precision
-   // 0xFB, 0x38, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest single minus",
-                              MAKE_DOUBLE(0x380fffffffffffff));
-
-   // 5.8774717541114375E-39, slightly smaller than the smallest
-   // single-precision normal.  Conversion fails because the exponent
-   // is too small.
-   // 0xFB, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC,
-                              "smallest single minus more",
-                              MAKE_DOUBLE(0x3800000000000000));
-
-   // Just -2, which converts to a negative half-precision
-   // F9 C000                              # primitive(49152)
-   QCBOREncode_AddDoubleToMapN(&EC, 3, -2.0);
-
-   // 16777216, No precision loss converting to single
-   // FA 4B800000                          # primitive(1266679808)
-   QCBOREncode_AddDoubleToMap(&EC, "single precision", 16777216);
-
-   // 16777217, One more than above. Too much precision for a single
-   // so no conversion.
-   // 0xFB, 0x41, 0x70, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
-   QCBOREncode_AddDoubleToMap(&EC, "single with precision loss", 16777217);
-
-   // Just a convenient marker when cutting and pasting encoded CBOR
-   QCBOREncode_AddSZStringToMapN(&EC, 1, "fin");
-
-   QCBOREncode_CloseMap(&EC);
-
-   UsefulBufC EncodedHalfs;
-   QCBORError uErr = QCBOREncode_Finish(&EC, &EncodedHalfs);
-   if(uErr) {
-      return -1;
    }
 
-   if(UsefulBuf_Compare(EncodedHalfs, UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedSmallest))) {
-      return -3;
+   /* Test a variety of NaNs with payloads */
+   for(uTestIndex = 0; NaNTestCases[uTestIndex].Preferred.len != 0; uTestIndex++) {
+      pNaNTestCase = &NaNTestCases[uTestIndex];
+
+
+      if(uTestIndex == 4) {
+         uErr = 99; /* For setting break points for particular tests */
+      }
+
+      /* NaN Encode of Preferred */
+      QCBOREncode_Init(&EnCtx, TestOutBuffer);
+      QCBOREncode_AddDouble(&EnCtx, UsefulBufUtil_CopyUint64ToDouble(pNaNTestCase->uDouble));
+      uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex+100, 10, uErr);;
+      }
+      if(UsefulBuf_Compare(TestOutput, pNaNTestCase->Preferred)) {
+         return MakeTestResultCode(uTestIndex+100, 10, 200);
+      }
+
+#ifdef QCBOR_COMPARE_TO_HW_NAN_CONVERSION
+      {
+         /* This test is off by default. Its purpose is to check
+          * QCBOR's mask-n-shift implementation against the HW/CPU
+          * instructions that do conversion between double and single.
+          * It is off because it is only used on occasion to verify
+          * QCBOR and because it is suspected that some HW/CPU does
+          * not implement this correctly. NaN payloads are an obscure
+          * feature. */
+         float f;
+         double d, d2;
+
+         d = UsefulBufUtil_CopyUint64ToDouble(pNaNTestCase->uDouble);
+
+         /* Cast the double to a single and then back to a double and
+          * see if the bit patterns are equal. If so, the NaN payload
+          * has no bits that are lost when converting to single and
+          * it can be safely converted.
+          *
+          * This test can't be done for half-precision because it is
+          * not widely supported.
+          */
+         f = (float)d;
+         d2 = (double)f;
+
+         /* Encoded doubles are 9 bytes, singles 5, halves 3; lost payload
+          * bits require 9. The bits are copied out rather than cast since
+          * converting a NaN to an integer type is undefined behavior. */
+         if(UsefulBufUtil_CopyDoubleToUint64(d) != UsefulBufUtil_CopyDoubleToUint64(d2) &&
+            pNaNTestCase->Preferred.len != 9) {
+            /* QCBOR conversion not the same as HW conversion */
+            return MakeTestResultCode(uTestIndex, 9, 200);
+         }
+      }
+#endif /* QCBOR_COMPARE_TO_HW_NAN_CONVERSION */
+
+
+      /* NaN Encode of Not Preferred */
+      QCBOREncode_Init(&EnCtx, TestOutBuffer);
+      QCBOREncode_AddDoubleNoPreferred(&EnCtx, UsefulBufUtil_CopyUint64ToDouble(pNaNTestCase->uDouble));
+      uErr = QCBOREncode_Finish(&EnCtx, &TestOutput);
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex+100, 11, uErr);;
+      }
+      if(UsefulBuf_Compare(TestOutput, pNaNTestCase->NotPreferred)) {
+         return MakeTestResultCode(uTestIndex+100, 11, 200);
+      }
+
+      /* NaN Decode of Preferred */
+      QCBORDecode_Init(&DCtx, pNaNTestCase->Preferred, 0);
+      uErr = QCBORDecode_GetNext(&DCtx, &Item);
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex+100, 12, uErr);
+      }
+
+#ifndef QCBOR_DISABLE_FLOAT_HW_USE
+
+      uDecoded = UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum);
+      if(uDecoded != pNaNTestCase->uDouble) {
+         return MakeTestResultCode(uTestIndex+100, 12, 200);
+      }
+#else /* QCBOR_DISABLE_FLOAT_HW_USE */
+      if(pNaNTestCase->Preferred.len == 5) {
+         if(Item.uDataType != QCBOR_TYPE_FLOAT) {
+            return MakeTestResultCode(uTestIndex, 4, 0);
+         }
+
+         uDecoded2 = UsefulBufUtil_CopyFloatToUint32(Item.val.fnum);
+
+         if(uDecoded2 != pNaNTestCase->uSingle) {
+            return MakeTestResultCode(uTestIndex, 4, 0);
+         }
+      } else {
+         if(Item.uDataType != QCBOR_TYPE_DOUBLE) {
+            return MakeTestResultCode(uTestIndex, 4, 0);
+         }
+         uDecoded = UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum);
+         if(uDecoded != pNaNTestCase->uDouble) {
+            return MakeTestResultCode(uTestIndex+100, 12, 200);
+         }
+      }
+#endif /* QCBOR_DISABLE_FLOAT_HW_USE */
+
+      /* NaN Decode of Not Preferred */
+      QCBORDecode_Init(&DCtx, pNaNTestCase->NotPreferred, 0);
+      uErr = QCBORDecode_GetNext(&DCtx, &Item);
+      if(uErr != QCBOR_SUCCESS) {
+         return MakeTestResultCode(uTestIndex+100, 13, uErr);
+      }
+      uDecoded = UsefulBufUtil_CopyDoubleToUint64(Item.val.dfnum);
+      if(uDecoded != pNaNTestCase->uDouble) {
+         return MakeTestResultCode(uTestIndex+100, 13, 200);
+      }
    }
 
    return 0;
 }
+
+
+
+/* Public function. See float_tests.h. Returns 0 on success, -1 if the
+ * decode fails or does not yield a double, -2 if the value differs
+ * from decode_half() and -3 if decode_half() gave a NaN but QCBOR did not. */
+int32_t
+HalfPrecisionAgainstRFCCodeTest(void)
+{
+   QCBORItem          Item;
+   QCBORDecodeContext DC;
+   unsigned char      pbHalfBytes[2];
+   uint8_t            uHalfPrecInitialByte;
+   double             d;
+   UsefulBuf_MAKE_STACK_UB(EncodedBytes, 3);
+   UsefulOutBuf      UOB;
+   uint32_t          uHalfP;
+
+   for(uHalfP = 0; uHalfP < 0xffff; uHalfP += 60) {
+      pbHalfBytes[1] = (uint8_t)(uHalfP & 0xff);
+      pbHalfBytes[0] = (uint8_t)(uHalfP >> 8); /* loop bound keeps uHalfP below 0xffff */
+      d = decode_half(pbHalfBytes);
+
+      /* Construct the CBOR for the half-precision float by hand */
+      UsefulOutBuf_Init(&UOB, EncodedBytes);
+
+      uHalfPrecInitialByte = (uint8_t)(HALF_PREC_FLOAT + (CBOR_MAJOR_TYPE_SIMPLE << 5)); /* 0xf9 */
+      UsefulOutBuf_AppendByte(&UOB, uHalfPrecInitialByte); /* initial byte */
+      UsefulOutBuf_AppendUint16(&UOB, (uint16_t)uHalfP);   /* argument */
+
+      /* Now parse the hand-constructed CBOR. This will invoke the
+       * conversion to a float. A decode error fails the test too. */
+      QCBORDecode_Init(&DC, UsefulOutBuf_OutUBuf(&UOB), 0);
+      if(QCBORDecode_GetNext(&DC, &Item) != QCBOR_SUCCESS ||
+         Item.uDataType != QCBOR_TYPE_DOUBLE) {
+         return -1;
+      }
+
+      if(isnan(d)) {
+         /* The RFC code uses the native instructions which may or may not
+          * handle sNaN, qNaN and NaN payloads correctly. This test just
+          * makes sure it is a NaN and doesn't worry about the type of NaN
+          */
+         if(!isnan(Item.val.dfnum)) {
+            return -3;
+         }
+      } else {
+         if(Item.val.dfnum != d) {
+            return -2;
+         }
+      }
+   }
+   return 0;
+}
+
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
 
 
@@ -700,17 +683,27 @@
          0x18, 0x6A,
           0xFA, 0x00, 0x00, 0x00, 0x00};
 
-int32_t GeneralFloatEncodeTests(void)
+
+/* Public function. See float_tests.h */
+int32_t
+GeneralFloatEncodeTests(void)
 {
+   /* See FloatValuesTests() for tests that really cover lots of float values.
+    * Add new tests for new values or decode modes there.
+    * This test is primarily to cover all the float encode methods. */
+
+   UsefulBufC Encoded;
    UsefulBufC ExpectedFloats;
+   QCBORError uErr;
+
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
    UsefulBuf_MAKE_STACK_UB(OutBuffer, sizeof(spExpectedFloats));
    ExpectedFloats = UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedFloats);
-   (void)spExpectedFloatsNoHalf; // Avoid unused variable error
+   (void)spExpectedFloatsNoHalf; /* Avoid unused variable error */
 #else
    UsefulBuf_MAKE_STACK_UB(OutBuffer, sizeof(spExpectedFloatsNoHalf));
    ExpectedFloats = UsefulBuf_FROM_BYTE_ARRAY_LITERAL(spExpectedFloatsNoHalf);
-   (void)spExpectedFloats; // Avoid unused variable error
+   (void)spExpectedFloats; /* Avoid unused variable error */
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
 
    QCBOREncodeContext EC;
@@ -744,8 +737,7 @@
    QCBOREncode_CloseMap(&EC);
    QCBOREncode_CloseArray(&EC);
 
-   UsefulBufC Encoded;
-   QCBORError uErr = QCBOREncode_Finish(&EC, &Encoded);
+   uErr = QCBOREncode_Finish(&EC, &Encoded);
    if(uErr) {
       return -1;
    }
@@ -757,25 +749,15 @@
    return 0;
 }
 
-
-/* returns 0 if equivalent, non-zero if not equivalent */
-static int CHECK_EXPECTED_DOUBLE(double val, double expected)
-{
-   double diff = val - expected;
-
-   diff = fabs(diff);
-
-   if(diff > 0.000001) {
-      return 1;
-   } else {
-      return 0;
-   }
-}
 #endif /* USEFULBUF_DISABLE_ALL_FLOAT */
 
 
-int32_t GeneralFloatDecodeTests(void)
+/* Public function. See float_tests.h */
+int32_t 
+GeneralFloatDecodeTests(void)
 {
+   /* See FloatValuesTests() for tests that really cover lots of float values */
+
    QCBORItem          Item;
    QCBORError         uErr;
    QCBORDecodeContext DC;
@@ -872,10 +854,10 @@
 #ifndef USEFULBUF_DISABLE_ALL_FLOAT
 #ifndef QCBOR_DISABLE_FLOAT_HW_USE
       || Item.uDataType != QCBOR_TYPE_DOUBLE
-      || CHECK_EXPECTED_DOUBLE(3.14, Item.val.dfnum)
+      || 3.1400001049041748 != Item.val.dfnum
 #else /* QCBOR_DISABLE_FLOAT_HW_USE */
       || Item.uDataType != QCBOR_TYPE_FLOAT
-      || CHECK_EXPECTED_DOUBLE(3.14, Item.val.fnum)
+      || 3.140000f != Item.val.fnum
 #endif /* QCBOR_DISABLE_FLOAT_HW_USE */
 #else /* USEFULBUF_DISABLE_ALL_FLOAT */
       || Item.uDataType != QCBOR_TYPE_NONE
@@ -893,7 +875,7 @@
       || Item.val.dfnum != 0.0
 #else /* QCBOR_DISABLE_FLOAT_HW_USE */
       || Item.uDataType != QCBOR_TYPE_FLOAT
-      || Item.val.fnum != 0.0
+      || Item.val.fnum != 0.0f
 #endif /* QCBOR_DISABLE_FLOAT_HW_USE */
 #else /* USEFULBUF_DISABLE_ALL_FLOAT */
       || Item.uDataType != QCBOR_TYPE_NONE

diff --git a/test/float_tests.h b/test/float_tests.h
index 54daa3f..427aa76 100644
--- a/test/float_tests.h
+++ b/test/float_tests.h

@@ -1,7 +1,7 @@
 /*==============================================================================
- float_tests.h -- tests for float and conversion to/from half-precision
+ float_tests.h -- tests for floats and conversion to/from half-precision
 
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
+ Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
 
  SPDX-License-Identifier: BSD-3-Clause
 
@@ -17,22 +17,35 @@
 
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
 
-int32_t HalfPrecisionDecodeBasicTests(void);
-
-int32_t DoubleAsSmallestTest(void);
-
+/* This tests a large number of half-precision values
+ * in the conversion to/from half/double against
+ * the sample code in the CBOR RFC. */
 int32_t HalfPrecisionAgainstRFCCodeTest(void);
 
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
 
+
 /*
- This calls each and every method for encoding
- floating-point numbers.
+ * This tests floating point encoding, decoding
+ * and conversion for lots of different values.
+ * It covers Preferred Serialization processing
+ * of floating point.  Its focus is on the numbers
+ * not the encode/decode functions.
+ */
+int32_t FloatValuesTests(void);
+
+
+/*
+ * This calls each and every method for encoding
+ * floating-point numbers.
  */
 int32_t GeneralFloatEncodeTests(void);
 
+
 /*
- Tests basic float decoding.
+ * Tests float decoding, including error codes in scenarios
+ * where various float features are disabled. This also
+ * tests decoding using spiffy decode methods.
  */
 int32_t GeneralFloatDecodeTests(void);
 

diff --git a/test/qcbor_encode_tests.c b/test/qcbor_encode_tests.c
index 5c59fe1..546252a 100644
--- a/test/qcbor_encode_tests.c
+++ b/test/qcbor_encode_tests.c

@@ -73,11 +73,11 @@
 static int UsefulBuf_Compare_Print(UsefulBufC U1, UsefulBufC U2) {
    size_t i;
    for(i = 0; i < U1.len; i++) {
-      if(((uint8_t *)U1.ptr)[i] != ((uint8_t *)U2.ptr)[i]) {
+      if(((const uint8_t *)U1.ptr)[i] != ((const uint8_t *)U2.ptr)[i]) {
          printf("Position: %u  Actual: 0x%x   Expected: 0x%x\n",
                 (uint32_t)i,
-                ((uint8_t *)U1.ptr)[i],
-                ((uint8_t *)U2.ptr)[i]);
+                ((const uint8_t *)U1.ptr)[i],
+                ((const uint8_t *)U2.ptr)[i]);
          return 1;
       }
    }

diff --git a/test/run_tests.c b/test/run_tests.c
index f2baaf1..34495ab 100644
--- a/test/run_tests.c
+++ b/test/run_tests.c

@@ -118,12 +118,11 @@
     TEST_ENTRY(SetUpAllocatorTest),
     TEST_ENTRY(CBORTestIssue134),
 #endif /* #ifndef QCBOR_DISABLE_INDEFINITE_LENGTH_STRINGS */
-#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
-    TEST_ENTRY(HalfPrecisionDecodeBasicTests),
-    TEST_ENTRY(DoubleAsSmallestTest),
-    TEST_ENTRY(HalfPrecisionAgainstRFCCodeTest),
-#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
 #ifndef USEFULBUF_DISABLE_ALL_FLOAT
+#ifndef QCBOR_DISABLE_PREFERRED_FLOAT
+   TEST_ENTRY(HalfPrecisionAgainstRFCCodeTest),
+   TEST_ENTRY(FloatValuesTests),
+#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
     TEST_ENTRY(GeneralFloatEncodeTests),
     TEST_ENTRY(GeneralFloatDecodeTests),
 #endif /* USEFULBUF_DISABLE_ALL_FLOAT */
commit	83dbf5cf9e7ca98040e51a1cbdb63b9cd0db3d20	[log] [tgz]
author	Laurence Lundblade <laurencelundblade@users.noreply.github.com>	Sun Jan 07 19:17:52 2024 -0700
committer	GitHub <noreply@github.com>	Sun Jan 07 19:17:52 2024 -0700
tree	1854edb3c8568dd7588d04173310e4df8ff9ac43
parent	c5f45e494ad680be93b067c89ac0c9a53e41f226 [diff]