Fix preferred serialization of subnormals (#192)

Preferred Serialization now fully supports conversion to/from half, single and double subnormals. This includes NaN payloads.

The tests for floating-point are much better organized and give greater coverage. 

IEEE 754 code is better organized and cleaner.



* Fix preferred serialization of subnormals (checkpoint)

* Checkpoint progress

* Preferred float mostly working and tests passing

* added NaN tests

* Fix up ieee754.h; a few other compiler warnings

* decoding NaN payloads fix; rework half-double; tests

* Code tidiness

* indent to 3, not 4

* TODO's are done in other test; code tidy

* test running with float HW use disabled

* Remove / rearrange float tests

* Fix full float ifdef test fan out

* Code tidiness; sort out final TODO's

---------

Co-authored-by: Laurence Lundblade <lgl@securitytheory.com>
diff --git a/src/ieee754.c b/src/ieee754.c
index a8079f8..2d98159 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c
@@ -1,71 +1,63 @@
-/*==============================================================================
- ieee754.c -- floating-point conversion between half, double & single-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
- Copyright (c) 2021, Arm Limited. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 7/23/18
- =============================================================================*/
+/* ==========================================================================
+ * ieee754.c -- floating-point conversion between half, double & single-precision
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 7/23/18
+ * ========================================================================== */
 
 /*
- Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
- QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
+ * Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
+ * QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
  */
 #include "qcbor/qcbor_common.h"
 
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
 
 #include "ieee754.h"
-#include <string.h> // For memcpy()
+#include <string.h> /* For memcpy() */
 
 
 /*
- This code is written for clarity and verifiability, not for size, on
- the assumption that the optimizer will do a good job. The LLVM
- optimizer, -Os, does seem to do the job and the resulting object code
- is smaller from combining code for the many different cases (normal,
- subnormal, infinity, zero...) for the conversions. GCC is no where near
- as good.
-
- This code has really long lines and is much easier to read because of
- them. Some coding guidelines prefer 80 column lines (can they not afford
- big displays?). It would make this code much worse even to wrap at 120
- columns.
-
- Dead stripping is also really helpful to get code size down when
- floating-point encoding is not needed. (If this is put in a library
- and linking is against the library, then dead stripping is automatic).
-
- This code works solely using shifts and masks and thus has no
- dependency on any math libraries. It can even work if the CPU doesn't
- have any floating-point support, though that isn't the most useful
- thing to do.
-
- The memcpy() dependency is only for CopyFloatToUint32() and friends
- which only is needed to avoid type punning when converting the actual
- float bits to an unsigned value so the bit shifts and masks can work.
- */
-
-/*
- The references used to write this code:
-
- - IEEE 754-2008, particularly section 3.6 and 6.2.1
-
- - https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
-
- - https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
-
- - https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
-
- - https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
+ * This code has long lines and is easier to read because of
+ * them. Some coding guidelines prefer 80 column lines (can they not
+ * afford big displays?).
+ *
+ * This code works solely using shifts and masks and thus has no
+ * dependency on any math libraries. It can even work if the CPU
+ * doesn't have any floating-point support, though that isn't the most
+ * useful thing to do.
+ *
+ * The memcpy() dependency is only for CopyFloatToUint32() and friends
+ * which only is needed to avoid type punning when converting the
+ * actual float bits to an unsigned value so the bit shifts and masks
+ * can work.
+ *
+ * The references used to write this code:
+ *
+ *  IEEE 754-2008, particularly section 3.6 and 6.2.1
+ *
+ *  https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
+ *
+ *  https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
+ *
+ *  https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
+ *
+ *  https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
+ *
+ * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
+ * needed. It can be retrieved from github history if needed.
  */
 
 
-// ----- Half Precsion -----------
+
+
+/* ----- Half Precision ----------- */
 #define HALF_NUM_SIGNIFICAND_BITS (10)
 #define HALF_NUM_EXPONENT_BITS    (5)
 #define HALF_NUM_SIGN_BITS        (1)
@@ -74,16 +66,16 @@
 #define HALF_EXPONENT_SHIFT       (HALF_NUM_SIGNIFICAND_BITS)
 #define HALF_SIGN_SHIFT           (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)
 
-#define HALF_SIGNIFICAND_MASK     (0x3ffU) // The lower 10 bits  // 0x03ff
+#define HALF_SIGNIFICAND_MASK     (0x3ffU) // The lower 10 bits
 #define HALF_EXPONENT_MASK        (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
-#define HALF_SIGN_MASK            (0x01U << HALF_SIGN_SHIFT) //  // 0x8000 1 bit of sign
+#define HALF_SIGN_MASK            (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
 #define HALF_QUIET_NAN_BIT        (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200
 
 /* Biased    Biased    Unbiased   Use
-    0x00       0        -15       0 and subnormal
-    0x01       1        -14       Smallest normal exponent
-    0x1e      30         15       Largest normal exponent
-    0x1F      31         16       NaN and Infinity  */
+ *  0x00       0        -15       0 and subnormal
+ *  0x01       1        -14       Smallest normal exponent
+ *  0x1e      30         15       Largest normal exponent
+ *  0x1F      31         16       NaN and Infinity  */
 #define HALF_EXPONENT_BIAS        (15)
 #define HALF_EXPONENT_MAX         (HALF_EXPONENT_BIAS)    //  15 Unbiased
 #define HALF_EXPONENT_MIN         (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
@@ -91,7 +83,7 @@
 #define HALF_EXPONENT_INF_OR_NAN  (HALF_EXPONENT_BIAS+1)  //  16 Unbiased
 
 
-// ------ Single-Precision --------
+/* ------ Single-Precision -------- */
 #define SINGLE_NUM_SIGNIFICAND_BITS (23)
 #define SINGLE_NUM_EXPONENT_BITS    (8)
 #define SINGLE_NUM_SIGN_BITS        (1)
@@ -106,19 +98,19 @@
 #define SINGLE_QUIET_NAN_BIT        (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))
 
 /* Biased  Biased   Unbiased  Use
-    0x0000     0     -127      0 and subnormal
-    0x0001     1     -126      Smallest normal exponent
-    0x7f     127        0      1
-    0xfe     254      127      Largest normal exponent
-    0xff     255      128      NaN and Infinity  */
+ *  0x0000     0     -127      0 and subnormal
+ *  0x0001     1     -126      Smallest normal exponent
+ *  0x7f     127        0      1
+ *  0xfe     254      127      Largest normal exponent
+ *  0xff     255      128      NaN and Infinity  */
 #define SINGLE_EXPONENT_BIAS        (127)
-#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)    //  127 unbiased
-#define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
-#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)   // -127 unbiased
-#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)  //  128 unbiased
+#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)
+#define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1)
+#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)
+#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)
 
 
-// --------- Double-Precision ----------
+/* --------- Double-Precision ---------- */
 #define DOUBLE_NUM_SIGNIFICAND_BITS (52)
 #define DOUBLE_NUM_EXPONENT_BITS    (11)
 #define DOUBLE_NUM_SIGN_BITS        (1)
@@ -134,372 +126,518 @@
 
 
 /* Biased      Biased   Unbiased  Use
-   0x00000000     0     -1023     0 and subnormal
-   0x00000001     1     -1022     Smallest normal exponent
-   0x000007fe  2046      1023     Largest normal exponent
-   0x000007ff  2047      1024     NaN and Infinity  */
+ * 0x00000000     0     -1023     0 and subnormal
+ * 0x00000001     1     -1022     Smallest normal exponent
+ * 0x000007fe  2046      1023     Largest normal exponent
+ * 0x000007ff  2047      1024     NaN and Infinity  */
 #define DOUBLE_EXPONENT_BIAS        (1023)
-#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)    // unbiased
-#define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1) // unbiased
-#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)   // unbiased
-#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)  // unbiased
+#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)
+#define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1)
+#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)
+#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)
+
 
 
 
 /*
- Convenient functions to avoid type punning, compiler warnings and
- such. The optimizer reduces them to a simple assignment.  This is a
- crusty corner of C. It shouldn't be this hard.
-
- These are also in UsefulBuf.h under a different name. They are copied
- here to avoid a dependency on UsefulBuf.h. There is no object code
- size impact because these always optimze down to a simple assignment.
+ * Convenient functions to avoid type punning, compiler warnings and
+ * such. The optimizer reduces them to a simple assignment. This is a
+ * crusty corner of C. It shouldn't be this hard.
+ *
+ * These are also in UsefulBuf.h under a different name. They are copied
+ * here to avoid a dependency on UsefulBuf.h. There is no object code
+ * size impact because these always optimize down to a simple assignment.
  */
-static inline uint32_t CopyFloatToUint32(float f)
+static inline uint32_t
+CopyFloatToUint32(float f)
 {
-    uint32_t u32;
-    memcpy(&u32, &f, sizeof(uint32_t));
-    return u32;
+   uint32_t u32;
+   memcpy(&u32, &f, sizeof(uint32_t));
+   return u32;
 }
 
-static inline uint64_t CopyDoubleToUint64(double d)
+static inline uint64_t
+CopyDoubleToUint64(double d)
 {
-    uint64_t u64;
-    memcpy(&u64, &d, sizeof(uint64_t));
-    return u64;
+   uint64_t u64;
+   memcpy(&u64, &d, sizeof(uint64_t));
+   return u64;
 }
 
-static inline double CopyUint64ToDouble(uint64_t u64)
+static inline double
+CopyUint64ToDouble(uint64_t u64)
 {
-    double d;
-    memcpy(&d, &u64, sizeof(uint64_t));
-    return d;
+   double d;
+   memcpy(&d, &u64, sizeof(uint64_t));
+   return d;
+}
+
+static inline float
+CopyUint32ToSingle(uint32_t u32)
+{
+   float f;
+   memcpy(&f, &u32, sizeof(uint32_t));
+   return f;
 }
 
 
-// Public function; see ieee754.h
-uint16_t IEEE754_FloatToHalf(float f)
-{
-    // Pull the three parts out of the single-precision float
-    const uint32_t uSingle = CopyFloatToUint32(f);
-    const int32_t  nSingleUnbiasedExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
-    const uint32_t uSingleSign             = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
-    const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
 
 
-    // Now convert the three parts to half-precision.
-
-    // All works is done on uint32_t with conversion to uint16_t at
-    // the end.  This avoids integer promotions that static analyzers
-    // complain about and reduces code size.
-    uint32_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
-
-    if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
-        // +/- Infinity and NaNs -- single biased exponent is 0xff
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        if(!uSingleSignificand) {
-            // Infinity
-            uHalfSignificand = 0;
-        } else {
-            // Copy the LSBs of the NaN payload that will fit from the
-            // single to the half
-            uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
-            if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
-                // It's a qNaN; copy the qNaN bit
-                uHalfSignificand |= HALF_QUIET_NAN_BIT;
-            } else {
-                // It's an sNaN; make sure the significand is not zero
-                // so it stays a NaN This is needed because not all
-                // significand bits are copied from single
-                if(!uHalfSignificand) {
-                    // Set the LSB. This is what wikipedia shows for
-                    // sNAN.
-                    uHalfSignificand |= 0x01;
-                }
-            }
-        }
-    } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
-        // 0 or a subnormal number -- singled biased exponent is 0
-        uHalfBiasedExponent = 0;
-        uHalfSignificand    = 0; // Any subnormal single will be too small to express as a half precision
-    } else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
-        // Exponent is too large to express in half-precision; round
-        // up to infinity
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        uHalfSignificand    = 0;
-    } else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
-        // Exponent is too small to express in half-precision normal;
-        // make it a half-precision subnormal
-        uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
-        uHalfSignificand    = 0;
-        // Could convert some of these values to a half-precision
-        // subnormal, but the layer above this will never use it. See
-        // layer above.  There is code to do this in github history
-        // for this file, but it was removed because it was never
-        // invoked.
-    } else {
-        // The normal case, exponent is in range for half-precision
-        uHalfBiasedExponent = (uint32_t)(nSingleUnbiasedExponent + HALF_EXPONENT_BIAS);
-        uHalfSignificand    = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
-    }
-    uHalfSign = uSingleSign;
-
-    // Put the 3 values in the right place for a half precision
-    const uint32_t uHalfPrecision =  uHalfSignificand |
-                                    (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
-                                    (uHalfSign << HALF_SIGN_SHIFT);
-    // Cast is safe because all the masks and shifts above work to
-    // make a half precision value which is only 16 bits.
-    return (uint16_t)uHalfPrecision;
-}
-
-
-// Public function; see ieee754.h
-uint16_t IEEE754_DoubleToHalf(double d)
-{
-    // Pull the three parts out of the double-precision float
-    const uint64_t uDouble = CopyDoubleToUint64(d);
-    const int64_t  nDoubleUnbiasedExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
-    const uint64_t uDoubleSign             = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
-    const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;
-
-    // Now convert the three parts to half-precision.
-
-    // All works is done on uint64_t with conversion to uint16_t at
-    // the end.  This avoids integer promotions that static analyzers
-    // complain about.  Other options are for these to be unsigned int
-    // or fast_int16_t. Code size doesn't vary much between all these
-    // options for 64-bit LLVM, 64-bit GCC and 32-bit Armv7 LLVM.
-    uint64_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
-
-    if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
-        // +/- Infinity and NaNs -- single biased exponent is 0xff
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        if(!uDoubleSignificand) {
-            // Infinity
-            uHalfSignificand = 0;
-        } else {
-            // Copy the LSBs of the NaN payload that will fit from the
-            // double to the half
-            uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
-            if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
-                // It's a qNaN; copy the qNaN bit
-                uHalfSignificand |= HALF_QUIET_NAN_BIT;
-            } else {
-                // It's an sNaN; make sure the significand is not zero
-                // so it stays a NaN This is needed because not all
-                // significand bits are copied from single
-                if(!uHalfSignificand) {
-                    // Set the LSB. This is what wikipedia shows for
-                    // sNAN.
-                    uHalfSignificand |= 0x01;
-                }
-            }
-        }
-    } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
-        // 0 or a subnormal number -- double biased exponent is 0
-        uHalfBiasedExponent = 0;
-        uHalfSignificand    = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
-    } else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
-        // Exponent is too large to express in half-precision; round
-        // up to infinity; TODO, is this really true?
-        uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
-        uHalfSignificand    = 0;
-    } else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
-        // Exponent is too small to express in half-precision; round
-        // down to zero
-        uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
-        uHalfSignificand = 0;
-        // Could convert some of these values to a half-precision
-        // subnormal, but the layer above this will never use it. See
-        // layer above.  There is code to do this in github history
-        // for this file, but it was removed because it was never
-        // invoked.
-    } else {
-        // The normal case, exponent is in range for half-precision
-        uHalfBiasedExponent = (uint32_t)(nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS);
-        uHalfSignificand    = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
-    }
-    uHalfSign = uDoubleSign;
-
-
-    // Put the 3 values in the right place for a half precision
-    const uint64_t uHalfPrecision =  uHalfSignificand |
-                                    (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
-                                    (uHalfSign << HALF_SIGN_SHIFT);
-    // Cast is safe because all the masks and shifts above work to
-    // make a half precision value which is only 16 bits.
-    return (uint16_t)uHalfPrecision;
-}
-
-
-/*
-  EEE754_HalfToFloat() was created but is not needed. It can be retrieved from
-  github history if needed.
+/**
+ * @brief Assemble sign, significand and exponent into double precision float.
+ *
+ * @param[in] uDoubleSign              0 if positive, 1 if negative
+ * @param[in] uDoubleSignificand       Bits of the significand
+ * @param[in] nDoubleUnBiasedExponent  Exponent
+ *
+ * This returns a double-precision float, a binary64
+ * as specified in IEEE754.
  */
-
-
-// Public function; see ieee754.h
-double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
+static double
+IEEE754_AssembleDouble(uint64_t uDoubleSign,
+                       uint64_t uDoubleSignificand,
+                       int64_t  nDoubleUnBiasedExponent)
 {
-    // Pull out the three parts of the half-precision float.  Do all
-    // the work in 64 bits because that is what the end result is.  It
-    // may give smaller code size and will keep static analyzers
-    // happier.
-    const uint64_t uHalfSignificand      = uHalfPrecision & HALF_SIGNIFICAND_MASK;
-    const int64_t  nHalfUnBiasedExponent = (int64_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
-    const uint64_t uHalfSign             = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+   uint64_t uDoubleBiasedExponent;
+
+   uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
+
+   return CopyUint64ToDouble(uDoubleSignificand |
+                             (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
+                             (uDoubleSign << DOUBLE_SIGN_SHIFT));
+}
 
 
-    // Make the three parts of hte single-precision number
-    uint64_t uDoubleSignificand, uDoubleSign, uDoubleBiasedExponent;
-    if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
-        // 0 or subnormal
-        uDoubleBiasedExponent = DOUBLE_EXPONENT_ZERO + DOUBLE_EXPONENT_BIAS;
-        if(uHalfSignificand) {
-            // Subnormal case
-            uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
-            // A half-precision subnormal can always be converted to a
-            // normal double-precision float because the ranges line
-            // up
-            uDoubleSignificand = uHalfSignificand;
-            // Shift bits from right of the decimal to left, reducing
-            // the exponent by 1 each time
-            do {
-                uDoubleSignificand <<= 1;
-                uDoubleBiasedExponent--;
-            } while ((uDoubleSignificand & 0x400) == 0);
-            uDoubleSignificand &= HALF_SIGNIFICAND_MASK;
-            uDoubleSignificand <<= (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+double
+IEEE754_HalfToDouble(uint16_t uHalfPrecision)
+{
+   uint64_t uDoubleSignificand;
+   int64_t  nDoubleUnBiasedExponent;
+   double   dResult;
+
+   /* Pull out the three parts of the half-precision float.  Do all
+    * the work in 64 bits because that is what the end result is.  It
+    * may give smaller code size and will keep static analyzers
+    * happier.
+    */
+   const uint64_t uHalfSignificand      = uHalfPrecision & HALF_SIGNIFICAND_MASK;
+   const uint64_t uHalfBiasedExponent   = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
+   const int64_t  nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
+   const uint64_t uHalfSign             = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+
+   if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
+      /* 0 or subnormal */
+      if(uHalfSignificand) {
+         /* --- SUBNORMAL --- */
+         /* A half-precision subnormal can always be converted to a
+          * normal double-precision float because the ranges line up.
+          * The exponent of a subnormal starts out at the min exponent
+          * for a normal. As the subnormal significand bits are
+          * shifted left to normalize, the exponent is
+          * decremented. Shifting continues until fully normalized.
+          */
+          nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
+          uDoubleSignificand      = uHalfSignificand;
+          do {
+             uDoubleSignificand <<= 1;
+             nDoubleUnBiasedExponent--;
+          } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
+          /* A normal has an implied 1 in the most significant
+           * position that a subnormal doesn't. */
+          uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
+          /* Must shift into place for a double significand */
+          uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
+
+          dResult = IEEE754_AssembleDouble(uHalfSign,
+                                           uDoubleSignificand,
+                                           nDoubleUnBiasedExponent);
+      } else {
+         /* --- ZERO --- */
+         dResult = IEEE754_AssembleDouble(uHalfSign,
+                                          0,
+                                          DOUBLE_EXPONENT_ZERO);
+      }
+   } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
+      /* NaN or Infinity */
+      if(uHalfSignificand) {
+         /* --- NaN --- */
+         /* Half-precision payloads always fit into double precision
+          * payloads. They are shifted left the same as a normal
+          * number significand.
+          */
+         uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+         dResult = IEEE754_AssembleDouble(uHalfSign,
+                                          uDoubleSignificand,
+                                          DOUBLE_EXPONENT_INF_OR_NAN);
+      } else {
+         /* --- INFINITY --- */
+         dResult = IEEE754_AssembleDouble(uHalfSign,
+                                          0,
+                                          DOUBLE_EXPONENT_INF_OR_NAN);
+      }
+   } else {
+      /* --- NORMAL NUMBER --- */
+      uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+      dResult = IEEE754_AssembleDouble(uHalfSign,
+                                       uDoubleSignificand,
+                                       nHalfUnBiasedExponent);
+   }
+
+   return dResult;
+}
+
+
+/**
+ * @brief Assemble sign, significand and exponent into half precision float.
+ *
+ * @param[in] uHalfSign              0 if positive, 1 if negative
+ * @param[in] uHalfSignificand       Bits of the significand
+ * @param[in] nHalfUnBiasedExponent  Exponent
+ *
+ * This returns the bits for a half-precision float, a binary16 as
+ * specified in IEEE754. It is returned as a uint32_t rather than a
+ * uint16_t for convenience of usage.
+ */
+static uint32_t
+IEEE754_AssembleHalf(uint32_t uHalfSign,
+                     uint32_t uHalfSignificand,
+                     int32_t nHalfUnBiasedExponent)
+{
+   uint32_t uHalfUnbiasedExponent;
+
+   uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);
+
+   return uHalfSignificand |
+          (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
+          (uHalfSign << HALF_SIGN_SHIFT);
+}
+
+
+/*  Public function; see ieee754.h */
+IEEE754_union
+IEEE754_SingleToHalf(float f)
+{
+   IEEE754_union result;
+   uint32_t      uDroppedBits;
+   int32_t       nExponentDifference;
+   int32_t       nShiftAmount;
+   uint32_t      uHalfSignificand;
+
+   /* Pull the three parts out of the single-precision float. Most work
+    * is done with uint32_t which helps avoid integer promotions and
+    * static analyzer complaints.
+    */
+   const uint32_t uSingle                 = CopyFloatToUint32(f);
+   const uint32_t uSingleBiasedExponent   = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
+   const int32_t  nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
+   const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
+   const uint32_t uSingleSign             = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
+
+   if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
+      if(uSingleSignificand == 0) {
+         /* --- IS ZERO --- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                              0,
+                                              HALF_EXPONENT_ZERO);
+      } else {
+         /* --- IS SINGLE SUBNORMAL --- */
+         /* The largest single subnormal is slightly less than the
+          * smallest single normal which is 2^-126 or about
+          * 1.1754943508222875e-38.  The smallest half subnormal is
+          * 2^-24 or 5.9604644775390625E-8.  There is no overlap so
+          * single subnormals can't be converted to halfs of any sort.
+          */
+         result.uSize   = IEEE754_UNION_IS_SINGLE;
+         result.uValue  = uSingle;
+      }
+   } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
+      if(uSingleSignificand == 0) {
+         /* ---- IS INFINITY ---- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
+      } else {
+         /* The NaN can only be converted if no payload bits are lost
+          * per RFC 8949 section 4.1 that defines Preferred
+          * Serialization. Note that Deterministically Encoded CBOR in
+          * section 4.2 allows for some variation of this rule, but at
+          * the moment this implementation is of Preferred
+          * Serialization, not CDE. As of December 2023, we are also
+          * expecting an update to CDE. This code may need to be
+          * updated for CDE.
+          */
+         uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
+         if(uDroppedBits == 0) {
+            /* --- IS CONVERTABLE NAN --- */
+            uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+            result.uSize  = IEEE754_UNION_IS_HALF;
+            result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                                 uHalfSignificand,
+                                                 HALF_EXPONENT_INF_OR_NAN);
+
+         } else {
+            /* --- IS UNCONVERTABLE NAN --- */
+            result.uSize   = IEEE754_UNION_IS_SINGLE;
+            result.uValue  = uSingle;
+         }
+      }
+   } else {
+      /* ---- REGULAR NUMBER ---- */
+      /* A regular single can be converted to a regular half if the
+       * single's exponent is in the smaller range of a half and if no
+       * precision is lost in the significand.
+       */
+      if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
+         nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
+        (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
+         uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+
+         /* --- CONVERT TO HALF NORMAL --- */
+         result.uSize  = IEEE754_UNION_IS_HALF;
+         result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                              uHalfSignificand,
+                                              nSingleUnbiasedExponent);
+      } else {
+         /* Unable to convert to a half normal. See if it can be
+          * converted to a half subnormal. To do that, the exponent
+          * must be in range and no precision can be lost in the
+          * significand.
+          *
+          * This is more complicated because the number is not
+          * normalized.  The significand must be shifted proportionally
+          * to the exponent and 1 must be added in.  See
+          * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+          *
+          * Exponents -14 to -24 map to a shift of 0 to 10 of the
+          * significand.  The largest value of a half subnormal has an
+          * exponent of -14. Subnormals are not normalized like
+          * normals meaning they lose precision as the numbers get
+          * smaller. Normals don't lose precision because the exponent
+          * allows all the bits of the significand to be significant.
+          */
+         /* The exponent of the largest possible half-precision
+          * subnormal is HALF_EXPONENT_MIN (-14).  Exponents larger
+          * than this are normal and handled above. We're going to
+          * shift the significand right by at least this amount.
+          */
+         nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
+
+         /* In addition to the shift based on the exponent's value,
+          * the single significand has to be shifted right to fit into
+          * a half-precision significand */
+         nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+
+         /* Must add 1 in to the possible significand because there is
+          * an implied 1 for normal values and not for subnormal
+          * values. See equations here:
+          * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+          */
+         uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;
+
+         /* If only zero bits get shifted out, this can be converted
+          * to subnormal */
+         if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
+            nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
+            uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
+            /* --- CONVERTABLE TO HALF SUBNORMAL --- */
+            result.uSize  = IEEE754_UNION_IS_HALF;
+            result.uValue = IEEE754_AssembleHalf(uSingleSign,
+                                                 uHalfSignificand,
+                                                 HALF_EXPONENT_ZERO);
+         } else {
+            /* --- DO NOT CONVERT --- */
+            result.uSize   = IEEE754_UNION_IS_SINGLE;
+            result.uValue  = uSingle;
+         }
+      }
+   }
+
+   return result;
+}
+
+
+/**
+ * @brief Assemble sign, significand and exponent into single precision float.
+ *
+ * @param[in] uSingleSign              0 if positive, 1 if negative
+ * @param[in] uSingleSignificand       Bits of the significand
+ * @param[in] nSingleUnBiasedExponent  Exponent
+ *
+ * This returns the bits for a single-precision float, a binary32 as
+ * specified in IEEE754. It is returned as a uint64_t rather than a
+ * uint32_t or a float for convenience of usage.
+ */
+static uint64_t
+IEEE754_AssembleSingle(uint64_t uSingleSign,
+                       uint64_t uSingleSignificand,
+                       int64_t  nSingleUnBiasedExponent)
+{
+   uint64_t uSingleBiasedExponent;
+
+   uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);
+
+   return uSingleSignificand |
+          (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
+          (uSingleSign << SINGLE_SIGN_SHIFT);
+}
+
+
+/**
+ * @brief Convert a double-precision float to single-precision.
+ *
+ * @param[in] d  The value to convert.
+ *
+ * @returns Either unconverted value or value converted to single-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
+ */
+static IEEE754_union
+IEEE754_DoubleToSingle(double d)
+{
+   IEEE754_union Result;
+   int64_t       nExponentDifference;
+   int64_t       nShiftAmount;
+   uint64_t      uSingleSignificand;
+   uint64_t      uDroppedBits;
+
+
+   /* Pull the three parts out of the double-precision float. Most
+    * work is done with uint64_t which helps avoid integer promotions
+    * and static analyzer complaints.
+    */
+   const uint64_t uDouble                 = CopyDoubleToUint64(d);
+   const uint64_t uDoubleBiasedExponent   = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
+   const int64_t  nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
+   const uint64_t uDoubleSign             = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
+   const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;
+
+
+    if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
+        if(uDoubleSignificand == 0) {
+            /* --- IS ZERO --- */
+            Result.uSize  = IEEE754_UNION_IS_SINGLE;
+            Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                   0,
+                                                   SINGLE_EXPONENT_ZERO);
         } else {
-            // Just zero
-            uDoubleSignificand = 0;
-        }
-    } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
-        // NaN or Inifinity
-        uDoubleBiasedExponent = DOUBLE_EXPONENT_INF_OR_NAN + DOUBLE_EXPONENT_BIAS;
-        if(uHalfSignificand) {
-            // NaN
-            // First preserve the NaN payload from half to single
-            uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
-            if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
-                // Next, set qNaN if needed since half qNaN bit is not
-                // copied above
-                uDoubleSignificand |= DOUBLE_QUIET_NAN_BIT;
+            /* --- IS DOUBLE SUBNORMAL --- */
+            /* The largest double subnormal is slightly less than the
+             * largest double normal which is 2^-1022 or
+             * 2.2250738585072014e-308.  The smallest single subnormal
+             * is 2^-149 or 1.401298464324817e-45.  There is no
+             * overlap so double subnormals can't be converted to
+             * singles of any sort.
+             */
+            Result.uSize   = IEEE754_UNION_IS_DOUBLE;
+            Result.uValue  = uDouble;
+         }
+    } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
+         if(uDoubleSignificand == 0) {
+             /* ---- IS INFINITY ---- */
+             Result.uSize  = IEEE754_UNION_IS_SINGLE;
+             Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                    0,
+                                                    SINGLE_EXPONENT_INF_OR_NAN);
+         } else {
+             /* The NaN can only be converted if no payload bits are
+              * lost per RFC 8949 section 4.1 that defines Preferred
+              * Serialization. Note that Deterministically Encoded CBOR
+              * in section 4.2 allows for some variation of this rule,
+              * but at the moment this implementation is of Preferred
+              * Serialization, not CDE. As of December 2023, we are
+              * also expecting an update to CDE. This code may need to
+              * be updated for CDE.
+              */
+             uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
+             if(uDroppedBits == 0) {
+                /* --- IS CONVERTABLE NAN --- */
+                uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+                Result.uSize  = IEEE754_UNION_IS_SINGLE;
+                Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                       uSingleSignificand,
+                                                       SINGLE_EXPONENT_INF_OR_NAN);
+            } else {
+               /* --- IS UNCONVERTABLE NAN --- */
+               Result.uSize   = IEEE754_UNION_IS_DOUBLE;
+               Result.uValue  = uDouble;
             }
+         }
+    } else {
+        /* ---- REGULAR NUMBER ---- */
+        /* A regular double can be converted to a regular single if
+         * the double's exponent is in the smaller range of a single
+         * and if no precision is lost in the significand.
+         */
+        uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
+        if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
+           nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
+           uDroppedBits == 0) {
+            /* --- IS CONVERTABLE TO SINGLE --- */
+            uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+            Result.uSize  = IEEE754_UNION_IS_SINGLE;
+            Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                   uSingleSignificand,
+                                                   nDoubleUnbiasedExponent);
         } else {
-            // Infinity
-            uDoubleSignificand = 0;
+            /* Unable to convert to a single normal. See if it can be
+             * converted to a single subnormal. To do that, the
+             * exponent must be in range and no precision can be lost
+             * in the significand.
+             *
+             * This is more complicated because the number is not
+             * normalized.  The significand must be shifted proportionally
+             * to the exponent and 1 must be added in. The shift is skipped
+             * when the count is outside 0..63 as that is undefined in C. See
+             * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
+             */
+            nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
+            nShiftAmount        = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
+            uSingleSignificand  = (nShiftAmount < 0 || nShiftAmount > 63) ? 0 :
+               ((uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount);
+            if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
+               nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
+               uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
+               /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
+               Result.uSize  = IEEE754_UNION_IS_SINGLE;
+               Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
+                                                      uSingleSignificand,
+                                                      SINGLE_EXPONENT_ZERO);
+            } else {
+               /* --- CAN NOT BE CONVERTED --- */
+               Result.uSize   = IEEE754_UNION_IS_DOUBLE;
+               Result.uValue  = uDouble;
+            }
         }
-    } else {
-        // Normal number
-        uDoubleBiasedExponent = (uint64_t)(nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
-        uDoubleSignificand    = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
     }
-    uDoubleSign = uHalfSign;
 
-
-    // Shift the 3 parts into place as a double-precision
-    const uint64_t uDouble = uDoubleSignificand |
-                            (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
-                            (uDoubleSign << DOUBLE_SIGN_SHIFT);
-    return CopyUint64ToDouble(uDouble);
+    return Result;
 }
 
 
-
-/*
- IEEE754_FloatToDouble(uint32_t uFloat) was created but is not needed. It can be retrieved from
-github history if needed.
-*/
-
-
-
-// Public function; see ieee754.h
-IEEE754_union IEEE754_FloatToSmallest(float f)
+/* Public function; see ieee754.h */
+IEEE754_union
+IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision)
 {
-    IEEE754_union result;
+   IEEE754_union result;
 
-    // Pull the neeed two parts out of the single-precision float
-    const uint32_t uSingle = CopyFloatToUint32(f);
-    const int32_t  nSingleExponent    = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
-    const uint32_t uSingleSignificand =   uSingle & SINGLE_SIGNIFICAND_MASK;
+   result = IEEE754_DoubleToSingle(d);
 
-    // Bit mask that is the significand bits that would be lost when
-    // converting from single-precision to half-precision
-    const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
+   if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
+      /* Cast to uint32_t is OK, because value was just successfully
+       * converted to single. */
+      float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
+      result = IEEE754_SingleToHalf(uSingle);
+   }
 
-    // Optimizer will re organize so there is only one call to
-    // IEEE754_FloatToHalf() in the final code.
-    if(uSingle == 0) {
-        // Value is 0.0000, not a a subnormal
-        result.uSize = IEEE754_UNION_IS_HALF;
-        result.uValue  = IEEE754_FloatToHalf(f);
-    } else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
-        // NaN, +/- infinity
-        result.uSize = IEEE754_UNION_IS_HALF;
-        result.uValue  = IEEE754_FloatToHalf(f);
-    } else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
-        // Normal number in exponent range and precision won't be lost
-        result.uSize = IEEE754_UNION_IS_HALF;
-        result.uValue  = IEEE754_FloatToHalf(f);
-    } else {
-        // Subnormal, exponent out of range, or precision will be lost
-        result.uSize = IEEE754_UNION_IS_SINGLE;
-        result.uValue  = uSingle;
-    }
-
-    return result;
+   return result;
 }
 
-// Public function; see ieee754.h
-IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
-{
-    IEEE754_union result;
 
-    // Pull the needed two parts out of the double-precision float
-    const uint64_t uDouble = CopyDoubleToUint64(d);
-    const int64_t  nDoubleExponent     = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
-    const uint64_t uDoubleSignificand  = uDouble & DOUBLE_SIGNIFICAND_MASK;
+#else /* QCBOR_DISABLE_PREFERRED_FLOAT */
 
-    // Masks to check whether dropped significand bits are zero or not
-    const uint64_t uDroppedHalfBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
-    const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
-
-    // This will not convert to half-precion or single-precision
-    // subnormals.  Values that could be converted will be output as
-    // the double they are or occasionally to a normal single.  This
-    // could be implemented, but it is more code and would rarely be
-    // used and rarely reduce the output size.
-
-    // The various cases
-    if(d == 0.0) { // Take care of positive and negative zero
-        // Value is 0.0000, not a a subnormal
-        result.uSize  = IEEE754_UNION_IS_HALF;
-        result.uValue = IEEE754_DoubleToHalf(d);
-    } else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
-        // NaN, +/- infinity
-        result.uSize  = IEEE754_UNION_IS_HALF;
-        result.uValue = IEEE754_DoubleToHalf(d);
-    } else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedHalfBits))) {
-        // Can convert to half without precision loss
-        result.uSize  = IEEE754_UNION_IS_HALF;
-        result.uValue = IEEE754_DoubleToHalf(d);
-    } else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
-        // Can convert to single without precision loss
-        result.uSize  = IEEE754_UNION_IS_SINGLE;
-        result.uValue = CopyFloatToUint32((float)d);
-    } else {
-        // Can't convert without precision loss
-        result.uSize  = IEEE754_UNION_IS_DOUBLE;
-        result.uValue = uDouble;
-    }
-
-    return result;
-}
-
-#else
-
-int x;
+int ieee754_dummy_place_holder;
 
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
diff --git a/src/ieee754.h b/src/ieee754.h
index d37532a..863019b 100644
--- a/src/ieee754.h
+++ b/src/ieee754.h
@@ -1,14 +1,14 @@
-/*==============================================================================
- ieee754.c -- floating-point conversion between half, double & single-precision
-
- Copyright (c) 2018-2020, Laurence Lundblade. All rights reserved.
-
- SPDX-License-Identifier: BSD-3-Clause
-
- See BSD-3-Clause license in README.md
-
- Created on 7/23/18
- =============================================================================*/
+/* ==========================================================================
+ * ieee754.h -- Conversion between half, double & single-precision floats
+ *
+ * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * See BSD-3-Clause license in README.md
+ *
+ * Created on 7/23/18
+ * ========================================================================== */
 
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
 
@@ -18,130 +18,109 @@
 #include <stdint.h>
 
 
-
-/*
- General comments
-
- This is a complete in that it handles all conversion cases including
- +/- infinity, +/- zero, subnormal numbers, qNaN, sNaN and NaN
- payloads.
-
- This conforms to IEEE 754-2008, but note that this doesn't specify
- conversions, just the encodings.
-
- NaN payloads are preserved with alignment on the LSB. The qNaN bit is
- handled differently and explicity copied. It is always the MSB of the
- significand. The NaN payload MSBs (except the qNaN bit) are truncated
- when going from double or single to half.
-
- TODO: what does the C cast do with NaN payloads from
- double to single? It probably depends entirely on the
- CPU.
-
- */
-
-/*
- Most simply just explicilty encode the type you want, single or
- double.  This works easily everywhere since standard C supports both
- these types and so does qcbor.  This encoder also supports half
- precision and there's a few ways to use it to encode floating-point
- numbers in less space.
-
- Without losing precision, you can encode a single or double such that
- the special values of 0, NaN and Infinity encode as half-precision.
- This CBOR decodoer and most others should handle this properly.
-
- If you don't mind losing precision, then you can use half-precision.
- One way to do this is to set up your environment to use
- ___fp_16. Some compilers and CPUs support it even though it is not
- standard C. What is nice about this is that your program will use
- less memory and floating-point operations like multiplying, adding
- and such will be faster.
-
- Another way to make use of half-precision is to represent the values
- in your program as single or double, but encode them in CBOR as
- half-precision. This cuts the size of the encoded messages by 2 or 4,
- but doesn't reduce memory needs or speed because you are still using
- single or double in your code.
-
+/** @file ieee754.h
+ *
+ * This implements floating-point conversion between half, single and
+ * double precision floating-point numbers, in particular conversion to
+ * smaller representation (e.g., double to single) that does not lose
+ * precision for CBOR preferred serialization.
+ *
+ * This implementation works entirely with shifts and masks and does
+ * not require any floating-point HW or library.
+ *
+ * This conforms to IEEE 754-2008, but note that it doesn't specify
+ * conversions, just the encodings.
+ *
+ * This is complete, supporting +/- infinity, +/- zero, subnormals and
+ * NaN payloads. NaN payloads are converted to smaller by dropping the
+ * right most bits if they are zero and shifting to the right. If the
+ * rightmost bits are not zero the conversion is not performed. When
+ * converting from smaller to larger, the payload is shifted left and
+ * zero-padded. This is what is specified by CBOR preferred
+ * serialization and what modern HW conversion instructions do. CBOR
+ * CDE handling for NaN is not clearly specified, but upcoming
+ * documents may clarify this.
+ *
+ * There is no special handling of signaling and quiet NaNs. It probably
+ * isn't necessary to transmit these special NaNs as their purpose is
+ * more for propagating errors up through some calculation. In many
+ * cases the handling of the NaN payload will work for signaling and
+ * quiet NaNs.
+ *
+ * A previous version of this was usable as a general library for
+ * conversion. This version is reduced to what is needed for CBOR.
  */
 
 
-
-/*
- Convert single-precision float to half-precision float.  Precision
- and NaN payload bits will be lost. Too-large values will round up to
- infinity and too small to zero.
+/**
+ * @brief Convert half-precision float to double-precision float.
+ *
+ * @param[in] uHalfPrecision   Half-precision number to convert.
+ *
+ * @returns double-precision value.
+ *
+ * This is a lossless conversion because every half-precision value
+ * can be represented as a double. There is no error condition.
+ *
+ * There is no half-precision type in C, so it is represented here as
+ * a @c uint16_t. The bits of @c uHalfPrecision are as described for
+ * half-precision by IEEE 754.
  */
-uint16_t IEEE754_FloatToHalf(float f);
+double
+IEEE754_HalfToDouble(uint16_t uHalfPrecision);
 
 
-/*
- Convert double-precision float to half-precision float.  Precision
- and NaN payload bits will be lost. Too-large values will round up to
- infinity and too small to zero.
+/** Holds a floating-point value that could be half, single or
+ * double-precision.  The value is in a @c uint64_t that may be copied
+ * to a float or double.  Simply casting uValue will usually work but
+ * may generate compiler or static analyzer warnings. Using
+ * UsefulBufUtil_CopyUint64ToDouble() or
+ * UsefulBufUtil_CopyUint32ToFloat() will not (and will not generate
+ * any extra code).
  */
-uint16_t IEEE754_DoubleToHalf(double d);
-
-
-/*
- Convert half-precision float to double-precision float.
- This is a loss-less conversion.
- */
-double IEEE754_HalfToDouble(uint16_t uHalfPrecision);
-
-
-// Both tags the value and gives the size
-#define IEEE754_UNION_IS_HALF   2
-#define IEEE754_UNION_IS_SINGLE 4
-#define IEEE754_UNION_IS_DOUBLE 8
-
 typedef struct {
-    uint8_t uSize;  // One of IEEE754_IS_xxxx
-    uint64_t uValue;
+   enum {IEEE754_UNION_IS_HALF   = 2,
+         IEEE754_UNION_IS_SINGLE = 4,
+         IEEE754_UNION_IS_DOUBLE = 8,
+   } uSize; /* Size of uValue */
+   uint64_t uValue;
 } IEEE754_union;
 
 
-/*
- Converts double-precision to single-precision or half-precision if
- possible without loss of precisions. If not, leaves it as a
- double. Only converts to single-precision unless bAllowHalfPrecision
- is set.
+/**
+ * @brief Convert a double to either single or half-precision.
+ *
+ * @param[in] d                    The value to convert.
+ * @param[in] bAllowHalfPrecision  If true, convert to either half or
+ *                                 single precision.
+ *
+ * @returns Unconverted value, or value converted to single or half-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
  */
-IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision);
+IEEE754_union
+IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision);
 
-/*
- Converts double-precision to single-precision if possible without
- loss of precision. If not, leaves it as a double.
+
+/**
+ * @brief Convert a single-precision float to half-precision.
+ *
+ * @param[in] f  The value to convert.
+ *
+ * @returns Either unconverted value or value converted to half-precision.
+ *
+ * This always succeeds. If the value cannot be converted without the
+ * loss of precision, it is not converted.
+ *
+ * This handles all subnormals and NaN payloads.
  */
-static inline IEEE754_union IEEE754_DoubleToSmall(double d)
-{
-    return IEEE754_DoubleToSmallestInternal(d, 0);
-}
-
-
-/*
- Converts double-precision to single-precision or half-precision if
- possible without loss of precisions. If not, leaves it as a double.
- */
-static inline IEEE754_union IEEE754_DoubleToSmallest(double d)
-{
-    return IEEE754_DoubleToSmallestInternal(d, 1);
-}
-
-
-/*
- Converts single-precision to half-precision if possible without loss
- of precision. If not leaves as single-precision.
- */
-IEEE754_union IEEE754_FloatToSmallest(float f);
+IEEE754_union
+IEEE754_SingleToHalf(float f);
 
 
 #endif /* ieee754_h */
 
-
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
-
-
-
-
diff --git a/src/qcbor_encode.c b/src/qcbor_encode.c
index 53df657..f52692a 100644
--- a/src/qcbor_encode.c
+++ b/src/qcbor_encode.c
@@ -768,9 +768,9 @@
 void QCBOREncode_AddDouble(QCBOREncodeContext *me, double dNum)
 {
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
-   const IEEE754_union uNum = IEEE754_DoubleToSmallest(dNum);
+   const IEEE754_union uNum = IEEE754_DoubleToSmaller(dNum, true);
 
-   QCBOREncode_AddType7(me, uNum.uSize, uNum.uValue);
+   QCBOREncode_AddType7(me, (uint8_t)uNum.uSize, uNum.uValue);
 #else /* QCBOR_DISABLE_PREFERRED_FLOAT */
    QCBOREncode_AddDoubleNoPreferred(me, dNum);
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */
@@ -794,9 +794,9 @@
 void QCBOREncode_AddFloat(QCBOREncodeContext *me, float fNum)
 {
 #ifndef QCBOR_DISABLE_PREFERRED_FLOAT
-   const IEEE754_union uNum = IEEE754_FloatToSmallest(fNum);
+   const IEEE754_union uNum = IEEE754_SingleToHalf(fNum);
 
-   QCBOREncode_AddType7(me, uNum.uSize, uNum.uValue);
+   QCBOREncode_AddType7(me, (uint8_t)uNum.uSize, uNum.uValue);
 #else /* QCBOR_DISABLE_PREFERRED_FLOAT */
    QCBOREncode_AddFloatNoPreferred(me, fNum);
 #endif /* QCBOR_DISABLE_PREFERRED_FLOAT */