remove conversion to subnormals; tidy up comments

commit: 576aa0c1d6ba16ade7a7527774cf7479b34e6a4a [log] [tgz]
author: Laurence Lundblade <lgl@securitytheory.com> Tue Jul 21 21:36:52 2020 -0700
committer: Laurence Lundblade <lgl@securitytheory.com> Tue Jul 21 21:36:52 2020 -0700
tree: 01c909e50ac75652ee4624d40f17f87c9cd378af
parent: 29ec4640a61597d4b5ed0b2db3e984cfa180f52d [diff]
diff --git a/src/ieee754.c b/src/ieee754.c
index 285c006..8b2efa8 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c

@@ -182,9 +182,9 @@
 
     // Now convert the three parts to half-precision.
 
-    // All works is done on uint32_t with conversion to uint16_t at the end.
-    // This avoids integer promotions that static analyzers complain about and
-    // reduces code size.
+    // All work is done on uint32_t with conversion to uint16_t at
+    // the end.  This avoids integer promotions that static analyzers
+    // complain about and reduces code size.
     uint32_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
 
     if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
@@ -194,38 +194,42 @@
             // Infinity
             uHalfSignificand = 0;
         } else {
-            // Copy the LSBs of the NaN payload that will fit from the single to the half
+            // Copy the LSBs of the NaN payload that will fit from the
+            // single to the half
             uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
             if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
                 // It's a qNaN; copy the qNaN bit
                 uHalfSignificand |= HALF_QUIET_NAN_BIT;
             } else {
-                // It's an sNaN; make sure the significand is not zero so it stays a NaN
-                // This is needed because not all significand bits are copied from single
+                // It's an sNaN; make sure the significand is not zero
+                // so it stays a NaN.  This is needed because not all
+                // significand bits are copied from single
                 if(!uHalfSignificand) {
-                    // Set the LSB. This is what wikipedia shows for sNAN.
+                    // Set the LSB. This is what Wikipedia shows for
+                    // sNaN.
                     uHalfSignificand |= 0x01;
                 }
             }
         }
     } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
-        // 0 or a subnormal number  -- singled biased exponent is 0
+        // 0 or a subnormal number -- single biased exponent is 0
         uHalfBiasedExponent = 0;
         uHalfSignificand    = 0; // Any subnormal single will be too small to express as a half precision
     } else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
-        // Exponent is too large to express in half-precision; round up to infinity
+        // Exponent is too large to express in half-precision; round
+        // up to infinity
         uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
         uHalfSignificand    = 0;
     } else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
-        // Exponent is too small to express in half-precision normal; make it a half-precision subnormal
+        // Exponent is too small to express in half-precision normal;
+        // make it a half-precision subnormal
         uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
-        // Difference between single normal exponent and the base exponent of a half subnormal
-        const uint32_t uExpDiff = (uint32_t)-(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
-        // Also have to shift the significand by the difference in number of bits between a single and a half significand
-        const uint32_t uSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
-        // Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
-        const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01U << SINGLE_NUM_SIGNIFICAND_BITS);
-        uHalfSignificand = uSingleSignificandSubnormal >> (uExpDiff + uSignificandBitsDiff);
+        uHalfSignificand    = 0;
+        // Could convert some of these values to a half-precision
+        // subnormal, but the layer above this will never use it. See
+        // layer above.  There is code to do this in github history
+        // for this file, but it was removed because it was never
+        // invoked.
     } else {
         // The normal case, exponent is in range for half-precision
         uHalfBiasedExponent = (uint32_t)(nSingleUnbiasedExponent + HALF_EXPONENT_BIAS);
@@ -237,8 +241,8 @@
     const uint32_t uHalfPrecision =  uHalfSignificand |
                                     (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
                                     (uHalfSign << HALF_SIGN_SHIFT);
-    // Cast is safe because all the masks and shifts above work to make
-    // a half precision value which is only 16 bits.
+    // Cast is safe because all the masks and shifts above work to
+    // make a half precision value which is only 16 bits.
     return (uint16_t)uHalfPrecision;
 }
 
@@ -254,11 +258,11 @@
 
     // Now convert the three parts to half-precision.
 
-    // All works is done on uint64_t with conversion to uint16_t at the end.
-    // This avoids integer promotions that static analyzers complain about.
-    // Other options are for these to be unsigned int or fast_int16_t. Code
-    // size doesn't vary much between all these options for 64-bit LLVM,
-    // 64-bit GCC and 32-bit Armv7 LLVM.
+    // All work is done on uint64_t with conversion to uint16_t at
+    // the end.  This avoids integer promotions that static analyzers
+    // complain about.  Other options are for these to be unsigned int
+    // or fast_int16_t. Code size doesn't vary much between all these
+    // options for 64-bit LLVM, 64-bit GCC and 32-bit Armv7 LLVM.
     uint64_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
 
     if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
@@ -268,38 +272,42 @@
             // Infinity
             uHalfSignificand = 0;
         } else {
-            // Copy the LSBs of the NaN payload that will fit from the double to the half
+            // Copy the LSBs of the NaN payload that will fit from the
+            // double to the half
             uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
             if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
                 // It's a qNaN; copy the qNaN bit
                 uHalfSignificand |= HALF_QUIET_NAN_BIT;
             } else {
-                // It's an sNaN; make sure the significand is not zero so it stays a NaN
-                // This is needed because not all significand bits are copied from single
+                // It's an sNaN; make sure the significand is not zero
+                // so it stays a NaN.  This is needed because not all
+                // significand bits are copied from double
                 if(!uHalfSignificand) {
-                    // Set the LSB. This is what wikipedia shows for sNAN.
+                    // Set the LSB. This is what Wikipedia shows for
+                    // sNaN.
                     uHalfSignificand |= 0x01;
                 }
             }
         }
     } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
-        // 0 or a subnormal number  -- double biased exponent is 0
+        // 0 or a subnormal number -- double biased exponent is 0
         uHalfBiasedExponent = 0;
         uHalfSignificand    = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
     } else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
-        // Exponent is too large to express in half-precision; round up to infinity; TODO, is this really true?
+        // Exponent is too large to express in half-precision; round
+        // up to infinity; TODO, is this really true?
         uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
         uHalfSignificand    = 0;
     } else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
-        // Exponent is too small to express in half-precision; round down to zero
+        // Exponent is too small to express in half-precision; round
+        // down to zero
         uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
-        // Difference between double normal exponent and the base exponent of a half subnormal
-        const uint64_t uExpDiff = (uint64_t)-(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
-        // Also have to shift the significand by the difference in number of bits between a double and a half significand
-        const uint64_t uSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
-        // Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
-        const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
-        uHalfSignificand = uDoubleSignificandSubnormal >> (uExpDiff + uSignificandBitsDiff);
+        uHalfSignificand = 0;
+        // Could convert some of these values to a half-precision
+        // subnormal, but the layer above this will never use it. See
+        // layer above.  There is code to do this in github history
+        // for this file, but it was removed because it was never
+        // invoked.
     } else {
         // The normal case, exponent is in range for half-precision
         uHalfBiasedExponent = (uint32_t)(nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS);
@@ -312,8 +320,8 @@
     const uint64_t uHalfPrecision =  uHalfSignificand |
                                     (uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
                                     (uHalfSign << HALF_SIGN_SHIFT);
-    // Cast is safe because all the masks and shifts above work to make
-    // a half precision value which is only 16 bits.
+    // Cast is safe because all the masks and shifts above work to
+    // make a half precision value which is only 16 bits.
     return (uint16_t)uHalfPrecision;
 }
 
@@ -327,9 +335,10 @@
 // Public function; see ieee754.h
 double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
 {
-    // Pull out the three parts of the half-precision float
-    // Do all the work in 64 bits because that is what the end result is.
-    // It may give smaller code size and will keep static analyzers happier.
+    // Pull out the three parts of the half-precision float.  Do all
+    // the work in 64 bits because that is what the end result is.  It
+    // may give smaller code size and will keep static analyzers
+    // happier.
     const uint64_t uHalfSignificand      = uHalfPrecision & HALF_SIGNIFICAND_MASK;
     const int64_t  nHalfUnBiasedExponent = (int64_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
     const uint64_t uHalfSign             = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
@@ -343,9 +352,12 @@
         if(uHalfSignificand) {
             // Subnormal case
             uDoubleBiasedExponent = -HALF_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS +1;
-            // A half-precision subnormal can always be converted to a normal double-precision float because the ranges line up
+            // A half-precision subnormal can always be converted to a
+            // normal double-precision float because the ranges line
+            // up
             uDoubleSignificand = uHalfSignificand;
-            // Shift bits from right of the decimal to left, reducing the exponent by 1 each time
+            // Shift bits from right of the decimal to left, reducing
+            // the exponent by 1 each time
             do {
                 uDoubleSignificand <<= 1;
                 uDoubleBiasedExponent--;
@@ -364,7 +376,8 @@
             // First preserve the NaN payload from half to single
             uDoubleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
             if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
-                // Next, set qNaN if needed since half qNaN bit is not copied above
+                // Next, set qNaN if needed since half qNaN bit is not
+                // copied above
                 uDoubleSignificand |= DOUBLE_QUIET_NAN_BIT;
             }
         } else {
@@ -390,9 +403,10 @@
 // Public function; see ieee754.h
 double IEEE754_FloatToDouble(uint32_t uFloat)
 {
-    // Pull out the three parts of the single-precision float
-    // Do all the work in 64 bits because that is what the end result is.
-    // It may give smaller code size and will keep static analyzers happier.
+    // Pull out the three parts of the single-precision float.  Do all
+    // the work in 64 bits because that is what the end result is.  It
+    // may give smaller code size and will keep static analyzers
+    // happier.
     const uint64_t uSingleSignificand      = uFloat & SINGLE_SIGNIFICAND_MASK;
     const int64_t  nSingleUnBiasedExponent = (int64_t)((uFloat & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
     const uint64_t uSingleSign             = (uFloat & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
@@ -406,9 +420,12 @@
         if(uSingleSignificand) {
             // Subnormal case
             uDoubleBiasedExponent = -SINGLE_EXPONENT_BIAS + DOUBLE_EXPONENT_BIAS + 1;
-            // A single-precision subnormal can always be converted to a normal double-precision float because the ranges line up
+            // A single-precision subnormal can always be converted to
+            // a normal double-precision float because the ranges line
+            // up
             uDoubleSignificand = uSingleSignificand;
-            // Shift bits from right of the decimal to left, reducing the exponent by 1 each time
+            // Shift bits from right of the decimal to left, reducing
+            // the exponent by 1 each time
             do {
                 uDoubleSignificand <<= 1;
                 uDoubleBiasedExponent--;
@@ -464,11 +481,12 @@
     const int32_t  nSingleExponent    = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
     const uint32_t uSingleSignificand =   uSingle & SINGLE_SIGNIFICAND_MASK;
 
-    // Bit mask that is the significand bits that would be lost when converting
-    // from single-precision to half-precision
+    // Bit mask that is the significand bits that would be lost when
+    // converting from single-precision to half-precision
     const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
 
-    // Optimizer will re organize so there is only one call to IEEE754_FloatToHalf()
+    // Optimizer will reorganize so there is only one call to
+    // IEEE754_FloatToHalf() in the final code.
     if(uSingle == 0) {
         // Value is 0.0000, not a a subnormal
         result.uSize = IEEE754_UNION_IS_HALF;
commit	576aa0c1d6ba16ade7a7527774cf7479b34e6a4a	[log] [tgz]
author	Laurence Lundblade <lgl@securitytheory.com>	Tue Jul 21 21:36:52 2020 -0700
committer	Laurence Lundblade <lgl@securitytheory.com>	Tue Jul 21 21:36:52 2020 -0700
tree	01c909e50ac75652ee4624d40f17f87c9cd378af
parent	29ec4640a61597d4b5ed0b2db3e984cfa180f52d [diff]