Refine use of integer variables to quiet static analyzers and reduce object code size a little
Note that no bugs of consequence were found by the static analyzer. The analyzer used was codesafe.cn.
diff --git a/src/ieee754.c b/src/ieee754.c
index ea44e1d..ef0adef 100644
--- a/src/ieee754.c
+++ b/src/ieee754.c
@@ -19,7 +19,8 @@
the assumption that the optimizer will do a good job. The LLVM
optimizer, -Os, does seem to do the job and the resulting object code
is smaller from combining code for the many different cases (normal,
- subnormal, infinity, zero...) for the conversions.
+ subnormal, infinity, zero...) for the conversions. GCC is nowhere near
+ as good.
This code has really long lines and is much easier to read because of
them. Some coding guidelines prefer 80 column lines (can they not afford
@@ -27,7 +28,8 @@
columns.
Dead stripping is also really helpful to get code size down when
- floating-point encoding is not needed.
+ floating-point encoding is not needed. (If this is put in a library
+ and linking is against the library, then dead stripping is automatic).
This code works solely using shifts and masks and thus has no
dependency on any math libraries. It can even work if the CPU doesn't
@@ -47,6 +49,10 @@
- https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
- https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
+
+ - https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
+
+ - https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
*/
@@ -97,10 +103,10 @@
0xfe 254 127 Largest normal exponent
0xff 255 128 NaN and Infinity */
#define SINGLE_EXPONENT_BIAS (127)
-#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
+#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) // -127 unbiased
-#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
+#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
// --------- Double-Precision ----------
@@ -132,14 +138,13 @@
/*
- Convenient functions to avoid type punning, compiler warnings and such
- The optimizer reduces them to a simple assignment.
- This is a crusty corner of C. It shouldn't be this hard.
+ Convenient functions to avoid type punning, compiler warnings and
+ such. The optimizer reduces them to a simple assignment. This is a
+ crusty corner of C. It shouldn't be this hard.
These are also in UsefulBuf.h under a different name. They are copied
- here to avoid a dependency on UsefulBuf.h. There is no
- object code size impact because these always optimze down to a
- simple assignment.
+ here to avoid a dependency on UsefulBuf.h. There is no object code
+ size impact because these always optimize down to a simple assignment.
*/
static inline uint32_t CopyFloatToUint32(float f)
{
@@ -175,13 +180,18 @@
{
// Pull the three parts out of the single-precision float
const uint32_t uSingle = CopyFloatToUint32(f);
- const int32_t nSingleUnbiasedExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
- const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
- const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
+ const int32_t nSingleUnbiasedExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
+ const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
+ const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
// Now convert the three parts to half-precision.
- uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
+
+ // All work is done on uint32_t with conversion to uint16_t at the end.
+ // This avoids integer promotions that static analyzers complain about and
+ // reduces code size.
+ uint32_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
+
if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
// +/- Infinity and NaNs -- single biased exponent is 0xff
uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
@@ -189,13 +199,13 @@
// Infinity
uHalfSignificand = 0;
} else {
- // Copy the LBSs of the NaN payload that will fit from the single to the half
+ // Copy the LSBs of the NaN payload that will fit from the single to the half
uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
// It's a qNaN; copy the qNaN bit
uHalfSignificand |= HALF_QUIET_NAN_BIT;
} else {
- // It's a sNaN; make sure the significand is not zero so it stays a NaN
+ // It's an sNaN; make sure the significand is not zero so it stays a NaN
// This is needed because not all significand bits are copied from single
if(!uHalfSignificand) {
// Set the LSB. This is what wikipedia shows for sNAN.
@@ -213,26 +223,28 @@
uHalfSignificand = 0;
} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
// Exponent is too small to express in half-precision normal; make it a half-precision subnormal
- uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
+ uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
// Difference between single normal exponent and the base exponent of a half subnormal
- const uint32_t nExpDiff = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
+ const uint32_t uExpDiff = (uint32_t)-(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
// Also have to shift the significand by the difference in number of bits between a single and a half significand
- const int32_t nSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
+ const uint32_t uSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
- const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01L << SINGLE_NUM_SIGNIFICAND_BITS);
- uHalfSignificand = uSingleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
+ const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01UL << SINGLE_NUM_SIGNIFICAND_BITS);
+ uHalfSignificand = uSingleSignificandSubnormal >> (uExpDiff + uSignificandBitsDiff);
} else {
- // The normal case
- uHalfBiasedExponent = nSingleUnbiasedExponent + HALF_EXPONENT_BIAS;
+ // The normal case, exponent is in range for half-precision
+ uHalfBiasedExponent = (uint32_t)(nSingleUnbiasedExponent + HALF_EXPONENT_BIAS);
uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
}
uHalfSign = uSingleSign;
// Put the 3 values in the right place for a half precision
- const uint16_t uHalfPrecision = uHalfSignificand |
+ const uint32_t uHalfPrecision = uHalfSignificand |
(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
(uHalfSign << HALF_SIGN_SHIFT);
- return uHalfPrecision;
+ // Cast is safe because all the masks and shifts above work to make
+ // a half precision value which is only 16 bits.
+ return (uint16_t)uHalfPrecision;
}
@@ -241,13 +253,19 @@
{
// Pull the three parts out of the double-precision float
const uint64_t uDouble = CopyDoubleToUint64(d);
- const int64_t nDoubleUnbiasedExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
- const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
- const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
-
+ const int64_t nDoubleUnbiasedExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
+ const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
+ const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
// Now convert the three parts to half-precision.
- uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
+
+ // All work is done on uint64_t with conversion to uint16_t at the end.
+ // This avoids integer promotions that static analyzers complain about.
+ // Other options are for these to be unsigned int or int_fast16_t. Code
+ // size doesn't vary much between all these options for 64-bit LLVM,
+ // 64-bit GCC and 32-bit Armv7 LLVM.
+ uint64_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
+
if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
// +/- Infinity and NaNs -- single biased exponent is 0xff
uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
@@ -255,7 +273,7 @@
// Infinity
uHalfSignificand = 0;
} else {
- // Copy the LBSs of the NaN payload that will fit from the double to the half
+ // Copy the LSBs of the NaN payload that will fit from the double to the half
uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
// It's a qNaN; copy the qNaN bit
@@ -279,37 +297,42 @@
uHalfSignificand = 0;
} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
// Exponent is too small to express in half-precision; round down to zero
- uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
+ uHalfBiasedExponent = HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS;
// Difference between double normal exponent and the base exponent of a half subnormal
- const uint64_t nExpDiff = -(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
+ const uint64_t uExpDiff = (uint64_t)-(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
// Also have to shift the significand by the difference in number of bits between a double and a half significand
- const int64_t nSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
+ const uint64_t uSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01ULL << DOUBLE_NUM_SIGNIFICAND_BITS);
- uHalfSignificand = uDoubleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
+ uHalfSignificand = uDoubleSignificandSubnormal >> (uExpDiff + uSignificandBitsDiff);
} else {
- // The normal case
- uHalfBiasedExponent = nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS;
+ // The normal case, exponent is in range for half-precision
+ uHalfBiasedExponent = (uint32_t)(nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS);
uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
}
uHalfSign = uDoubleSign;
// Put the 3 values in the right place for a half precision
- const uint16_t uHalfPrecision = uHalfSignificand |
+ const uint64_t uHalfPrecision = uHalfSignificand |
(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) |
(uHalfSign << HALF_SIGN_SHIFT);
- return uHalfPrecision;
+ // Cast is safe because all the masks and shifts above work to make
+ // a half precision value which is only 16 bits.
+ return (uint16_t)uHalfPrecision;
}
+
// Public function; see ieee754.h
float IEEE754_HalfToFloat(uint16_t uHalfPrecision)
{
// Pull out the three parts of the half-precision float
- const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
- const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
- const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+ // Do all the work in 32 bits because that is what the end result is.
+ // It may give smaller code size and will keep static analyzers happier.
+ const uint32_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
+ const int32_t nHalfUnBiasedExponent = (int32_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
+ const uint32_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
// Make the three parts of the single-precision number
@@ -350,12 +373,11 @@
}
} else {
// Normal number
- uSingleBiasedExponent = nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS;
+ uSingleBiasedExponent = (uint32_t)(nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS);
uSingleSignificand = uHalfSignificand << (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
}
uSingleSign = uHalfSign;
-
// Shift the three parts of the single-precision into place
const uint32_t uSinglePrecision = uSingleSignificand |
(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
@@ -369,9 +391,11 @@
double IEEE754_HalfToDouble(uint16_t uHalfPrecision)
{
// Pull out the three parts of the half-precision float
- const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
- const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
- const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
+ // Do all the work in 64 bits because that is what the end result is.
+ // It may give smaller code size and will keep static analyzers happier.
+ const uint64_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
+ const int64_t nHalfUnBiasedExponent = (int64_t)((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
+ const uint64_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
// Make the three parts of hte single-precision number
@@ -412,8 +436,8 @@
}
} else {
// Normal number
- uDoubleBiasedExponent = nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS;
- uDoubleSignificand = (uint64_t)uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
+ uDoubleBiasedExponent = (uint64_t)(nHalfUnBiasedExponent + DOUBLE_EXPONENT_BIAS);
+ uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
}
uDoubleSign = uHalfSign;
@@ -433,7 +457,7 @@
// Pull the neeed two parts out of the single-precision float
const uint32_t uSingle = CopyFloatToUint32(f);
- const int32_t nSingleExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
+ const int32_t nSingleExponent = (int32_t)((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
// Bit mask that is the significand bits that would be lost when converting
@@ -469,7 +493,7 @@
// Pull the needed two parts out of the double-precision float
const uint64_t uDouble = CopyDoubleToUint64(d);
- const int64_t nDoubleExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
+ const int64_t nDoubleExponent = (int64_t)((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
// Masks to check whether dropped significand bits are zero or not