Blame - src/ieee754.c - mirror/QCBOR - TrustedFirmware Git Browser

blob: a74fa7874a7dcd3bd022648c3de24654dd083070 [file] [log] [blame]

Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	1	/*==============================================================================
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	2
Laurence Lundblade	d92a616	2018-11-01 11:38:35 +0700	[diff] [blame]	3	Copyright (c) 2018, Laurence Lundblade.
				4	All rights reserved.
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	5
Laurence Lundblade	0dbc917	2018-11-01 14:17:21 +0700	[diff] [blame]	6	Redistribution and use in source and binary forms, with or without
				7	modification, are permitted provided that the following conditions are
				8	met:
				9	* Redistributions of source code must retain the above copyright
				10	notice, this list of conditions and the following disclaimer.
				11	* Redistributions in binary form must reproduce the above
				12	copyright notice, this list of conditions and the following
				13	disclaimer in the documentation and/or other materials provided
				14	with the distribution.
				15	* The name "Laurence Lundblade" may not be used to
				16	endorse or promote products derived from this software without
				17	specific prior written permission.
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	18
Laurence Lundblade	0dbc917	2018-11-01 14:17:21 +0700	[diff] [blame]	19	THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
				20	WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
				21	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
				22	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
				23	BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				24	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				25	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
				26	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
				27	WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
				28	OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
				29	IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	30	==============================================================================*/
				31
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	32	//
				33	// ieee754.c
				34	// Indefinite
				35	//
				36	// Created by Laurence Lundblade on 7/23/18.
				37	// Copyright © 2018 Laurence Lundblade. All rights reserved.
				38	//
				39
				40	#include "ieee754.h"
				41	#include <string.h> // For memcpy()
				42
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	43
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	44	/*
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	45	This code is written for clarity and verifiability, not for size, on the assumption
				46	that the optimizer will do a good job. The LLVM optimizer, -Os, does seem to do the
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	47	job and the resulting object code is smaller from combining code for the many different
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	48	cases (normal, subnormal, infinity, zero...) for the conversions.
				49
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	50	Dead stripping is also really helpful to get code size down when floating point
				51	encoding is not needed.
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	52
Laurence Lundblade	570fab5	2018-10-13 18:28:27 +0800	[diff] [blame]	53	This code works solely using shifts and masks and thus has no dependency on
				54	any math libraries. It can even work if the CPU doesn't have any floating
				55	point support, though that isn't the most useful thing to do.
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	56
				57	The memcpy() dependency is only for CopyFloatToUint32() and friends which only
				58	is needed to avoid type punning when converting the actual float bits to
				59	an unsigned value so the bit shifts and masks can work.
				60	*/
				61
				62	/*
				63	The references used to write this code:
				64
				65	- IEEE 754-2008, particularly section 3.6 and 6.2.1
				66
				67	- https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
				68
				69	- https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	70	*/
				71
				72
				// ----- Half Precision -----------
				// Bit layout of an IEEE 754 binary16 value: 1 sign bit, 5 exponent
				// bits and 10 significand bits, from most to least significant.
				#define HALF_NUM_SIGNIFICAND_BITS (10)
				#define HALF_NUM_EXPONENT_BITS (5)
				#define HALF_NUM_SIGN_BITS (1)

				#define HALF_SIGNIFICAND_SHIFT (0)
				#define HALF_EXPONENT_SHIFT (HALF_NUM_SIGNIFICAND_BITS)
				#define HALF_SIGN_SHIFT (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)

				#define HALF_SIGNIFICAND_MASK (0x3ff) // 0x03ff, the lower 10 bits
				#define HALF_EXPONENT_MASK (0x1f << HALF_EXPONENT_SHIFT) // 0x7c00, 5 bits of exponent
				#define HALF_SIGN_MASK (0x01 << HALF_SIGN_SHIFT) // 0x8000, 1 bit of sign
				#define HALF_QUIET_NAN_BIT (0x01 << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200

				/* Biased (hex)  Biased (dec)  Unbiased  Use
				   0x00           0            -15       0 and subnormal
				   0x01           1            -14       Smallest normal exponent
				   0x1e          30             15       Largest normal exponent
				   0x1f          31             16       NaN and Infinity */
				#define HALF_EXPONENT_BIAS (15)
				#define HALF_EXPONENT_MAX (HALF_EXPONENT_BIAS) // 15 Unbiased
				#define HALF_EXPONENT_MIN (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
				#define HALF_EXPONENT_ZERO (-HALF_EXPONENT_BIAS) // -15 Unbiased
				#define HALF_EXPONENT_INF_OR_NAN (HALF_EXPONENT_BIAS+1) // 16 Unbiased
				97
				98
				// ------ Single Precision --------
				// Bit layout of an IEEE 754 binary32 value: 1 sign bit, 8 exponent
				// bits and 23 significand bits, from most to least significant.
				#define SINGLE_NUM_SIGNIFICAND_BITS (23)
				#define SINGLE_NUM_EXPONENT_BITS (8)
				#define SINGLE_NUM_SIGN_BITS (1)

				#define SINGLE_SIGNIFICAND_SHIFT (0)
				#define SINGLE_EXPONENT_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS)
				#define SINGLE_SIGN_SHIFT (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

				#define SINGLE_SIGNIFICAND_MASK (0x7fffffUL) // The lower 23 bits
				#define SINGLE_EXPONENT_MASK (0xffUL << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
				#define SINGLE_SIGN_MASK (0x01UL << SINGLE_SIGN_SHIFT) // 1 bit of sign
				#define SINGLE_QUIET_NAN_BIT (0x01UL << (SINGLE_NUM_SIGNIFICAND_BITS-1))

				/* Biased (hex)  Biased (dec)  Unbiased  Use
				   0x00            0           -127      0 and subnormal
				   0x01            1           -126      Smallest normal exponent
				   0x7f          127              0      Exponent of the value 1
				   0xfe          254            127      Largest normal exponent
				   0xff          255            128      NaN and Infinity */
				#define SINGLE_EXPONENT_BIAS (127)
				#define SINGLE_EXPONENT_MAX (SINGLE_EXPONENT_BIAS) // 127 unbiased
				#define SINGLE_EXPONENT_MIN (-SINGLE_EXPONENT_BIAS+1) // -126 unbiased
				#define SINGLE_EXPONENT_ZERO (-SINGLE_EXPONENT_BIAS) // -127 unbiased
				#define SINGLE_EXPONENT_INF_OR_NAN (SINGLE_EXPONENT_BIAS+1) // 128 unbiased
				124
				125
				// --------- Double Precision ----------
				// Bit layout of an IEEE 754 binary64 value: 1 sign bit, 11 exponent
				// bits and 52 significand bits, from most to least significant.
				#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
				#define DOUBLE_NUM_EXPONENT_BITS (11)
				#define DOUBLE_NUM_SIGN_BITS (1)

				#define DOUBLE_SIGNIFICAND_SHIFT (0)
				#define DOUBLE_EXPONENT_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS)
				#define DOUBLE_SIGN_SHIFT (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

				#define DOUBLE_SIGNIFICAND_MASK (0xfffffffffffffULL) // The lower 52 bits
				#define DOUBLE_EXPONENT_MASK (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
				#define DOUBLE_SIGN_MASK (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
				#define DOUBLE_QUIET_NAN_BIT (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))


				/* Biased (hex)  Biased (dec)  Unbiased  Use
				   0x000            0          -1023     0 and subnormal
				   0x001            1          -1022     Smallest normal exponent
				   0x7fe         2046           1023     Largest normal exponent
				   0x7ff         2047           1024     NaN and Infinity */
				#define DOUBLE_EXPONENT_BIAS (1023)
				#define DOUBLE_EXPONENT_MAX (DOUBLE_EXPONENT_BIAS) // 1023 unbiased
				#define DOUBLE_EXPONENT_MIN (-DOUBLE_EXPONENT_BIAS+1) // -1022 unbiased
				#define DOUBLE_EXPONENT_ZERO (-DOUBLE_EXPONENT_BIAS) // -1023 unbiased
				#define DOUBLE_EXPONENT_INF_OR_NAN (DOUBLE_EXPONENT_BIAS+1) // 1024 unbiased
				151
				152
				153
				/*
				 Convenience functions to avoid type punning, compiler warnings and such.
				 The optimizer reduces them to a simple assignment.
				 This is a crusty corner of C. It shouldn't be this hard.

				 These are also in UsefulBuf.h under a different name. They are copied
				 here to avoid a dependency on UsefulBuf.h. There is no
				 object code size impact because these always optimize down to a
				 simple assignment.
				 */

				/*
				 Return the bits of a float as a uint32_t.

				 The bytes are copied with memcpy() rather than read through a cast
				 pointer so that no aliasing rule is broken.
				 */
				static inline uint32_t CopyFloatToUint32(float f)
				{
				    uint32_t u32;
				    // Size is tied to the destination so the copy can never overrun it
				    memcpy(&u32, &f, sizeof u32);
				    return u32;
				}
				170
				/*
				 Return the bits of a double as a uint64_t.

				 The bytes are copied with memcpy() rather than read through a cast
				 pointer so that no aliasing rule is broken.
				 */
				static inline uint64_t CopyDoubleToUint64(double d)
				{
				    uint64_t u64;
				    // Size is tied to the destination so the copy can never overrun it
				    memcpy(&u64, &d, sizeof u64);
				    return u64;
				}
				177
/*
 Return the float whose bits are those of the given uint32_t.

 The bytes are copied with memcpy() rather than read through a cast
 pointer so that no aliasing rule is broken.
 */
static inline float CopyUint32ToFloat(uint32_t u32)
{
    float f;
    // Size is tied to the destination so the copy can never overrun it
    memcpy(&f, &u32, sizeof f);
    return f;
}
				184
				185
				186
				187	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	188	uint16_t IEEE754_FloatToHalf(float f)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	189	{
				190	// Pull the three parts out of the single-precision float
				191	const uint32_t uSingle = CopyFloatToUint32(f);
				192	const int32_t nSingleUnbiasedExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				193	const uint32_t uSingleSign = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;
				194	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
				195
				196
				197	// Now convert the three parts to half-precision.
				198	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				199	if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				200	// +/- Infinity and NaNs -- single biased exponent is 0xff
				201	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				202	if(!uSingleSignificand) {
				203	// Infinity
				204	uHalfSignificand = 0;
				205	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	206	// Copy the LBSs of the NaN payload that will fit from the single to the half
				207	uHalfSignificand = uSingleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				208	if(uSingleSignificand & SINGLE_QUIET_NAN_BIT) {
				209	// It's a qNaN; copy the qNaN bit
				210	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	211	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	212	// It's a sNaN; make sure the significand is not zero so it stays a NaN
				213	// This is needed because not all significand bits are copied from single
				214	if(!uHalfSignificand) {
				215	// Set the LSB. This is what wikipedia shows for sNAN.
				216	uHalfSignificand \|= 0x01;
				217	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	218	}
				219	}
				220	} else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
				221	// 0 or a subnormal number -- singled biased exponent is 0
				222	uHalfBiasedExponent = 0;
				223	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision
				224	} else if(nSingleUnbiasedExponent > HALF_EXPONENT_MAX) {
				225	// Exponent is too large to express in half-precision; round up to infinity
				226	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				227	uHalfSignificand = 0;
				228	} else if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN) {
				229	// Exponent is too small to express in half-precision normal; make it a half-precision subnormal
				230	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				231	// Difference between single normal exponent and the base exponent of a half subnormal
				232	const uint32_t nExpDiff = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);
				233	// Also have to shift the significand by the difference in number of bits between a single and a half significand
				234	const int32_t nSignificandBitsDiff = SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				235	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				236	const uint32_t uSingleSignificandSubnormal = uSingleSignificand + (0x01L << SINGLE_NUM_SIGNIFICAND_BITS);
				237	uHalfSignificand = uSingleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				238	} else {
				239	// The normal case
				240	uHalfBiasedExponent = nSingleUnbiasedExponent + HALF_EXPONENT_BIAS;
				241	uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				242	}
				243	uHalfSign = uSingleSign;
				244
				245	// Put the 3 values in the right place for a half precision
				246	const uint16_t uHalfPrecision = uHalfSignificand \|
				247	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				248	(uHalfSign << HALF_SIGN_SHIFT);
				249	return uHalfPrecision;
				250	}
				251
				252
				253	// Public function; see ieee754.h
Laurence Lundblade	cc2ed34	2018-09-22 17:29:55 -0700	[diff] [blame]	254	uint16_t IEEE754_DoubleToHalf(double d)
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	255	{
				256	// Pull the three parts out of the double-precision float
				257	const uint64_t uDouble = CopyDoubleToUint64(d);
				258	const int64_t nDoubleUnbiasedExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				259	const uint64_t uDoubleSign = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
				260	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				261
				262
				263	// Now convert the three parts to half-precision.
				264	uint16_t uHalfSign, uHalfSignificand, uHalfBiasedExponent;
				265	if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				266	// +/- Infinity and NaNs -- single biased exponent is 0xff
				267	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				268	if(!uDoubleSignificand) {
				269	// Infinity
				270	uHalfSignificand = 0;
				271	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	272	// Copy the LBSs of the NaN payload that will fit from the double to the half
				273	uHalfSignificand = uDoubleSignificand & (HALF_SIGNIFICAND_MASK & ~HALF_QUIET_NAN_BIT);
				274	if(uDoubleSignificand & DOUBLE_QUIET_NAN_BIT) {
				275	// It's a qNaN; copy the qNaN bit
				276	uHalfSignificand \|= HALF_QUIET_NAN_BIT;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	277	} else {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	278	// It's an sNaN; make sure the significand is not zero so it stays a NaN
				279	// This is needed because not all significand bits are copied from single
				280	if(!uHalfSignificand) {
				281	// Set the LSB. This is what wikipedia shows for sNAN.
				282	uHalfSignificand \|= 0x01;
				283	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	284	}
				285	}
				286	} else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
				287	// 0 or a subnormal number -- double biased exponent is 0
				288	uHalfBiasedExponent = 0;
				289	uHalfSignificand = 0; // Any subnormal single will be too small to express as a half precision; TODO, is this really true?
				290	} else if(nDoubleUnbiasedExponent > HALF_EXPONENT_MAX) {
				291	// Exponent is too large to express in half-precision; round up to infinity; TODO, is this really true?
				292	uHalfBiasedExponent = HALF_EXPONENT_INF_OR_NAN + HALF_EXPONENT_BIAS;
				293	uHalfSignificand = 0;
				294	} else if(nDoubleUnbiasedExponent < HALF_EXPONENT_MIN) {
				295	// Exponent is too small to express in half-precision; round down to zero
				296	uHalfBiasedExponent = (uint16_t)(HALF_EXPONENT_ZERO + HALF_EXPONENT_BIAS);
				297	// Difference between double normal exponent and the base exponent of a half subnormal
				298	const uint64_t nExpDiff = -(nDoubleUnbiasedExponent - HALF_EXPONENT_MIN);
				299	// Also have to shift the significand by the difference in number of bits between a double and a half significand
				300	const int64_t nSignificandBitsDiff = DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;
				301	// Add in the 1 that is implied in the significand of a normal number; it needs to be present in a subnormal
				302	const uint64_t uDoubleSignificandSubnormal = uDoubleSignificand + (0x01L << DOUBLE_NUM_SIGNIFICAND_BITS);
				303	uHalfSignificand = uDoubleSignificandSubnormal >> (nExpDiff + nSignificandBitsDiff);
				304	} else {
				305	// The normal case
				306	uHalfBiasedExponent = nDoubleUnbiasedExponent + HALF_EXPONENT_BIAS;
				307	uHalfSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				308	}
				309	uHalfSign = uDoubleSign;
				310
				311
				312	// Put the 3 values in the right place for a half precision
				313	const uint16_t uHalfPrecision = uHalfSignificand \|
				314	(uHalfBiasedExponent << HALF_EXPONENT_SHIFT) \|
				315	(uHalfSign << HALF_SIGN_SHIFT);
				316	return uHalfPrecision;
				317	}
				318
				319
				320	// Public function; see ieee754.h
				321	float IEEE754_HalfToFloat(uint16_t uHalfPrecision)
				322	{
				323	// Pull out the three parts of the half-precision float
				324	const uint16_t uHalfSignificand = uHalfPrecision & HALF_SIGNIFICAND_MASK;
				325	const int16_t nHalfUnBiasedExponent = ((uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT) - HALF_EXPONENT_BIAS;
				326	const uint16_t uHalfSign = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;
				327
				328
				329	// Make the three parts of the single-precision number
				330	uint32_t uSingleSignificand, uSingleSign, uSingleBiasedExponent;
				331	if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
				332	// 0 or subnormal
				333	if(uHalfSignificand) {
				334	// Subnormal case
				335	uSingleBiasedExponent = -HALF_EXPONENT_BIAS + SINGLE_EXPONENT_BIAS +1;
				336	// A half-precision subnormal can always be converted to a normal single-precision float because the ranges line up
				337	uSingleSignificand = uHalfSignificand;
				338	// Shift bits from right of the decimal to left, reducing the exponent by 1 each time
				339	do {
				340	uSingleSignificand <<= 1;
				341	uSingleBiasedExponent--;
				342	} while ((uSingleSignificand & 0x400) == 0);
				343	uSingleSignificand &= HALF_SIGNIFICAND_MASK;
				344	uSingleSignificand <<= (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				345	} else {
				346	// Just zero
				347	uSingleBiasedExponent = SINGLE_EXPONENT_ZERO + SINGLE_EXPONENT_BIAS;
				348	uSingleSignificand = 0;
				349	}
				350	} else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
				351	// NaN or Inifinity
				352	uSingleBiasedExponent = SINGLE_EXPONENT_INF_OR_NAN + SINGLE_EXPONENT_BIAS;
				353	if(uHalfSignificand) {
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	354	// NaN
				355	// First preserve the NaN payload from half to single
				356	uSingleSignificand = uHalfSignificand & ~HALF_QUIET_NAN_BIT;
				357	if(uHalfSignificand & HALF_QUIET_NAN_BIT) {
				358	// Next, set qNaN if needed since half qNaN bit is not copied above
				359	uSingleSignificand \|= SINGLE_QUIET_NAN_BIT;
				360	}
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	361	} else {
				362	// Infinity
				363	uSingleSignificand = 0;
				364	}
				365	} else {
				366	// Normal number
				367	uSingleBiasedExponent = nHalfUnBiasedExponent + SINGLE_EXPONENT_BIAS;
				368	uSingleSignificand = uHalfSignificand << (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
				369	}
				370	uSingleSign = uHalfSign;
				371
				372
				373	// Shift the three parts of the single precision into place
				374	const uint32_t uSinglePrecision = uSingleSignificand \|
				375	(uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) \|
				376	(uSingleSign << SINGLE_SIGN_SHIFT);
				377
				378	return CopyUint32ToFloat(uSinglePrecision);
				379	}
				380
				381
Laurence Lundblade	781fd82	2018-10-01 09:37:52 -0700	[diff] [blame]	382	/*
				383	double IEEE754_HalfToDouble(uint16_t uHalfPrecision) is not needed
				384	*/
				385
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	386
				387
				388	// Public function; see ieee754.h
				389	IEEE754_union IEEE754_FloatToSmallest(float f)
				390	{
				391	IEEE754_union result;
				392
				393	// Pull the neeed two parts out of the single-precision float
				394	const uint32_t uSingle = CopyFloatToUint32(f);
				395	const int32_t nSingleExponent = ((uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT) - SINGLE_EXPONENT_BIAS;
				396	const uint32_t uSingleSignificand = uSingle & SINGLE_SIGNIFICAND_MASK;
				397
				398	// Bit mask that is the significand bits that would be lost when converting
				399	// from single-precision to half-precision
				400	const uint64_t uDroppedSingleBits = SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				401
				402	// Optimizer will re organize so there is only one call to IEEE754_FloatToHalf()
				403	if(uSingle == 0) {
				404	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	405	result.uSize = IEEE754_UNION_IS_HALF;
				406	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	407	} else if(nSingleExponent == SINGLE_EXPONENT_INF_OR_NAN) {
				408	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	409	result.uSize = IEEE754_UNION_IS_HALF;
				410	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	411	} else if((nSingleExponent >= HALF_EXPONENT_MIN) && nSingleExponent <= HALF_EXPONENT_MAX && (!(uSingleSignificand & uDroppedSingleBits))) {
				412	// Normal number in exponent range and precision won't be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	413	result.uSize = IEEE754_UNION_IS_HALF;
				414	result.uValue = IEEE754_FloatToHalf(f);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	415	} else {
				416	// Subnormal, exponent out of range, or precision will be lost
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	417	result.uSize = IEEE754_UNION_IS_SINGLE;
				418	result.uValue = uSingle;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	419	}
				420
				421	return result;
				422	}
				423
Laurence Lundblade	8db3d3e	2018-09-29 11:46:37 -0700	[diff] [blame]	424	// Public function; see ieee754.h
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	425	IEEE754_union IEEE754_DoubleToSmallestInternal(double d, int bAllowHalfPrecision)
				426	{
				427	IEEE754_union result;
				428
				429	// Pull the needed two parts out of the double-precision float
				430	const uint64_t uDouble = CopyDoubleToUint64(d);
				431	const int64_t nDoubleExponent = ((uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT) - DOUBLE_EXPONENT_BIAS;
				432	const uint64_t uDoubleSignificand = uDouble & DOUBLE_SIGNIFICAND_MASK;
				433
				434	// Masks to check whether dropped significand bits are zero or not
				435	const uint64_t uDroppedDoubleBits = DOUBLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS;
				436	const uint64_t uDroppedSingleBits = DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS;
				437
				438	// The various cases
Laurence Lundblade	d711fb2	2018-09-26 14:35:22 -0700	[diff] [blame]	439	if(d == 0.0) { // Take care of positive and negative zero
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	440	// Value is 0.0000, not a a subnormal
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	441	result.uSize = IEEE754_UNION_IS_HALF;
				442	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	443	} else if(nDoubleExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
				444	// NaN, +/- infinity
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	445	result.uSize = IEEE754_UNION_IS_HALF;
				446	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	447	} else if(bAllowHalfPrecision && (nDoubleExponent >= HALF_EXPONENT_MIN) && nDoubleExponent <= HALF_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedDoubleBits))) {
				448	// Can convert to half without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	449	result.uSize = IEEE754_UNION_IS_HALF;
				450	result.uValue = IEEE754_DoubleToHalf(d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	451	} else if((nDoubleExponent >= SINGLE_EXPONENT_MIN) && nDoubleExponent <= SINGLE_EXPONENT_MAX && (!(uDoubleSignificand & uDroppedSingleBits))) {
				452	// Can convert to single without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	453	result.uSize = IEEE754_UNION_IS_SINGLE;
				454	result.uValue = CopyFloatToUint32((float)d);
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	455	} else {
				456	// Can't convert without precision loss
Laurence Lundblade	577d821	2018-11-01 14:04:08 +0700	[diff] [blame]	457	result.uSize = IEEE754_UNION_IS_DOUBLE;
				458	result.uValue = uDouble;
Laurence Lundblade	12d32c5	2018-09-19 11:25:27 -0700	[diff] [blame]	459	}
				460
				461	return result;
				462	}
				463