/*==============================================================================
 float_tests.c -- tests for float and conversion to/from half-precision

 Copyright 2018 Laurence Lundblade

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice shall be included
 in all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.

 (This is the MIT license)
 ==============================================================================*/
//  Created by Laurence Lundblade on 9/19/18.
Laurence Lundblade781fd822018-10-01 09:37:52 -070029
Laurence Lundblade68a13352018-09-23 02:19:54 -070030
#include "float_tests.h"
#include "qcbor.h"
#include "half_to_double_from_rfc7049.h"
#include <math.h>   // For INFINITY and NAN and isnan()
#include <stdint.h> // For uint8_t, uint16_t and uint32_t
Laurence Lundblade68a13352018-09-23 02:19:54 -070035
Laurence Lundblade2d85ce42018-10-12 14:12:47 +080036
37
38
/*
 Expected CBOR produced by FloatValuesTest1(): an array of 30 items
 (0x98 0x1e) holding 15 single-precision values (initial byte 0xfa,
 4 payload bytes each) followed by the same 15 values as
 double-precision (initial byte 0xfb, 8 payload bytes each).
 */
static const uint8_t pExpectedEncodedFloat[] = {
    0x98, 0x1e, 0xfa, 0x00, 0x00, 0x00, 0x00, 0xfa,
    0x3f, 0x80, 0x00, 0x00, 0xfa, 0x3f, 0x8c, 0xcc,
    0xcd, 0xfa, 0x3f, 0xc0, 0x00, 0x00, 0xfa, 0x47,
    0x7f, 0xe0, 0x00, 0xfa, 0x47, 0xc3, 0x50, 0x00,
    0xfa, 0x7f, 0x7f, 0xff, 0xff, 0xfa, 0x7f, 0x80,
    0x00, 0x00, 0xfa, 0x33, 0x80, 0x00, 0x00, 0xfa,
    0x38, 0x80, 0x00, 0x00, 0xfa, 0xc0, 0x80, 0x00,
    0x00, 0xfa, 0xc0, 0x83, 0x33, 0x33, 0xfa, 0x7f,
    0xc0, 0x00, 0x00, 0xfa, 0x7f, 0x80, 0x00, 0x00,
    0xfa, 0xff, 0x80, 0x00, 0x00, 0xfb, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x3f,
    0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb,
    0x3f, 0xf1, 0x99, 0x99, 0x99, 0x99, 0x99, 0x9a,
    0xfb, 0x3f, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0xfb, 0x40, 0xef, 0xfc, 0x00, 0x00, 0x00,
    0x00, 0x00, 0xfb, 0x40, 0xf8, 0x6a, 0x00, 0x00,
    0x00, 0x00, 0x00, 0xfb, 0x47, 0xef, 0xff, 0xff,
    0xe0, 0x00, 0x00, 0x00, 0xfb, 0x7e, 0x37, 0xe4,
    0x3c, 0x88, 0x00, 0x75, 0x9c, 0xfb, 0x3e, 0x70,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb, 0x3f,
    0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfb,
    0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xfb, 0xc0, 0x10, 0x66, 0x66, 0x66, 0x66, 0x66,
    0x66, 0xfb, 0x7f, 0xf8, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0xfb, 0x7f, 0xf0, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0xfb, 0xff, 0xf0, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00};
67
68
69int FloatValuesTest1()
70{
71 QCBOREncodeContext ECtx;
72 int nReturn = 0;
73
74 UsefulBuf_MakeStackUB(EncodedStorage, 1000);
75
76 QCBOREncode_Init(&ECtx, EncodedStorage);
77 QCBOREncode_OpenArray(&ECtx);
78
79 // These are all samples published
80 // in RFC 7049.
81 QCBOREncode_AddFloat(&ECtx, 0.0);
82 QCBOREncode_AddFloat(&ECtx, 1.0);
83 QCBOREncode_AddFloat(&ECtx, 1.1); // appx
84 QCBOREncode_AddFloat(&ECtx, 1.5);
85 QCBOREncode_AddFloat(&ECtx, 65504.0);
86 QCBOREncode_AddFloat(&ECtx, 100000.0);
87 QCBOREncode_AddFloat(&ECtx, 3.4028234663852886e+38);
88 QCBOREncode_AddFloat(&ECtx, 1.0e+300); // Infinity?
89 QCBOREncode_AddFloat(&ECtx, 5.960464477539063e-8);
90 QCBOREncode_AddFloat(&ECtx, 0.00006103515625);
91 QCBOREncode_AddFloat(&ECtx, -4.0);
92 QCBOREncode_AddFloat(&ECtx, -4.1); // appx
93
94 QCBOREncode_AddFloat(&ECtx, NAN);
95 QCBOREncode_AddFloat(&ECtx, INFINITY);
96 QCBOREncode_AddFloat(&ECtx, -INFINITY);
97
98
99 QCBOREncode_AddDouble(&ECtx, 0.0);
100 QCBOREncode_AddDouble(&ECtx, 1.0);
101 QCBOREncode_AddDouble(&ECtx, 1.1); // appx
102 QCBOREncode_AddDouble(&ECtx, 1.5);
103 QCBOREncode_AddDouble(&ECtx, 65504.0);
104 QCBOREncode_AddDouble(&ECtx, 100000.0);
105 QCBOREncode_AddDouble(&ECtx, 3.4028234663852886e+38);
106 QCBOREncode_AddDouble(&ECtx, 1.0e+300); // Infinity?
107 QCBOREncode_AddDouble(&ECtx, 5.960464477539063e-8);
108 QCBOREncode_AddDouble(&ECtx, 0.00006103515625);
109 QCBOREncode_AddDouble(&ECtx, -4.0);
110 QCBOREncode_AddDouble(&ECtx, -4.1); // appx
111
112 QCBOREncode_AddDouble(&ECtx, NAN);
113 QCBOREncode_AddDouble(&ECtx, INFINITY);
114 QCBOREncode_AddDouble(&ECtx, -INFINITY);
115
116 QCBOREncode_CloseArray(&ECtx);
117
118 UsefulBufC Encoded;
119 if(QCBOREncode_Finish2(&ECtx, &Encoded)) {
120 nReturn = -1;
121 }
122
123 if(UsefulBuf_Compare(Encoded, UsefulBuf_FromByteArrayLiteral(pExpectedEncodedFloat))) {
124 nReturn = -2;
125 }
126
127 //printencoded(pEncoded, nEncodedLen);
128
129 return(nReturn);
130}
131
132
133
134
/*
 Expected CBOR for half_precision_encode_basic() and the input for
 half_precision_decode_basic(): a map of 17 entries (0xB1) whose values
 are all half-precision floats (initial byte 0xF9 followed by 2 bytes).
 The first 12 labels are text strings, the last 5 are the integers 3..7.
 */
static const uint8_t ExpectedHalf[] = {
    0xB1,
        0x64,
            0x7A, 0x65, 0x72, 0x6F,
        0xF9, 0x00, 0x00,   // 0.000
        0x6A,
            0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
        0xF9, 0x7C, 0x00,   // Infinity
        0x73,
            0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79,
        0xF9, 0xFC, 0x00,   // -Infinity
        0x63,
            0x4E, 0x61, 0x4E,
        0xF9, 0x7E, 0x00,   // NaN
        0x63,
            0x6F, 0x6E, 0x65,
        0xF9, 0x3C, 0x00,   // 1.0
        0x69,
            0x6F, 0x6E, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
        0xF9, 0x35, 0x55,   // 0.333251953125
        0x76,
            0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
        0xF9, 0x7B, 0xFF,   // 65504.0
        0x78, 0x18, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E,
        0xF9, 0x7C, 0x00,   // Infinity
        0x72,
            0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
        0xF9, 0x00, 0x01,   // 0.000000059604
        0x6F,
            0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
        0xF9, 0x03, 0xFF,   // 0.0000609755516 (largest half-precision subnormal)
        0x71,
            0x62, 0x69, 0x67, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C,
        0xF9, 0x04, 0x00,   // 0.00006103515625 (2^-14, smallest half-precision normal)
        0x70,
            0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65,
        0xF9, 0x00, 0x00,   // 0.0 (the single-precision subnormal input is too small for half)
        0x03,
        0xF9, 0xC0, 0x00,   // -2
        0x04,
        0xF9, 0x7E, 0x00,   // qNaN
        0x05,
        0xF9, 0x7C, 0x01,   // sNaN
        0x06,
        0xF9, 0x7E, 0x0F,   // qNaN with payload 0x0f
        0x07,
        0xF9, 0x7C, 0x0F,   // sNaN with payload 0x0f

};
184
185
186
187int half_precision_encode_basic()
188{
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700189 UsefulBuf_MakeStackUB(EncodedHalfsMem, 250);
Laurence Lundblade68a13352018-09-23 02:19:54 -0700190
191 QCBOREncodeContext EC;
192 QCBOREncode_Init(&EC, EncodedHalfsMem);
193 // These are mostly from https://en.wikipedia.org/wiki/Half-precision_floating-point_format
194 QCBOREncode_OpenMap(&EC);
195 QCBOREncode_AddFloatAsHalfToMap(&EC, "zero", 0.00F);
196 QCBOREncode_AddFloatAsHalfToMap(&EC, "infinitity", INFINITY);
197 QCBOREncode_AddFloatAsHalfToMap(&EC, "negative infinitity", -INFINITY);
198 QCBOREncode_AddFloatAsHalfToMap(&EC, "NaN", NAN);
199 QCBOREncode_AddFloatAsHalfToMap(&EC, "one", 1.0F);
200 QCBOREncode_AddFloatAsHalfToMap(&EC, "one third", 0.333251953125F);
201 QCBOREncode_AddFloatAsHalfToMap(&EC, "largest half-precision",65504.0F);
202 // Float 65536.0F is 0x47800000 in hex. It has an exponent of 16, which is larger than 15, the largest half-precision exponent
203 QCBOREncode_AddFloatAsHalfToMap(&EC, "too-large half-precision", 65536.0F);
204 // Should convert to smallest possible half precision which is encodded as 0x00 0x01 or 5.960464477539063e-8
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700205 QCBOREncode_AddFloatAsHalfToMap(&EC, "smallest subnormal", 0.0000000596046448F);
Laurence Lundblade68a13352018-09-23 02:19:54 -0700206 QCBOREncode_AddFloatAsHalfToMap(&EC, "smallest normal", 0.0000610351526F); // in hex single is 0x387fffff, exponent -15, significand 7fffff
207 QCBOREncode_AddFloatAsHalfToMap(&EC, "biggest subnormal", 0.0000610351563F); // in hex single is 0x38800000, exponent -14, significand 0
208 QCBOREncode_AddFloatAsHalfToMap(&EC, "subnormal single", 4e-40F);
209 QCBOREncode_AddFloatAsHalfToMapN(&EC, 3, -2.0F);
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700210 QCBOREncode_AddFloatAsHalfToMapN(&EC, 4, UsefulBufUtil_CopyUint32ToFloat(0x7fc00000L)); // qNaN
211 QCBOREncode_AddFloatAsHalfToMapN(&EC, 5, UsefulBufUtil_CopyUint32ToFloat(0x7f800001L)); // sNaN
212 QCBOREncode_AddFloatAsHalfToMapN(&EC, 6, UsefulBufUtil_CopyUint32ToFloat(0x7fc0f00fL)); // qNaN with payload
213 QCBOREncode_AddFloatAsHalfToMapN(&EC, 7, UsefulBufUtil_CopyUint32ToFloat(0x7f80f00fL)); // sNaN with payload
Laurence Lundblade68a13352018-09-23 02:19:54 -0700214 QCBOREncode_CloseMap(&EC);
215
Laurence Lundblade781fd822018-10-01 09:37:52 -0700216 UsefulBufC EncodedHalfs;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700217 int nReturn = QCBOREncode_Finish2(&EC, &EncodedHalfs);
Laurence Lundblade68a13352018-09-23 02:19:54 -0700218 if(nReturn) {
219 return -1;
220 }
221
Laurence Lundblade781fd822018-10-01 09:37:52 -0700222 if(UsefulBuf_Compare(EncodedHalfs, UsefulBuf_FromByteArrayLiteral(ExpectedHalf))) {
Laurence Lundblade68a13352018-09-23 02:19:54 -0700223 return -3;
224 }
225
226 return 0;
227}
228
229
230int half_precision_decode_basic()
231{
232 UsefulBufC HalfPrecision = UsefulBuf_FromByteArrayLiteral(ExpectedHalf);
233
234 QCBORDecodeContext DC;
235 QCBORDecode_Init(&DC, HalfPrecision, 0);
236
237 QCBORItem Item;
238
239 QCBORDecode_GetNext(&DC, &Item);
240 if(Item.uDataType != QCBOR_TYPE_MAP) {
241 return -1;
242 }
243
244 QCBORDecode_GetNext(&DC, &Item);
245 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 0.0F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700246 return -2;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700247 }
248
249 QCBORDecode_GetNext(&DC, &Item);
250 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != INFINITY) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700251 return -3;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700252 }
253
254 QCBORDecode_GetNext(&DC, &Item);
255 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != -INFINITY) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700256 return -4;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700257 }
258
259 QCBORDecode_GetNext(&DC, &Item); // TODO, is this really converting right? It is carrying payload, but this confuses things.
260 if(Item.uDataType != QCBOR_TYPE_FLOAT || !isnan(Item.val.fnum)) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700261 return -5;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700262 }
263
264 QCBORDecode_GetNext(&DC, &Item);
265 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 1.0F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700266 return -6;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700267 }
268
269 QCBORDecode_GetNext(&DC, &Item);
270 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 0.333251953125F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700271 return -7;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700272 }
273
274 QCBORDecode_GetNext(&DC, &Item);
275 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 65504.0F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700276 return -8;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700277 }
278
279 QCBORDecode_GetNext(&DC, &Item);
280 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != INFINITY) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700281 return -9;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700282 }
283
284 QCBORDecode_GetNext(&DC, &Item); // TODO: check this
285 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 0.0000000596046448F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700286 return -10;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700287 }
288
289 QCBORDecode_GetNext(&DC, &Item); // TODO: check this
290 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 0.0000609755516F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700291 return -11;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700292 }
293
294 QCBORDecode_GetNext(&DC, &Item); // TODO check this
295 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 0.0000610351563F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700296 return -12;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700297 }
298
299 QCBORDecode_GetNext(&DC, &Item);
300 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != 0) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700301 return -13;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700302 }
303
304 QCBORDecode_GetNext(&DC, &Item);
305 if(Item.uDataType != QCBOR_TYPE_FLOAT || Item.val.fnum != -2.0F) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700306 return -14;
307 }
308
309 QCBORDecode_GetNext(&DC, &Item);
310 if(Item.uDataType != QCBOR_TYPE_FLOAT || UsefulBufUtil_CopyFloatToUint32(Item.val.fnum) != 0x7fc00000L) {
311 return -15;
312 }
313 QCBORDecode_GetNext(&DC, &Item);
314 if(Item.uDataType != QCBOR_TYPE_FLOAT || UsefulBufUtil_CopyFloatToUint32(Item.val.fnum) != 0x7f800001) {
315 return -16;
316 }
317 QCBORDecode_GetNext(&DC, &Item);
318 if(Item.uDataType != QCBOR_TYPE_FLOAT || UsefulBufUtil_CopyFloatToUint32(Item.val.fnum) != 0x7fc0000f) {
319 return -17;
320 }
321 QCBORDecode_GetNext(&DC, &Item);
322 if(Item.uDataType != QCBOR_TYPE_FLOAT || UsefulBufUtil_CopyFloatToUint32(Item.val.fnum) != 0x7f80000f) {
323 return -18;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700324 }
325
326 if(QCBORDecode_Finish(&DC)) {
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700327 return -19;
Laurence Lundblade68a13352018-09-23 02:19:54 -0700328 }
329
330 return 0;
331}
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700332
333
334int half_precision_to_float_transitive_test()
335{
336 for(uint32_t uHalfP = 0; uHalfP < 0xffff; uHalfP += 1) {
337 // Contruct the CBOR for the half-precision float by hand
338 UsefulBuf_MakeStackUB(EncodedCBORMem, 3);
339 UsefulOutBuf UOB;
340 UsefulOutBuf_Init(&UOB, EncodedCBORMem);
341
342 const uint8_t uHalfPrecInitialByte = HALF_PREC_FLOAT + (CBOR_MAJOR_TYPE_SIMPLE << 5); // 0xf9
343 UsefulOutBuf_AppendByte(&UOB, uHalfPrecInitialByte); // The initial byte for a half-precision float
344 UsefulOutBuf_AppendUint16(&UOB, (uint16_t)uHalfP);
345
346
347 // Now parse the hand-constructed CBOR. This will invoke the conversion to a float
348 QCBORDecodeContext DC;
349 QCBORDecode_Init(&DC, UsefulOutBuf_OutUBuf(&UOB), 0);
350
351 QCBORItem Item;
352 QCBORDecode_GetNext(&DC, &Item);
353 if(Item.uDataType != QCBOR_TYPE_FLOAT) {
354 return -1;
355 }
356
357 //printf("%04x QCBOR:%15.15f \n", uHalfP,Item.val.fnum);
358
359
360 // Now generate CBOR with the half-precision value. This will invoke the conversion from float to half
361 UsefulBuf_MakeStackUB(OtherEncodedCBORMem, 5);
362 QCBOREncodeContext EC;
363 QCBOREncode_Init(&EC, OtherEncodedCBORMem);
364 QCBOREncode_AddFloatAsHalf(&EC, Item.val.fnum);
Laurence Lundblade781fd822018-10-01 09:37:52 -0700365 UsefulBufC EnCBOR;
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700366 QCBOREncode_Finish2(&EC, &EnCBOR); // todo check return code
367
368
369 // Finally parse the CBOR by hand to get at half-precision that was actually encoded.
370 UsefulInputBuf UIB;
Laurence Lundblade781fd822018-10-01 09:37:52 -0700371 UsefulInputBuf_Init(&UIB, EnCBOR);
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700372 if(UsefulInputBuf_GetByte(&UIB) != uHalfPrecInitialByte) {
373 return -2;
374 }
375 if(UsefulInputBuf_GetUint16(&UIB) != uHalfP) { // the moment of truth did we get back what we started with?
376 return -3;
377 }
378 }
379
380 return 0;
381}
382
383
384int half_precision_to_float_vs_rfc_test()
385{
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700386 for(uint32_t uHalfP = 0; uHalfP < 0xffff; uHalfP += 60) {
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700387 unsigned char x[2];
388 x[1] = uHalfP & 0xff;
389 x[0] = uHalfP >> 8;
390 double d = decode_half(x);
391
392 // Contruct the CBOR for the half-precision float by hand
393 UsefulBuf_MakeStackUB(__xx, 3);
394 UsefulOutBuf UOB;
395 UsefulOutBuf_Init(&UOB, __xx);
396
397 const uint8_t uHalfPrecInitialByte = HALF_PREC_FLOAT + (CBOR_MAJOR_TYPE_SIMPLE << 5); // 0xf9
398 UsefulOutBuf_AppendByte(&UOB, uHalfPrecInitialByte); // The initial byte for a half-precision float
399 UsefulOutBuf_AppendUint16(&UOB, (uint16_t)uHalfP);
400
401 // Now parse the hand-constructed CBOR. This will invoke the conversion to a float
402 QCBORDecodeContext DC;
403 QCBORDecode_Init(&DC, UsefulOutBuf_OutUBuf(&UOB), 0);
404
405 QCBORItem Item;
406
407 QCBORDecode_GetNext(&DC, &Item);
408 if(Item.uDataType != QCBOR_TYPE_FLOAT) {
409 return -1;
410 }
411
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700412 //printf("%04x QCBOR:%15.15f RFC: %15.15f (%8x)\n", uHalfP,Item.val.fnum, d , UsefulBufUtil_CopyFloatToUint32(d));
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700413
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700414 if(isnan(d)) {
415 // The RFC code uses the native instructions which may or may not
416 // handle sNaN, qNaN and NaN payloads correctly. This test just
417 // makes sure it is a NaN and doesn't worry about the type of NaN
418 if(!isnan(Item.val.fnum)) {
419 return -3;
420 }
421 } else {
422 if(Item.val.fnum != d) {
423 return -2;
424 }
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700425 }
426 }
427 return 0;
428}
429
430
/*
 Expected CBOR for double_as_smallest_encode_basic(): a map of 20
 entries (0xB4). In diagnostic notation:

 {"zero": 0.0, "negative zero": -0.0, "infinitity": Infinity, "negative infinitity": -Infinity, "NaN": NaN, "one": 1.0, "one third": 0.333251953125, "largest half-precision": 65504.0, "largest half-precision point one": 65504.1, "too-large half-precision": 65536.0, "smallest subnormal": 5.96046448e-8, "smallest normal": 0.00006103515261202119, "biggest subnormal": 0.00006103515625, "subnormal single": 4.00000646641519e-40, 3: -2.0, "large single exp": 2.5521177519070385e+38, "too-large single exp": 5.104235503814077e+38, "biggest single with prec": 16777216.0, "first single with prec loss": 16777217.0, 1: "fin"}

 Each row below is one map entry: label followed by value. The value's
 initial byte shows the width chosen: 0xF9 half, 0xFA single, 0xFB double.
 */
static const uint8_t sExpectedSmallest[] = {
    0xB4,
    // "zero"
    0x64, 0x7A, 0x65, 0x72, 0x6F, 0xF9, 0x00, 0x00,
    // "negative zero"
    0x6D, 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x7A, 0x65, 0x72, 0x6F, 0xF9, 0x80, 0x00,
    // "infinitity"
    0x6A, 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79, 0xF9, 0x7C, 0x00,
    // "negative infinitity"
    0x73, 0x6E, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x20, 0x69, 0x6E, 0x66, 0x69, 0x6E, 0x69, 0x74, 0x69, 0x74, 0x79, 0xF9, 0xFC, 0x00,
    // "NaN"
    0x63, 0x4E, 0x61, 0x4E, 0xF9, 0x7E, 0x00,
    // "one"
    0x63, 0x6F, 0x6E, 0x65, 0xF9, 0x3C, 0x00,
    // "one third"
    0x69, 0x6F, 0x6E, 0x65, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0xF9, 0x35, 0x55,
    // "largest half-precision"
    0x76, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E, 0xF9, 0x7B, 0xFF,
    // "largest half-precision point one"
    0x78, 0x20, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E, 0x20, 0x70, 0x6F, 0x69, 0x6E, 0x74, 0x20, 0x6F, 0x6E, 0x65, 0xFB, 0x40, 0xEF, 0xFC, 0x03, 0x33, 0x33, 0x33, 0x33,
    // "too-large half-precision"
    0x78, 0x18, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x68, 0x61, 0x6C, 0x66, 0x2D, 0x70, 0x72, 0x65, 0x63, 0x69, 0x73, 0x69, 0x6F, 0x6E, 0xFA, 0x47, 0x80, 0x00, 0x00,
    // "smallest subnormal"
    0x72, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0xFB, 0x3E, 0x70, 0x00, 0x00, 0x00, 0x1C, 0x5F, 0x68,
    // "smallest normal"
    0x6F, 0x73, 0x6D, 0x61, 0x6C, 0x6C, 0x65, 0x73, 0x74, 0x20, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0xFA, 0x38, 0x7F, 0xFF, 0xFF,
    // "biggest subnormal"
    0x71, 0x62, 0x69, 0x67, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0xF9, 0x04, 0x00,
    // "subnormal single"
    0x70, 0x73, 0x75, 0x62, 0x6E, 0x6F, 0x72, 0x6D, 0x61, 0x6C, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0xFB, 0x37, 0xC1, 0x6C, 0x28, 0x00, 0x00, 0x00, 0x00,
    // 3
    0x03, 0xF9, 0xC0, 0x00,
    // "large single exp"
    0x70, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x65, 0x78, 0x70, 0xFA, 0x7F, 0x40, 0x00, 0x00,
    // "too-large single exp"
    0x74, 0x74, 0x6F, 0x6F, 0x2D, 0x6C, 0x61, 0x72, 0x67, 0x65, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x65, 0x78, 0x70, 0xFB, 0x47, 0xF8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    // "biggest single with prec"
    0x78, 0x18, 0x62, 0x69, 0x67, 0x67, 0x65, 0x73, 0x74, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x70, 0x72, 0x65, 0x63, 0xFA, 0x4B, 0x80, 0x00, 0x00,
    // "first single with prec loss"
    0x78, 0x1B, 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x73, 0x69, 0x6E, 0x67, 0x6C, 0x65, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x70, 0x72, 0x65, 0x63, 0x20, 0x6C, 0x6F, 0x73, 0x73, 0xFB, 0x41, 0x70, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
    // 1: "fin"
    0x01, 0x63, 0x66, 0x69, 0x6E
};
438
439
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700440
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700441int double_as_smallest_encode_basic()
442{
443 UsefulBuf_MakeStackUB(EncodedHalfsMem, 420);
444
445 QCBOREncodeContext EC;
446 QCBOREncode_Init(&EC, EncodedHalfsMem);
447 // These are mostly from https://en.wikipedia.org/wiki/Half-precision_floating-point_format
448 QCBOREncode_OpenMap(&EC);
449 // 64 # text(4)
450 // 7A65726F # "zero"
451 // F9 0000 # primitive(0)
452 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "zero", 0.00);
453
454 // 64 # text(4)
455 // 7A65726F # "negative zero"
456 // F9 8000 # primitive(0)
457 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "negative zero", -0.00);
458
459 // 6A # text(10)
460 // 696E66696E6974697479 # "infinitity"
461 // F9 7C00 # primitive(31744)
462 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "infinitity", INFINITY);
463
464 // 73 # text(19)
465 // 6E6567617469766520696E66696E6974697479 # "negative infinitity"
466 // F9 FC00 # primitive(64512)
467 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "negative infinitity", -INFINITY);
468
469 // 63 # text(3)
470 // 4E614E # "NaN"
471 // F9 7E00 # primitive(32256)
472 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "NaN", NAN);
473
474 // TODO: test a few NaN variants
475
476 // 63 # text(3)
477 // 6F6E65 # "one"
478 // F9 3C00 # primitive(15360)
479 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "one", 1.0);
480
481 // 69 # text(9)
482 // 6F6E65207468697264 # "one third"
483 // F9 3555 # primitive(13653)
484 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "one third", 0.333251953125);
485
486 // 76 # text(22)
487 // 6C6172676573742068616C662D707265636973696F6E # "largest half-precision"
488 // F9 7BFF # primitive(31743)
489 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "largest half-precision",65504.0);
490
491 // 76 # text(22)
492 // 6C6172676573742068616C662D707265636973696F6E # "largest half-precision"
493 // F9 7BFF # primitive(31743)
494 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "largest half-precision point one",65504.1);
495
496 // Float 65536.0F is 0x47800000 in hex. It has an exponent of 16, which is larger than 15, the largest half-precision exponent
497 // 78 18 # text(24)
498 // 746F6F2D6C617267652068616C662D707265636973696F6E # "too-large half-precision"
499 // FA 47800000 # primitive(31743)
500 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "too-large half-precision", 65536.0);
501
502 // The smallest possible half-precision subnormal, but digitis are lost converting
503 // to half, so this turns into a double
504 // 72 # text(18)
505 // 736D616C6C657374207375626E6F726D616C # "smallest subnormal"
506 // FB 3E700000001C5F68 # primitive(4499096027744984936)
507 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "smallest subnormal", 0.0000000596046448);
508
509 // The smallest possible half-precision snormal, but digitis are lost converting
510 // to half, so this turns into a single TODO: confirm this is right
511 // 6F # text(15)
512 // 736D616C6C657374206E6F726D616C # "smallest normal"
513 // FA 387FFFFF # primitive(947912703)
514 // in hex single is 0x387fffff, exponent -15, significand 7fffff
515 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "smallest normal", 0.0000610351526F);
516
517 // 71 # text(17)
518 // 62696767657374207375626E6F726D616C # "biggest subnormal"
519 // F9 0400 # primitive(1024)
520 // in hex single is 0x38800000, exponent -14, significand 0
521 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "biggest subnormal", 0.0000610351563F);
522
523 // 70 # text(16)
524 // 7375626E6F726D616C2073696E676C65 # "subnormal single"
525 // FB 37C16C2800000000 # primitive(4017611261645684736)
526 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "subnormal single", 4e-40F);
527
528 // 03 # unsigned(3)
529 // F9 C000 # primitive(49152)
530 QCBOREncode_AddDoubleAsSmallestToMapN(&EC, 3, -2.0);
531
532 // 70 # text(16)
533 // 6C617267652073696E676C6520657870 # "large single exp"
534 // FA 7F400000 # primitive(2134900736)
535 // (0x01LL << (DOUBLE_NUM_SIGNIFICAND_BITS-1)) | ((127LL + DOUBLE_EXPONENT_BIAS) << DOUBLE_EXPONENT_SHIFT);
536 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "large single exp", 2.5521177519070385E+38); // Exponent fits single
537
538 // 74 # text(20)
539 // 746F6F2D6C617267652073696E676C6520657870 # "too-large single exp"
540 // FB 47F8000000000000 # primitive(5185894970917126144)
541 // (0x01LL << (DOUBLE_NUM_SIGNIFICAND_BITS-1)) | ((128LL + DOUBLE_EXPONENT_BIAS) << DOUBLE_EXPONENT_SHIFT);
542 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "too-large single exp", 5.104235503814077E+38); // Exponent too large for single
543
544 // 66 # text(6)
545 // 646664666465 # "dfdfde"
546 // FA 4B800000 # primitive(1266679808)
547 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "biggest single with prec",16777216); // Single with no precision loss
548
549 // 78 18 # text(24)
550 // 626967676573742073696E676C6520776974682070726563 # "biggest single with prec"
551 // FA 4B800000 # primitive(1266679808)
552 QCBOREncode_AddDoubleAsSmallestToMap(&EC, "first single with prec loss",16777217); // Double becuase of precision loss
553
554 // Just a convenient marker when cutting and pasting encoded CBOR
555 QCBOREncode_AddSZStringToMapN(&EC, 1, "fin");
556
557 QCBOREncode_CloseMap(&EC);
558
Laurence Lundblade781fd822018-10-01 09:37:52 -0700559 UsefulBufC EncodedHalfs;
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700560 int nReturn = QCBOREncode_Finish2(&EC, &EncodedHalfs);
561 if(nReturn) {
562 return -1;
563 }
564
Laurence Lundblade781fd822018-10-01 09:37:52 -0700565 if(UsefulBuf_Compare(EncodedHalfs, UsefulBuf_FromByteArrayLiteral(sExpectedSmallest))) {
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700566 return -3;
567 }
568
569 return 0;
Laurence Lundblade7d40d812018-09-30 02:44:01 -0700570};
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700571
572
573
#ifdef NAN_EXPERIMENT
/*
 Code for checking what the double to float cast does with
 NaNs. Not run as part of tests. Keep it around to
 be able to check various platforms and CPUs.
 */

#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS    (11)
#define DOUBLE_NUM_SIGN_BITS        (1)

#define DOUBLE_SIGNIFICAND_SHIFT    (0)
#define DOUBLE_EXPONENT_SHIFT       (DOUBLE_NUM_SIGNIFICAND_BITS)
#define DOUBLE_SIGN_SHIFT           (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

#define DOUBLE_SIGNIFICAND_MASK     (0xfffffffffffffULL) // The lower 52 bits
#define DOUBLE_EXPONENT_MASK        (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
#define DOUBLE_SIGN_MASK            (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
#define DOUBLE_QUIET_NAN_BIT        (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))


/*
 Builds three double NaNs from raw bit patterns (quiet, signaling, and
 quiet with payload 0xf00f), casts each to float and captures the
 resulting float bit patterns. Nothing is checked; the locals exist to
 be inspected, e.g. in a debugger. Always returns 0.
 */
static int NaNExperiments() {
    double dqNaN        = UsefulBufUtil_CopyUint64ToDouble(DOUBLE_EXPONENT_MASK | DOUBLE_QUIET_NAN_BIT);
    double dsNaN        = UsefulBufUtil_CopyUint64ToDouble(DOUBLE_EXPONENT_MASK | 0x01);
    double dqNaNPayload = UsefulBufUtil_CopyUint64ToDouble(DOUBLE_EXPONENT_MASK | DOUBLE_QUIET_NAN_BIT | 0xf00f);

    float f1 = (float)dqNaN;
    float f2 = (float)dsNaN;
    float f3 = (float)dqNaNPayload;

    uint32_t uqNaN        = UsefulBufUtil_CopyFloatToUint32(f1);
    uint32_t usNaN        = UsefulBufUtil_CopyFloatToUint32(f2);
    uint32_t uqNaNPayload = UsefulBufUtil_CopyFloatToUint32(f3);

    // Result of this on x86 is that every NaN is a qNaN. The intel
    // CVTSD2SS instruction ignores the NaN payload and even converts
    // a sNaN to a qNaN.

    // The bit patterns are only meant to be looked at; mark them used
    // so the build stays free of unused-variable warnings.
    (void)uqNaN;
    (void)usNaN;
    (void)uqNaNPayload;

    return 0;
}
#endif
616
617
Laurence Lundbladed711fb22018-09-26 14:35:22 -0700618