xref: /third_party/glslang/gtests/HexFloat.cpp (revision 617a3bab)
1// Copyright (c) 2015-2016 The Khronos Group Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <cfloat>
16#include <cmath>
17#include <cstdio>
18#include <sstream>
19#include <string>
20#include <tuple>
21
22#include <gmock/gmock.h>
23#include "SPIRV/hex_float.h"
24
25namespace {
26using ::testing::Eq;
27using spvutils::BitwiseCast;
28using spvutils::Float16;
29using spvutils::FloatProxy;
30using spvutils::HexFloat;
31using spvutils::ParseNormalFloat;
32
33// In this file "encode" means converting a number into a string,
34// and "decode" means converting a string into a number.
35
36using HexFloatTest =
37    ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
38using DecodeHexFloatTest =
39    ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
40using HexDoubleTest =
41    ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
42using DecodeHexDoubleTest =
43    ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
44
45// Hex-encodes a float value.
46template <typename T>
47std::string EncodeViaHexFloat(const T& value) {
48  std::stringstream ss;
49  ss << spvutils::HexFloat<T>(value);
50  return ss.str();
51}
52
53// The following two tests can't be DRY because they take different parameter
54// types.
55
56TEST_P(HexFloatTest, EncodeCorrectly) {
57  EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
58}
59
60TEST_P(HexDoubleTest, EncodeCorrectly) {
61  EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
62}
63
64// Decodes a hex-float string.
65template <typename T>
66FloatProxy<T> Decode(const std::string& str) {
67  spvutils::HexFloat<FloatProxy<T>> decoded(0.f);
68  EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
69  return decoded.value();
70}
71
72TEST_P(HexFloatTest, DecodeCorrectly) {
73  EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
74}
75
76TEST_P(HexDoubleTest, DecodeCorrectly) {
77  EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
78}
79
80INSTANTIATE_TEST_SUITE_P(
81    Float32Tests, HexFloatTest,
82    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
83        {0.f, "0x0p+0"},
84        {1.f, "0x1p+0"},
85        {2.f, "0x1p+1"},
86        {3.f, "0x1.8p+1"},
87        {0.5f, "0x1p-1"},
88        {0.25f, "0x1p-2"},
89        {0.75f, "0x1.8p-1"},
90        {-0.f, "-0x0p+0"},
91        {-1.f, "-0x1p+0"},
92        {-0.5f, "-0x1p-1"},
93        {-0.25f, "-0x1p-2"},
94        {-0.75f, "-0x1.8p-1"},
95
96        // Larger numbers
97        {512.f, "0x1p+9"},
98        {-512.f, "-0x1p+9"},
99        {1024.f, "0x1p+10"},
100        {-1024.f, "-0x1p+10"},
101        {1024.f + 8.f, "0x1.02p+10"},
102        {-1024.f - 8.f, "-0x1.02p+10"},
103
104        // Small numbers
105        {1.0f / 512.f, "0x1p-9"},
106        {1.0f / -512.f, "-0x1p-9"},
107        {1.0f / 1024.f, "0x1p-10"},
108        {1.0f / -1024.f, "-0x1p-10"},
109        {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
110        {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
111
112        // lowest non-denorm
113        {float(ldexp(1.0f, -126)), "0x1p-126"},
114        {float(ldexp(-1.0f, -126)), "-0x1p-126"},
115
116        // Denormalized values
117        {float(ldexp(1.0f, -127)), "0x1p-127"},
118        {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
119        {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
120        {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
121        {float(ldexp(-1.0f, -127)), "-0x1p-127"},
122        {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
123        {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
124        {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
125
126        {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
127        {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
128         "0x1.8p-128"},
129
130    })));
131
132INSTANTIATE_TEST_SUITE_P(
133    Float32NanTests, HexFloatTest,
134    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
135        // Various NAN and INF cases
136        {uint32_t(0xFF800000), "-0x1p+128"},         // -inf
137        {uint32_t(0x7F800000), "0x1p+128"},          // inf
138        {uint32_t(0xFFC00000), "-0x1.8p+128"},       // -nan
139        {uint32_t(0xFF800100), "-0x1.0002p+128"},    // -nan
140        {uint32_t(0xFF800c00), "-0x1.0018p+128"},    // -nan
141        {uint32_t(0xFF80F000), "-0x1.01ep+128"},     // -nan
142        {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"},  // -nan
143        {uint32_t(0x7FC00000), "0x1.8p+128"},        // +nan
144        {uint32_t(0x7F800100), "0x1.0002p+128"},     // +nan
145        {uint32_t(0x7f800c00), "0x1.0018p+128"},     // +nan
146        {uint32_t(0x7F80F000), "0x1.01ep+128"},      // +nan
147        {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"},   // +nan
148    })));
149
150INSTANTIATE_TEST_SUITE_P(
151    Float64Tests, HexDoubleTest,
152    ::testing::ValuesIn(
153        std::vector<std::pair<FloatProxy<double>, std::string>>({
154            {0., "0x0p+0"},
155            {1., "0x1p+0"},
156            {2., "0x1p+1"},
157            {3., "0x1.8p+1"},
158            {0.5, "0x1p-1"},
159            {0.25, "0x1p-2"},
160            {0.75, "0x1.8p-1"},
161            {-0., "-0x0p+0"},
162            {-1., "-0x1p+0"},
163            {-0.5, "-0x1p-1"},
164            {-0.25, "-0x1p-2"},
165            {-0.75, "-0x1.8p-1"},
166
167            // Larger numbers
168            {512., "0x1p+9"},
169            {-512., "-0x1p+9"},
170            {1024., "0x1p+10"},
171            {-1024., "-0x1p+10"},
172            {1024. + 8., "0x1.02p+10"},
173            {-1024. - 8., "-0x1.02p+10"},
174
175            // Large outside the range of normal floats
176            {ldexp(1.0, 128), "0x1p+128"},
177            {ldexp(1.0, 129), "0x1p+129"},
178            {ldexp(-1.0, 128), "-0x1p+128"},
179            {ldexp(-1.0, 129), "-0x1p+129"},
180            {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
181            {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
182            {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
183            {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
184
185            // Small numbers
186            {1.0 / 512., "0x1p-9"},
187            {1.0 / -512., "-0x1p-9"},
188            {1.0 / 1024., "0x1p-10"},
189            {1.0 / -1024., "-0x1p-10"},
190            {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
191            {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
192
193            // Small outside the range of normal floats
194            {ldexp(1.0, -128), "0x1p-128"},
195            {ldexp(1.0, -129), "0x1p-129"},
196            {ldexp(-1.0, -128), "-0x1p-128"},
197            {ldexp(-1.0, -129), "-0x1p-129"},
198            {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
199            {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
200            {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
201            {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
202
203            // lowest non-denorm
204            {ldexp(1.0, -1022), "0x1p-1022"},
205            {ldexp(-1.0, -1022), "-0x1p-1022"},
206
207            // Denormalized values
208            {ldexp(1.0, -1023), "0x1p-1023"},
209            {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
210            {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
211            {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
212            {ldexp(-1.0, -1024), "-0x1p-1024"},
213            {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
214            {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
215            {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
216
217            {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
218            {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
219             "0x1.8p-1024"},
220
221        })));
222
223INSTANTIATE_TEST_SUITE_P(
224    Float64NanTests, HexDoubleTest,
225    ::testing::ValuesIn(std::vector<
226                        std::pair<FloatProxy<double>, std::string>>({
227        // Various NAN and INF cases
228        {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"},                //-inf
229        {uint64_t(0x7FF0000000000000LL), "0x1p+1024"},                 //+inf
230        {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"},              // -nan
231        {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},             // -nan
232        {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"},  // -nan
233        {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"},          // -nan
234        {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"},  // -nan
235        {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},               // +nan
236        {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"},              // +nan
237        {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"},   // -nan
238        {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"},           // -nan
239        {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"},   // -nan
240    })));
241
242TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
243  std::stringstream s;
244  s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
245    << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
246  EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
247}
248
249TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
250  std::stringstream s;
251  s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
252    << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
253    << 9;
254  EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
255}
256
257TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
258  EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
259}
260
261TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
262  EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
263}
264
265INSTANTIATE_TEST_SUITE_P(
266    Float32DecodeTests, DecodeHexFloatTest,
267    ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
268        {"0x0p+000", 0.f},
269        {"0x0p0", 0.f},
270        {"0x0p-0", 0.f},
271
272        // flush to zero cases
273        {"0x1p-500", 0.f},  // Exponent underflows.
274        {"-0x1p-500", -0.f},
275        {"0x0.00000000001p-126", 0.f},  // Fraction causes underflow.
276        {"-0x0.0000000001p-127", -0.f},
277        {"-0x0.01p-142", -0.f},  // Fraction causes additional underflow.
278        {"0x0.01p-142", 0.f},
279
280        // Some floats that do not encode the same way as they decode.
281        {"0x2p+0", 2.f},
282        {"0xFFp+0", 255.f},
283        {"0x0.8p+0", 0.5f},
284        {"0x0.4p+0", 0.25f},
285    })));
286
287INSTANTIATE_TEST_SUITE_P(
288    Float32DecodeInfTests, DecodeHexFloatTest,
289    ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
290        // inf cases
291        {"-0x1p+128", uint32_t(0xFF800000)},   // -inf
292        {"0x32p+127", uint32_t(0x7F800000)},   // inf
293        {"0x32p+500", uint32_t(0x7F800000)},   // inf
294        {"-0x32p+127", uint32_t(0xFF800000)},  // -inf
295    })));
296
297INSTANTIATE_TEST_SUITE_P(
298    Float64DecodeTests, DecodeHexDoubleTest,
299    ::testing::ValuesIn(
300        std::vector<std::pair<std::string, FloatProxy<double>>>({
301            {"0x0p+000", 0.},
302            {"0x0p0", 0.},
303            {"0x0p-0", 0.},
304
305            // flush to zero cases
306            {"0x1p-5000", 0.},  // Exponent underflows.
307            {"-0x1p-5000", -0.},
308            {"0x0.0000000000000001p-1023", 0.},  // Fraction causes underflow.
309            {"-0x0.000000000000001p-1024", -0.},
310            {"-0x0.01p-1090", -0.f},  // Fraction causes additional underflow.
311            {"0x0.01p-1090", 0.},
312
313            // Some floats that do not encode the same way as they decode.
314            {"0x2p+0", 2.},
315            {"0xFFp+0", 255.},
316            {"0x0.8p+0", 0.5},
317            {"0x0.4p+0", 0.25},
318        })));
319
320INSTANTIATE_TEST_SUITE_P(
321    Float64DecodeInfTests, DecodeHexDoubleTest,
322    ::testing::ValuesIn(
323        std::vector<std::pair<std::string, FloatProxy<double>>>({
324            // inf cases
325            {"-0x1p+1024", uint64_t(0xFFF0000000000000)},   // -inf
326            {"0x32p+1023", uint64_t(0x7FF0000000000000)},   // inf
327            {"0x32p+5000", uint64_t(0x7FF0000000000000)},   // inf
328            {"-0x32p+1023", uint64_t(0xFFF0000000000000)},  // -inf
329        })));
330
331TEST(FloatProxy, ValidConversion) {
332  EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
333  EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
334  EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
335  EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
336  EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
337  EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
338
339  EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
340  EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
341  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
342  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
343  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
344  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
345  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
346  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
347  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
348  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
349  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
350  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
351
352  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
353  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
354  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
355  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
356  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
357  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
358  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
359  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
360  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
361  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
362  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
363  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
364}
365
366TEST(FloatProxy, Nan) {
367  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
368  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
369  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
370  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
371  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
372  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
373  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
374  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
375  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
376  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
377}
378
379TEST(FloatProxy, Negation) {
380  EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
381  EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
382
383  EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
384  EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
385
386  EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
387  EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
388
389  EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
390  EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
391
392  EXPECT_THAT(
393      (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
394      Eq(-std::numeric_limits<float>::infinity()));
395  EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
396                  .getAsFloat(),
397              Eq(std::numeric_limits<float>::infinity()));
398}
399
400// Test conversion of FloatProxy values to strings.
401//
402// In previous cases, we always wrapped the FloatProxy value in a HexFloat
403// before conversion to a string.  In the following cases, the FloatProxy
404// decides for itself whether to print as a regular number or as a hex float.
405
406using FloatProxyFloatTest =
407    ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
408using FloatProxyDoubleTest =
409    ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
410
411// Converts a float value to a string via a FloatProxy.
412template <typename T>
413std::string EncodeViaFloatProxy(const T& value) {
414  std::stringstream ss;
415  ss << value;
416  return ss.str();
417}
418
419// Converts a floating point string so that the exponent prefix
420// is 'e', and the exponent value does not have leading zeros.
421// The Microsoft runtime library likes to write things like "2.5E+010".
422// Convert that to "2.5e+10".
423// We don't care what happens to strings that are not floating point
424// strings.
425std::string NormalizeExponentInFloatString(std::string in) {
426  std::string result;
427  // Reserve one spot for the terminating null, even when the sscanf fails.
428  std::vector<char> prefix(in.size() + 1);
429  char e;
430  char plus_or_minus;
431  int exponent;  // in base 10
432  if ((4 == std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", prefix.data(), &e,
433                        &plus_or_minus, &exponent)) &&
434      (e == 'e' || e == 'E') &&
435      (plus_or_minus == '-' || plus_or_minus == '+')) {
436    // It looks like a floating point value with exponent.
437    std::stringstream out;
438    out << prefix.data() << 'e' << plus_or_minus << exponent;
439    result = out.str();
440  } else {
441    result = in;
442  }
443  return result;
444}
445
446TEST(NormalizeFloat, Sample) {
447  EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
448  EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
449  EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
450  EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
451  EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
452}
453
454// The following two tests can't be DRY because they take different parameter
455// types.
456TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
457  EXPECT_THAT(
458      NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
459      Eq(GetParam().second));
460}
461
462TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
463  EXPECT_THAT(
464      NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
465      Eq(GetParam().second));
466}
467
468INSTANTIATE_TEST_SUITE_P(
469    Float32Tests, FloatProxyFloatTest,
470    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
471        // Zero
472        {0.f, "0"},
473        // Normal numbers
474        {1.f, "1"},
475        {-0.25f, "-0.25"},
476        {1000.0f, "1000"},
477
478        // Still normal numbers, but with large magnitude exponents.
479        {float(ldexp(1.f, 126)), "8.50706e+37"},
480        {float(ldexp(-1.f, -126)), "-1.17549e-38"},
481
482        // denormalized values are printed as hex floats.
483        {float(ldexp(1.0f, -127)), "0x1p-127"},
484        {float(ldexp(1.5f, -128)), "0x1.8p-128"},
485        {float(ldexp(1.25, -129)), "0x1.4p-129"},
486        {float(ldexp(1.125, -130)), "0x1.2p-130"},
487        {float(ldexp(-1.0f, -127)), "-0x1p-127"},
488        {float(ldexp(-1.0f, -128)), "-0x1p-128"},
489        {float(ldexp(-1.0f, -129)), "-0x1p-129"},
490        {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
491
492        // NaNs
493        {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
494        {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
495
496        {std::numeric_limits<float>::infinity(), "0x1p+128"},
497        {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
498    })));
499
500INSTANTIATE_TEST_SUITE_P(
501    Float64Tests, FloatProxyDoubleTest,
502    ::testing::ValuesIn(
503        std::vector<std::pair<FloatProxy<double>, std::string>>({
504            {0., "0"},
505            {1., "1"},
506            {-0.25, "-0.25"},
507            {1000.0, "1000"},
508
509            // Large outside the range of normal floats
510            {ldexp(1.0, 128), "3.40282366920938e+38"},
511            {ldexp(1.5, 129), "1.02084710076282e+39"},
512            {ldexp(-1.0, 128), "-3.40282366920938e+38"},
513            {ldexp(-1.5, 129), "-1.02084710076282e+39"},
514
515            // Small outside the range of normal floats
516            {ldexp(1.5, -129), "2.20405190779179e-39"},
517            {ldexp(-1.5, -129), "-2.20405190779179e-39"},
518
519            // lowest non-denorm
520            {ldexp(1.0, -1022), "2.2250738585072e-308"},
521            {ldexp(-1.0, -1022), "-2.2250738585072e-308"},
522
523            // Denormalized values
524            {ldexp(1.125, -1023), "0x1.2p-1023"},
525            {ldexp(-1.375, -1024), "-0x1.6p-1024"},
526
527            // NaNs
528            {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
529            {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
530
531            // Infinity
532            {std::numeric_limits<double>::infinity(), "0x1p+1024"},
533            {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
534
535        })));
536
537// double is used so that unbiased_exponent can be used with the output
538// of ldexp directly.
539int32_t unbiased_exponent(double f) {
540  return spvutils::HexFloat<spvutils::FloatProxy<float>>(
541      static_cast<float>(f)).getUnbiasedNormalizedExponent();
542}
543
544int16_t unbiased_half_exponent(uint16_t f) {
545  return spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>(f)
546      .getUnbiasedNormalizedExponent();
547}
548
549TEST(HexFloatOperationTest, UnbiasedExponent) {
550  // Float cases
551  EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
552  EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
553  EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
554  EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
555  // Saturates to 128
556  EXPECT_EQ(128, unbiased_exponent(ldexp(1.0f, 256)));
557
558  EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
559  EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm
560  EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
561  EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
562  EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
563  // Smallest representable number
564  EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
565  // Should get rounded to 0 first.
566  EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
567
568  // Float16 cases
569  // The exponent is represented in the bits 0x7C00
570  // The offset is -15
571  EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
572  EXPECT_EQ(3, unbiased_half_exponent(0x4800));
573  EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
574  EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
575  EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
576  EXPECT_EQ(10, unbiased_half_exponent(0x6400));
577
578  // Smallest representable number
579  EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
580}
581
582// Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in factions
583float float_fractions(const std::vector<uint32_t>& fractions) {
584  float f = 0;
585  for(int32_t i: fractions) {
586    f += std::ldexp(1.0f, -i);
587  }
588  return f;
589}
590
591// Returns the normalized significand of a HexFloat<FloatProxy<float>>
592// that was created by calling float_fractions with the input fractions,
593// raised to the power of exp.
594uint32_t normalized_significand(const std::vector<uint32_t>& fractions, uint32_t exp) {
595  return spvutils::HexFloat<spvutils::FloatProxy<float>>(
596             static_cast<float>(ldexp(float_fractions(fractions), exp)))
597      .getNormalizedSignificand();
598}
599
600// Sets the bits from MSB to LSB of the significand part of a float.
601// For example 0 would set the bit 23 (counting from LSB to MSB),
602// and 1 would set the 22nd bit.
603uint32_t bits_set(const std::vector<uint32_t>& bits) {
604  const uint32_t top_bit = 1u << 22u;
605  uint32_t val= 0;
606  for(uint32_t i: bits) {
607    val |= top_bit >> i;
608  }
609  return val;
610}
611
612// The same as bits_set but for a Float16 value instead of 32-bit floating
613// point.
614uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
615  const uint32_t top_bit = 1u << 9u;
616  uint32_t val= 0;
617  for(uint32_t i: bits) {
618    val |= top_bit >> i;
619  }
620  return static_cast<uint16_t>(val);
621}
622
623TEST(HexFloatOperationTest, NormalizedSignificand) {
624  // For normalized numbers (the following) it should be a simple matter
625  // of getting rid of the top implicit bit
626  EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
627  EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
628  EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
629  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
630  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
631  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));
632
633  // For denormalized numbers we expect the normalized significand to
634  // shift as if it were normalized. This means, in practice that the
635  // top_most set bit will be cut off. Looks very similar to above (on purpose)
636  EXPECT_EQ(bits_set({}), normalized_significand({0}, -127));
637  EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -128));
638  EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -127));
639  EXPECT_EQ(bits_set({}), normalized_significand({22}, -127));
640  EXPECT_EQ(bits_set({0}), normalized_significand({21, 22}, -127));
641}
642
643// Returns the 32-bit floating point value created by
644// calling setFromSignUnbiasedExponentAndNormalizedSignificand
645// on a HexFloat<FloatProxy<float>>
646float set_from_sign(bool negative, int32_t unbiased_exponent,
647                   uint32_t significand, bool round_denorm_up) {
648  spvutils::HexFloat<spvutils::FloatProxy<float>>  f(0.f);
649  f.setFromSignUnbiasedExponentAndNormalizedSignificand(
650      negative, unbiased_exponent, significand, round_denorm_up);
651  return f.value().getAsFloat();
652}
653
654TEST(HexFloatOperationTests,
655     SetFromSignUnbiasedExponentAndNormalizedSignificand) {
656
657  EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));
658
659  // Tests insertion of various denormalized numbers with and without round up.
660  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, false));
661  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, true));
662  EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
663  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -150, 1, true));
664
665  EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
666  EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
667  EXPECT_EQ(float_fractions({0, 1, 2, 5}),
668            set_from_sign(false, 0, bits_set({0, 1, 4}), false));
669  EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
670            set_from_sign(false, -32, bits_set({0, 1, 4}), false));
671  EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
672            set_from_sign(false, -128, bits_set({0, 1, 4}), false));
673
674  // The negative cases from above.
675  EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
676  EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
677  EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
678  EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
679            set_from_sign(true, 0, bits_set({0, 1, 4}), false));
680  EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
681            set_from_sign(true, -32, bits_set({0, 1, 4}), false));
682  EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
683            set_from_sign(true, -128, bits_set({0, 1, 4}), false));
684}
685
686TEST(HexFloatOperationTests, NonRounding) {
687  // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
688  // except in the denorm case which is a bit more complex.
689  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
690  bool carry_bit = false;
691
692  spvutils::round_direction rounding[] = {
693      spvutils::kRoundToZero,
694      spvutils::kRoundToNearestEven,
695      spvutils::kRoundToPositiveInfinity,
696      spvutils::kRoundToNegativeInfinity};
697
698  // Everything fits, so this should be straight-forward
699  for (spvutils::round_direction round : rounding) {
700    EXPECT_EQ(bits_set({}), HF(0.f).getRoundedNormalizedSignificand<HF>(
701                                round, &carry_bit));
702    EXPECT_FALSE(carry_bit);
703
704    EXPECT_EQ(bits_set({0}),
705              HF(float_fractions({0, 1}))
706                  .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
707    EXPECT_FALSE(carry_bit);
708
709    EXPECT_EQ(bits_set({1, 3}),
710              HF(float_fractions({0, 2, 4}))
711                  .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
712    EXPECT_FALSE(carry_bit);
713
714    EXPECT_EQ(
715        bits_set({0, 1, 4}),
716        HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
717            .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
718    EXPECT_FALSE(carry_bit);
719
720    EXPECT_EQ(
721        bits_set({0, 1, 4, 22}),
722        HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
723            .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
724    EXPECT_FALSE(carry_bit);
725  }
726}
727
728struct RoundSignificandCase {
729  float source_float;
730  std::pair<int16_t, bool> expected_results;
731  spvutils::round_direction round;
732};
733
734using HexFloatRoundTest =
735    ::testing::TestWithParam<RoundSignificandCase>;
736
737TEST_P(HexFloatRoundTest, RoundDownToFP16) {
738  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
739  using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
740
741  HF input_value(GetParam().source_float);
742  bool carry_bit = false;
743  EXPECT_EQ(GetParam().expected_results.first,
744            input_value.getRoundedNormalizedSignificand<HF16>(
745                GetParam().round, &carry_bit));
746  EXPECT_EQ(carry_bit, GetParam().expected_results.second);
747}
748
749// clang-format off
750INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatRoundTest,
751  ::testing::ValuesIn(std::vector<RoundSignificandCase>(
752  {
753    {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToZero},
754    {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNearestEven},
755    {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToPositiveInfinity},
756    {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNegativeInfinity},
757    {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
758
759    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
760    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
761    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
762    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNearestEven},
763
764    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToZero},
765    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToPositiveInfinity},
766    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity},
767    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToNearestEven},
768
769    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
770    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
771    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
772    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
773
774    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
775    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToPositiveInfinity},
776    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity},
777    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
778
779    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
780    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
781    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
782    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
783
784    // Carries
785    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToZero},
786    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToPositiveInfinity},
787    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToNegativeInfinity},
788    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToNearestEven},
789
790    // Cases where original number was denorm. Note: this should have no effect
791    // the number is pre-normalized.
792    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
793    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
794    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
795    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
796  })));
797// clang-format on
798
799struct UpCastSignificandCase {
800  uint16_t source_half;
801  uint32_t expected_result;
802};
803
804using HexFloatRoundUpSignificandTest =
805    ::testing::TestWithParam<UpCastSignificandCase>;
806TEST_P(HexFloatRoundUpSignificandTest, Widening) {
807  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
808  using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
809  bool carry_bit = false;
810
811  spvutils::round_direction rounding[] = {
812      spvutils::kRoundToZero,
813      spvutils::kRoundToNearestEven,
814      spvutils::kRoundToPositiveInfinity,
815      spvutils::kRoundToNegativeInfinity};
816
817  // Everything fits, so everything should just be bit-shifts.
818  for (spvutils::round_direction round : rounding) {
819    carry_bit = false;
820    HF16 input_value(GetParam().source_half);
821    EXPECT_EQ(
822        GetParam().expected_result,
823        input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
824        << std::hex << "0x"
825        << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
826        << "  0x" << GetParam().expected_result;
827    EXPECT_FALSE(carry_bit);
828  }
829}
830
831INSTANTIATE_TEST_SUITE_P(F16toF32, HexFloatRoundUpSignificandTest,
832  // 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
833  // They are ignored for this test.
834  ::testing::ValuesIn(std::vector<UpCastSignificandCase>(
835  {
836    {0x3F00, 0x600000},
837    {0x0F00, 0x600000},
838    {0x0F01, 0x602000},
839    {0x0FFF, 0x7FE000},
840  })));
841
842struct DownCastTest {
843  float source_float;
844  uint16_t expected_half;
845  std::vector<spvutils::round_direction> directions;
846};
847
848std::string get_round_text(spvutils::round_direction direction) {
849#define CASE(round_direction) \
850  case round_direction:      \
851    return #round_direction
852
853  switch (direction) {
854    CASE(spvutils::kRoundToZero);
855    CASE(spvutils::kRoundToPositiveInfinity);
856    CASE(spvutils::kRoundToNegativeInfinity);
857    CASE(spvutils::kRoundToNearestEven);
858  }
859#undef CASE
860  return "";
861}
862
863using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
864
865TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
866  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
867  using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
868  HF f(GetParam().source_float);
869  for (auto round : GetParam().directions) {
870    HF16 half(0);
871    f.castTo(half, round);
872    EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
873        << get_round_text(round) << "  " << std::hex
874        << spvutils::BitwiseCast<uint32_t>(GetParam().source_float)
875        << " cast to: " << half.value().getAsFloat().get_value();
876  }
877}
878
879const uint16_t positive_infinity = 0x7C00;
880const uint16_t negative_infinity = 0xFC00;
881
882INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatFP32To16Tests,
883  ::testing::ValuesIn(std::vector<DownCastTest>(
884  {
885    // Exactly representable as half.
886    {0.f, 0x0, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
887    {-0.f, 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
888    {1.0f, 0x3C00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
889    {-1.0f, 0xBC00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
890
891    {float_fractions({0, 1, 10}) , 0x3E01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
892    {-float_fractions({0, 1, 10}) , 0xBE01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
893    {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)), 0x4A01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
894    {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)), 0xCA01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
895
896
897    // Underflow
898    {static_cast<float>(ldexp(1.0f, -25)), 0x0, {spvutils::kRoundToZero, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
899    {static_cast<float>(ldexp(1.0f, -25)), 0x1, {spvutils::kRoundToPositiveInfinity}},
900    {static_cast<float>(-ldexp(1.0f, -25)), 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNearestEven}},
901    {static_cast<float>(-ldexp(1.0f, -25)), 0x8001, {spvutils::kRoundToNegativeInfinity}},
902    {static_cast<float>(ldexp(1.0f, -24)), 0x1, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
903
904    // Overflow
905    {static_cast<float>(ldexp(1.0f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
906    {static_cast<float>(ldexp(1.0f, 18)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
907    {static_cast<float>(ldexp(1.3f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
908    {static_cast<float>(-ldexp(1.0f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
909    {static_cast<float>(-ldexp(1.0f, 18)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
910    {static_cast<float>(-ldexp(1.3f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
911
912    // Transfer of Infinities
913    {std::numeric_limits<float>::infinity(), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
914    {-std::numeric_limits<float>::infinity(), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
915
916    // Nans are below because we cannot test for equality.
917  })));
918
919struct UpCastCase{
920  uint16_t source_half;
921  float expected_float;
922};
923
924using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
925TEST_P(HexFloatFP16To32Tests, WideningCasts) {
926  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
927  using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
928  HF16 f(GetParam().source_half);
929
930  spvutils::round_direction rounding[] = {
931      spvutils::kRoundToZero,
932      spvutils::kRoundToNearestEven,
933      spvutils::kRoundToPositiveInfinity,
934      spvutils::kRoundToNegativeInfinity};
935
936  // Everything fits, so everything should just be bit-shifts.
937  for (spvutils::round_direction round : rounding) {
938    HF flt(0.f);
939    f.castTo(flt, round);
940    EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
941        << get_round_text(round) << "  " << std::hex
942        << spvutils::BitwiseCast<uint16_t>(GetParam().source_half)
943        << " cast to: " << flt.value().getAsFloat();
944  }
945}
946
947INSTANTIATE_TEST_SUITE_P(F16ToF32, HexFloatFP16To32Tests,
948  ::testing::ValuesIn(std::vector<UpCastCase>(
949  {
950    {0x0000, 0.f},
951    {0x8000, -0.f},
952    {0x3C00, 1.0f},
953    {0xBC00, -1.0f},
954    {0x3F00, float_fractions({0, 1, 2})},
955    {0xBF00, -float_fractions({0, 1, 2})},
956    {0x3F01, float_fractions({0, 1, 2, 10})},
957    {0xBF01, -float_fractions({0, 1, 2, 10})},
958
959    // denorm
960    {0x0001, static_cast<float>(ldexp(1.0, -24))},
961    {0x0002, static_cast<float>(ldexp(1.0, -23))},
962    {0x8001, static_cast<float>(-ldexp(1.0, -24))},
963    {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},
964
965    // inf
966    {0x7C00, std::numeric_limits<float>::infinity()},
967    {0xFC00, -std::numeric_limits<float>::infinity()},
968  })));
969
970TEST(HexFloatOperationTests, NanTests) {
971  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
972  using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
973  spvutils::round_direction rounding[] = {
974      spvutils::kRoundToZero,
975      spvutils::kRoundToNearestEven,
976      spvutils::kRoundToPositiveInfinity,
977      spvutils::kRoundToNegativeInfinity};
978
979  // Everything fits, so everything should just be bit-shifts.
980  for (spvutils::round_direction round : rounding) {
981    HF16 f16(0);
982    HF f(0.f);
983    HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
984    EXPECT_TRUE(f16.value().isNan());
985    HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
986    EXPECT_TRUE(f16.value().isNan());
987
988    HF16(0x7C01).castTo(f, round);
989    EXPECT_TRUE(f.value().isNan());
990    HF16(0x7C11).castTo(f, round);
991    EXPECT_TRUE(f.value().isNan());
992    HF16(0xFC01).castTo(f, round);
993    EXPECT_TRUE(f.value().isNan());
994    HF16(0x7C10).castTo(f, round);
995    EXPECT_TRUE(f.value().isNan());
996    HF16(0xFF00).castTo(f, round);
997    EXPECT_TRUE(f.value().isNan());
998  }
999}
1000
1001// A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
1002template <typename T>
1003struct FloatParseCase {
1004  std::string literal;
1005  bool negate_value;
1006  bool expect_success;
1007  HexFloat<FloatProxy<T>> expected_value;
1008};
1009
1010using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
1011
1012TEST_P(ParseNormalFloatTest, Samples) {
1013  std::stringstream input(GetParam().literal);
1014  HexFloat<FloatProxy<float>> parsed_value(0.0f);
1015  ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1016  EXPECT_NE(GetParam().expect_success, input.fail())
1017      << " literal: " << GetParam().literal
1018      << " negate: " << GetParam().negate_value;
1019  if (GetParam().expect_success) {
1020    EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1021        << " literal: " << GetParam().literal
1022        << " negate: " << GetParam().negate_value;
1023  }
1024}
1025
1026// Returns a FloatParseCase with expected failure.
1027template <typename T>
1028FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
1029                                    T expected_value) {
1030  HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1031  return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
1032}
1033
1034// Returns a FloatParseCase that should successfully parse to a given value.
1035template <typename T>
1036FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
1037                                     T expected_value) {
1038  HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1039  return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
1040}
1041
1042INSTANTIATE_TEST_SUITE_P(
1043    FloatParse, ParseNormalFloatTest,
1044    ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
1045        // Failing cases due to trivially incorrect syntax.
1046        BadFloatParseCase("abc", false, 0.0f),
1047        BadFloatParseCase("abc", true, 0.0f),
1048
1049        // Valid cases.
1050        GoodFloatParseCase("0", false, 0.0f),
1051        GoodFloatParseCase("0.0", false, 0.0f),
1052        GoodFloatParseCase("-0.0", false, -0.0f),
1053        GoodFloatParseCase("2.0", false, 2.0f),
1054        GoodFloatParseCase("-2.0", false, -2.0f),
1055        GoodFloatParseCase("+2.0", false, 2.0f),
1056        // Cases with negate_value being true.
1057        GoodFloatParseCase("0.0", true, -0.0f),
1058        GoodFloatParseCase("2.0", true, -2.0f),
1059
1060        // When negate_value is true, we should not accept a
1061        // leading minus or plus.
1062        BadFloatParseCase("-0.0", true, 0.0f),
1063        BadFloatParseCase("-2.0", true, 0.0f),
1064        BadFloatParseCase("+0.0", true, 0.0f),
1065        BadFloatParseCase("+2.0", true, 0.0f),
1066
1067        // Overflow is an error for 32-bit float parsing.
1068        BadFloatParseCase("1e40", false, FLT_MAX),
1069        BadFloatParseCase("1e40", true, -FLT_MAX),
1070        BadFloatParseCase("-1e40", false, -FLT_MAX),
1071        // We can't have -1e40 and negate_value == true since
1072        // that represents an original case of "--1e40" which
1073        // is invalid.
1074  }));
1075
1076using ParseNormalFloat16Test =
1077    ::testing::TestWithParam<FloatParseCase<Float16>>;
1078
1079TEST_P(ParseNormalFloat16Test, Samples) {
1080  std::stringstream input(GetParam().literal);
1081  HexFloat<FloatProxy<Float16>> parsed_value(0);
1082  ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1083  EXPECT_NE(GetParam().expect_success, input.fail())
1084      << " literal: " << GetParam().literal
1085      << " negate: " << GetParam().negate_value;
1086  if (GetParam().expect_success) {
1087    EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1088        << " literal: " << GetParam().literal
1089        << " negate: " << GetParam().negate_value;
1090  }
1091}
1092
1093INSTANTIATE_TEST_SUITE_P(
1094    Float16Parse, ParseNormalFloat16Test,
1095    ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
1096        // Failing cases due to trivially incorrect syntax.
1097        BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
1098        BadFloatParseCase<Float16>("abc", true, uint16_t{0}),
1099
1100        // Valid cases.
1101        GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
1102        GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
1103        GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
1104        GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
1105        GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
1106        GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
1107        // Cases with negate_value being true.
1108        GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
1109        GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),
1110
1111        // When negate_value is true, we should not accept a leading minus or
1112        // plus.
1113        BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
1114        BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
1115        BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
1116        BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
1117    }));
1118
1119// A test case for detecting infinities.
1120template <typename T>
1121struct OverflowParseCase {
1122  std::string input;
1123  bool expect_success;
1124  T expected_value;
1125};
1126
1127using FloatProxyParseOverflowFloatTest =
1128    ::testing::TestWithParam<OverflowParseCase<float>>;
1129
1130TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
1131  std::istringstream input(GetParam().input);
1132  HexFloat<FloatProxy<float>> value(0.0f);
1133  input >> value;
1134  EXPECT_NE(GetParam().expect_success, input.fail());
1135  if (GetParam().expect_success) {
1136    EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
1137  }
1138}
1139
1140INSTANTIATE_TEST_SUITE_P(
1141    FloatOverflow, FloatProxyParseOverflowFloatTest,
1142    ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
1143        {"0", true, 0.0f},
1144        {"0.0", true, 0.0f},
1145        {"1.0", true, 1.0f},
1146        {"1e38", true, 1e38f},
1147        {"-1e38", true, -1e38f},
1148        {"1e40", false, FLT_MAX},
1149        {"-1e40", false, -FLT_MAX},
1150        {"1e400", false, FLT_MAX},
1151        {"-1e400", false, -FLT_MAX},
1152    })));
1153
1154using FloatProxyParseOverflowDoubleTest =
1155    ::testing::TestWithParam<OverflowParseCase<double>>;
1156
1157TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
1158  std::istringstream input(GetParam().input);
1159  HexFloat<FloatProxy<double>> value(0.0);
1160  input >> value;
1161  EXPECT_NE(GetParam().expect_success, input.fail());
1162  if (GetParam().expect_success) {
1163    EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
1164  }
1165}
1166
1167INSTANTIATE_TEST_SUITE_P(
1168    DoubleOverflow, FloatProxyParseOverflowDoubleTest,
1169    ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
1170        {"0", true, 0.0},
1171        {"0.0", true, 0.0},
1172        {"1.0", true, 1.0},
1173        {"1e38", true, 1e38},
1174        {"-1e38", true, -1e38},
1175        {"1e40", true, 1e40},
1176        {"-1e40", true, -1e40},
1177        {"1e400", false, DBL_MAX},
1178        {"-1e400", false, -DBL_MAX},
1179    })));
1180
1181using FloatProxyParseOverflowFloat16Test =
1182    ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
1183
1184TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
1185  std::istringstream input(GetParam().input);
1186  HexFloat<FloatProxy<Float16>> value(0);
1187  input >> value;
1188  EXPECT_NE(GetParam().expect_success, input.fail()) << " literal: "
1189                                                     << GetParam().input;
1190  if (GetParam().expect_success) {
1191    EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
1192        << " literal: " << GetParam().input;
1193  }
1194}
1195
1196INSTANTIATE_TEST_SUITE_P(
1197    Float16Overflow, FloatProxyParseOverflowFloat16Test,
1198    ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
1199        {"0", true, uint16_t{0}},
1200        {"0.0", true, uint16_t{0}},
1201        {"1.0", true, uint16_t{0x3c00}},
1202        // Overflow for 16-bit float is an error, and returns max or
1203        // lowest value.
1204        {"1e38", false, uint16_t{0x7bff}},
1205        {"1e40", false, uint16_t{0x7bff}},
1206        {"1e400", false, uint16_t{0x7bff}},
1207        {"-1e38", false, uint16_t{0xfbff}},
1208        {"-1e40", false, uint16_t{0xfbff}},
1209        {"-1e400", false, uint16_t{0xfbff}},
1210    })));
1211
1212TEST(FloatProxy, Max) {
1213  EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
1214              Eq(uint16_t{0x7bff}));
1215  EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
1216              Eq(std::numeric_limits<float>::max()));
1217  EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
1218              Eq(std::numeric_limits<double>::max()));
1219}
1220
1221TEST(FloatProxy, Lowest) {
1222  EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
1223              Eq(uint16_t{0xfbff}));
1224  EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
1225              Eq(std::numeric_limits<float>::lowest()));
1226  EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
1227              Eq(std::numeric_limits<double>::lowest()));
1228}
1229
1230// TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
1231}  // anonymous namespace
1232