1// Copyright (c) 2015-2016 The Khronos Group Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <cfloat>
16#include <cmath>
17#include <cstdio>
18#include <limits>
19#include <sstream>
20#include <string>
21#include <tuple>
22#include <utility>
23#include <vector>
24
25#include "gmock/gmock.h"
26#include "source/util/hex_float.h"
27#include "test/unit_spirv.h"
28
29namespace spvtools {
30namespace utils {
31namespace {
32
33using ::testing::Eq;
34
35// In this file "encode" means converting a number into a string,
36// and "decode" means converting a string into a number.
37
38using HexFloatTest =
39    ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
40using DecodeHexFloatTest =
41    ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
42using HexDoubleTest =
43    ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
44using DecodeHexDoubleTest =
45    ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
46using RoundTripFloatTest = ::testing::TestWithParam<float>;
47using RoundTripDoubleTest = ::testing::TestWithParam<double>;
48
49// Hex-encodes a float value.
50template <typename T>
51std::string EncodeViaHexFloat(const T& value) {
52  std::stringstream ss;
53  ss << HexFloat<T>(value);
54  return ss.str();
55}
56
57// The following two tests can't be DRY because they take different parameter
58// types.
59
60TEST_P(HexFloatTest, EncodeCorrectly) {
61  EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
62}
63
64TEST_P(HexDoubleTest, EncodeCorrectly) {
65  EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
66}
67
68// Decodes a hex-float string.
69template <typename T>
70FloatProxy<T> Decode(const std::string& str) {
71  HexFloat<FloatProxy<T>> decoded(0.f);
72  EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
73  return decoded.value();
74}
75
76TEST_P(HexFloatTest, DecodeCorrectly) {
77  EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
78}
79
80TEST_P(HexDoubleTest, DecodeCorrectly) {
81  EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
82}
83
84INSTANTIATE_TEST_SUITE_P(
85    Float32Tests, HexFloatTest,
86    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
87        {0.f, "0x0p+0"},
88        {1.f, "0x1p+0"},
89        {2.f, "0x1p+1"},
90        {3.f, "0x1.8p+1"},
91        {0.5f, "0x1p-1"},
92        {0.25f, "0x1p-2"},
93        {0.75f, "0x1.8p-1"},
94        {-0.f, "-0x0p+0"},
95        {-1.f, "-0x1p+0"},
96        {-0.5f, "-0x1p-1"},
97        {-0.25f, "-0x1p-2"},
98        {-0.75f, "-0x1.8p-1"},
99
100        // Larger numbers
101        {512.f, "0x1p+9"},
102        {-512.f, "-0x1p+9"},
103        {1024.f, "0x1p+10"},
104        {-1024.f, "-0x1p+10"},
105        {1024.f + 8.f, "0x1.02p+10"},
106        {-1024.f - 8.f, "-0x1.02p+10"},
107
108        // Small numbers
109        {1.0f / 512.f, "0x1p-9"},
110        {1.0f / -512.f, "-0x1p-9"},
111        {1.0f / 1024.f, "0x1p-10"},
112        {1.0f / -1024.f, "-0x1p-10"},
113        {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
114        {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
115
116        // lowest non-denorm
117        {float(ldexp(1.0f, -126)), "0x1p-126"},
118        {float(ldexp(-1.0f, -126)), "-0x1p-126"},
119
120        // Denormalized values
121        {float(ldexp(1.0f, -127)), "0x1p-127"},
122        {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
123        {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
124        {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
125        {float(ldexp(-1.0f, -127)), "-0x1p-127"},
126        {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
127        {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
128        {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
129
130        {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
131        {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
132         "0x1.8p-128"},
133
134    })));
135
136INSTANTIATE_TEST_SUITE_P(
137    Float32NanTests, HexFloatTest,
138    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
139        // Various NAN and INF cases
140        {uint32_t(0xFF800000), "-0x1p+128"},         // -inf
141        {uint32_t(0x7F800000), "0x1p+128"},          // inf
142        {uint32_t(0xFFC00000), "-0x1.8p+128"},       // -nan
143        {uint32_t(0xFF800100), "-0x1.0002p+128"},    // -nan
144        {uint32_t(0xFF800c00), "-0x1.0018p+128"},    // -nan
145        {uint32_t(0xFF80F000), "-0x1.01ep+128"},     // -nan
146        {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"},  // -nan
147        {uint32_t(0x7FC00000), "0x1.8p+128"},        // +nan
148        {uint32_t(0x7F800100), "0x1.0002p+128"},     // +nan
149        {uint32_t(0x7f800c00), "0x1.0018p+128"},     // +nan
150        {uint32_t(0x7F80F000), "0x1.01ep+128"},      // +nan
151        {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"},   // +nan
152    })));
153
154INSTANTIATE_TEST_SUITE_P(
155    Float64Tests, HexDoubleTest,
156    ::testing::ValuesIn(
157        std::vector<std::pair<FloatProxy<double>, std::string>>({
158            {0., "0x0p+0"},
159            {1., "0x1p+0"},
160            {2., "0x1p+1"},
161            {3., "0x1.8p+1"},
162            {0.5, "0x1p-1"},
163            {0.25, "0x1p-2"},
164            {0.75, "0x1.8p-1"},
165            {-0., "-0x0p+0"},
166            {-1., "-0x1p+0"},
167            {-0.5, "-0x1p-1"},
168            {-0.25, "-0x1p-2"},
169            {-0.75, "-0x1.8p-1"},
170
171            // Larger numbers
172            {512., "0x1p+9"},
173            {-512., "-0x1p+9"},
174            {1024., "0x1p+10"},
175            {-1024., "-0x1p+10"},
176            {1024. + 8., "0x1.02p+10"},
177            {-1024. - 8., "-0x1.02p+10"},
178
179            // Large outside the range of normal floats
180            {ldexp(1.0, 128), "0x1p+128"},
181            {ldexp(1.0, 129), "0x1p+129"},
182            {ldexp(-1.0, 128), "-0x1p+128"},
183            {ldexp(-1.0, 129), "-0x1p+129"},
184            {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
185            {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
186            {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
187            {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
188
189            // Small numbers
190            {1.0 / 512., "0x1p-9"},
191            {1.0 / -512., "-0x1p-9"},
192            {1.0 / 1024., "0x1p-10"},
193            {1.0 / -1024., "-0x1p-10"},
194            {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
195            {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
196
197            // Small outside the range of normal floats
198            {ldexp(1.0, -128), "0x1p-128"},
199            {ldexp(1.0, -129), "0x1p-129"},
200            {ldexp(-1.0, -128), "-0x1p-128"},
201            {ldexp(-1.0, -129), "-0x1p-129"},
202            {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
203            {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
204            {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
205            {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
206
207            // lowest non-denorm
208            {ldexp(1.0, -1022), "0x1p-1022"},
209            {ldexp(-1.0, -1022), "-0x1p-1022"},
210
211            // Denormalized values
212            {ldexp(1.0, -1023), "0x1p-1023"},
213            {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
214            {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
215            {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
216            {ldexp(-1.0, -1024), "-0x1p-1024"},
217            {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
218            {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
219            {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
220
221            {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
222            {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
223             "0x1.8p-1024"},
224
225        })));
226
227INSTANTIATE_TEST_SUITE_P(
228    Float64NanTests, HexDoubleTest,
229    ::testing::ValuesIn(std::vector<
230                        std::pair<FloatProxy<double>, std::string>>({
231        // Various NAN and INF cases
232        {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"},                // -inf
233        {uint64_t(0x7FF0000000000000LL), "0x1p+1024"},                 // +inf
234        {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"},              // -nan
235        {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},             // -nan
236        {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"},  // -nan
237        {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"},          // -nan
238        {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"},  // -nan
239        {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},               // +nan
240        {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"},              // +nan
241        {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"},   // -nan
242        {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"},           // -nan
243        {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"},   // -nan
244    })));
245
246// Tests that encoding a value and decoding it again restores
247// the same value.
248TEST_P(RoundTripFloatTest, CanStoreAccurately) {
249  std::stringstream ss;
250  ss << FloatProxy<float>(GetParam());
251  ss.seekg(0);
252  FloatProxy<float> res;
253  ss >> res;
254  EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
255}
256
257TEST_P(RoundTripDoubleTest, CanStoreAccurately) {
258  std::stringstream ss;
259  ss << FloatProxy<double>(GetParam());
260  ss.seekg(0);
261  FloatProxy<double> res;
262  ss >> res;
263  EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
264}
265
266INSTANTIATE_TEST_SUITE_P(
267    Float32StoreTests, RoundTripFloatTest,
268    ::testing::ValuesIn(std::vector<float>(
269        {// Value requiring more than 6 digits of precision to be
270         // represented accurately.
271         3.0000002f})));
272
273INSTANTIATE_TEST_SUITE_P(
274    Float64StoreTests, RoundTripDoubleTest,
275    ::testing::ValuesIn(std::vector<double>(
276        {// Value requiring more than 15 digits of precision to be
277         // represented accurately.
278         1.5000000000000002})));
279
280TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
281  std::stringstream s;
282  s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
283    << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
284  EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
285}
286
287TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
288  std::stringstream s;
289  s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
290    << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
291    << 9;
292  EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
293}
294
295TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
296  EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
297}
298
299TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
300  EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
301}
302
303INSTANTIATE_TEST_SUITE_P(
304    Float32DecodeTests, DecodeHexFloatTest,
305    ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
306        {"0x0p+000", 0.f},
307        {"0x0p0", 0.f},
308        {"0x0p-0", 0.f},
309
310        // flush to zero cases
311        {"0x1p-500", 0.f},  // Exponent underflows.
312        {"-0x1p-500", -0.f},
313        {"0x0.00000000001p-126", 0.f},  // Fraction causes underflow.
314        {"-0x0.0000000001p-127", -0.f},
315        {"-0x0.01p-142", -0.f},  // Fraction causes additional underflow.
316        {"0x0.01p-142", 0.f},
317
318        // Some floats that do not encode the same way as they decode.
319        {"0x2p+0", 2.f},
320        {"0xFFp+0", 255.f},
321        {"0x0.8p+0", 0.5f},
322        {"0x0.4p+0", 0.25f},
323    })));
324
325INSTANTIATE_TEST_SUITE_P(
326    Float32DecodeInfTests, DecodeHexFloatTest,
327    ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
328        // inf cases
329        {"-0x1p+128", uint32_t(0xFF800000)},   // -inf
330        {"0x32p+127", uint32_t(0x7F800000)},   // inf
331        {"0x32p+500", uint32_t(0x7F800000)},   // inf
332        {"-0x32p+127", uint32_t(0xFF800000)},  // -inf
333    })));
334
335INSTANTIATE_TEST_SUITE_P(
336    Float64DecodeTests, DecodeHexDoubleTest,
337    ::testing::ValuesIn(
338        std::vector<std::pair<std::string, FloatProxy<double>>>({
339            {"0x0p+000", 0.},
340            {"0x0p0", 0.},
341            {"0x0p-0", 0.},
342
343            // flush to zero cases
344            {"0x1p-5000", 0.},  // Exponent underflows.
345            {"-0x1p-5000", -0.},
346            {"0x0.0000000000000001p-1023", 0.},  // Fraction causes underflow.
347            {"-0x0.000000000000001p-1024", -0.},
348            {"-0x0.01p-1090", -0.f},  // Fraction causes additional underflow.
349            {"0x0.01p-1090", 0.},
350
351            // Some floats that do not encode the same way as they decode.
352            {"0x2p+0", 2.},
353            {"0xFFp+0", 255.},
354            {"0x0.8p+0", 0.5},
355            {"0x0.4p+0", 0.25},
356        })));
357
358INSTANTIATE_TEST_SUITE_P(
359    Float64DecodeInfTests, DecodeHexDoubleTest,
360    ::testing::ValuesIn(
361        std::vector<std::pair<std::string, FloatProxy<double>>>({
362            // inf cases
363            {"-0x1p+1024", uint64_t(0xFFF0000000000000)},   // -inf
364            {"0x32p+1023", uint64_t(0x7FF0000000000000)},   // inf
365            {"0x32p+5000", uint64_t(0x7FF0000000000000)},   // inf
366            {"-0x32p+1023", uint64_t(0xFFF0000000000000)},  // -inf
367        })));
368
369TEST(FloatProxy, ValidConversion) {
370  EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
371  EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
372  EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
373  EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
374  EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
375  EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
376
377  EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
378  EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
379  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
380  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
381  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
382  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
383  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
384  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
385  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
386  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
387  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
388  EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
389
390  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
391  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
392  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
393  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
394  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
395  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
396  EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
397  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
398  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
399  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
400  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
401  EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
402}
403
404TEST(FloatProxy, Nan) {
405  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
406  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
407  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
408  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
409  EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
410  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
411  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
412  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
413  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
414  EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
415}
416
417TEST(FloatProxy, Negation) {
418  EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
419  EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
420
421  EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
422  EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
423
424  EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
425  EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
426
427  EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
428  EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
429
430  EXPECT_THAT(
431      (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
432      Eq(-std::numeric_limits<float>::infinity()));
433  EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
434                  .getAsFloat(),
435              Eq(std::numeric_limits<float>::infinity()));
436}
437
438// Test conversion of FloatProxy values to strings.
439//
440// In previous cases, we always wrapped the FloatProxy value in a HexFloat
441// before conversion to a string.  In the following cases, the FloatProxy
442// decides for itself whether to print as a regular number or as a hex float.
443
444using FloatProxyFloatTest =
445    ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
446using FloatProxyDoubleTest =
447    ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
448
// Returns the text produced by streaming |value| directly, with no HexFloat
// wrapper.
template <typename T>
std::string EncodeViaFloatProxy(const T& value) {
  std::ostringstream encoded;
  encoded << value;
  return encoded.str();
}
456
// Rewrites a decimal floating point string so that the exponent marker is a
// lowercase 'e' and the exponent digits carry no leading zeros.  For example
// the Microsoft runtime library likes to write "2.5E+010"; this returns
// "2.5e+10" for it.
// Input that does not look like <digits/sign/point><e|E><+|-><integer> is
// returned unchanged.  We don't care what happens to strings that are not
// floating point strings.
std::string NormalizeExponentInFloatString(std::string in) {
  // One extra element so there is room for the terminating null, even when
  // the sscanf fails.
  std::vector<char> mantissa(in.size() + 1);
  char marker;
  char sign;
  int exponent;  // in base 10
  const int fields_read =
      std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", mantissa.data(),
                  &marker, &sign, &exponent);

  // The short-circuit order matters: marker and sign are only meaningful
  // when all four fields were read.
  const bool has_exponent = (fields_read == 4) &&
                            (marker == 'e' || marker == 'E') &&
                            (sign == '-' || sign == '+');
  if (!has_exponent) {
    return in;
  }

  // It looks like a floating point value with exponent.
  std::stringstream out;
  out << mantissa.data() << 'e' << sign << exponent;
  return out.str();
}
483
484TEST(NormalizeFloat, Sample) {
485  EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
486  EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
487  EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
488  EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
489  EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
490}
491
492// The following two tests can't be DRY because they take different parameter
493// types.
494TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
495  EXPECT_THAT(
496      NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
497      Eq(GetParam().second));
498}
499
500TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
501  EXPECT_THAT(
502      NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
503      Eq(GetParam().second));
504}
505
506INSTANTIATE_TEST_SUITE_P(
507    Float32Tests, FloatProxyFloatTest,
508    ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
509        // Zero
510        {0.f, "0"},
511        // Normal numbers
512        {1.f, "1"},
513        {-0.25f, "-0.25"},
514        {1000.0f, "1000"},
515
516        // Still normal numbers, but with large magnitude exponents.
517        {float(ldexp(1.f, 126)), "8.50705917e+37"},
518        {float(ldexp(-1.f, -126)), "-1.17549435e-38"},
519
520        // denormalized values are printed as hex floats.
521        {float(ldexp(1.0f, -127)), "0x1p-127"},
522        {float(ldexp(1.5f, -128)), "0x1.8p-128"},
523        {float(ldexp(1.25, -129)), "0x1.4p-129"},
524        {float(ldexp(1.125, -130)), "0x1.2p-130"},
525        {float(ldexp(-1.0f, -127)), "-0x1p-127"},
526        {float(ldexp(-1.0f, -128)), "-0x1p-128"},
527        {float(ldexp(-1.0f, -129)), "-0x1p-129"},
528        {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
529
530        // NaNs
531        {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
532        {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
533
534        {std::numeric_limits<float>::infinity(), "0x1p+128"},
535        {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
536    })));
537
538INSTANTIATE_TEST_SUITE_P(
539    Float64Tests, FloatProxyDoubleTest,
540    ::testing::ValuesIn(
541        std::vector<std::pair<FloatProxy<double>, std::string>>({
542            {0., "0"},
543            {1., "1"},
544            {-0.25, "-0.25"},
545            {1000.0, "1000"},
546
547            // Large outside the range of normal floats
548            {ldexp(1.0, 128), "3.4028236692093846e+38"},
549            {ldexp(1.5, 129), "1.0208471007628154e+39"},
550            {ldexp(-1.0, 128), "-3.4028236692093846e+38"},
551            {ldexp(-1.5, 129), "-1.0208471007628154e+39"},
552
553            // Small outside the range of normal floats
554            {ldexp(1.5, -129), "2.2040519077917891e-39"},
555            {ldexp(-1.5, -129), "-2.2040519077917891e-39"},
556
557            // lowest non-denorm
558            {ldexp(1.0, -1022), "2.2250738585072014e-308"},
559            {ldexp(-1.0, -1022), "-2.2250738585072014e-308"},
560
561            // Denormalized values
562            {ldexp(1.125, -1023), "0x1.2p-1023"},
563            {ldexp(-1.375, -1024), "-0x1.6p-1024"},
564
565            // NaNs
566            {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
567            {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
568
569            // Infinity
570            {std::numeric_limits<double>::infinity(), "0x1p+1024"},
571            {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
572
573        })));
574
575// double is used so that unbiased_exponent can be used with the output
576// of ldexp directly.
577int32_t unbiased_exponent(double f) {
578  return HexFloat<FloatProxy<float>>(static_cast<float>(f))
579      .getUnbiasedNormalizedExponent();
580}
581
582int16_t unbiased_half_exponent(uint16_t f) {
583  return HexFloat<FloatProxy<Float16>>(f).getUnbiasedNormalizedExponent();
584}
585
586TEST(HexFloatOperationTest, UnbiasedExponent) {
587  // Float cases
588  EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
589  EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
590  EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
591  EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
592
593  EXPECT_EQ(128,
594            HexFloat<FloatProxy<float>>(std::numeric_limits<float>::infinity())
595                .getUnbiasedNormalizedExponent());
596
597  EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
598  EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127)));  // First denorm
599  EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
600  EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
601  EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
602  // Smallest representable number
603  EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
604  // Should get rounded to 0 first.
605  EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
606
607  // Float16 cases
608  // The exponent is represented in the bits 0x7C00
609  // The offset is -15
610  EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
611  EXPECT_EQ(3, unbiased_half_exponent(0x4800));
612  EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
613  EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
614  EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
615  EXPECT_EQ(10, unbiased_half_exponent(0x6400));
616
617  // Smallest representable number
618  EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
619}
620
// Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in fractions.
// The terms are added in the order given; an empty list yields 0.
float float_fractions(const std::vector<uint32_t>& fractions) {
  float sum = 0;
  for (const uint32_t power : fractions) {
    const int32_t exponent = static_cast<int32_t>(power);
    sum += std::ldexp(1.0f, -exponent);
  }
  return sum;
}
629
630// Returns the normalized significand of a HexFloat<FloatProxy<float>>
631// that was created by calling float_fractions with the input fractions,
632// raised to the power of exp.
633uint32_t normalized_significand(const std::vector<uint32_t>& fractions,
634                                uint32_t exp) {
635  return HexFloat<FloatProxy<float>>(
636             static_cast<float>(ldexp(float_fractions(fractions), exp)))
637      .getNormalizedSignificand();
638}
639
// Sets the bits from MSB to LSB of the significand part of a float.
// For example 0 would set the bit 23 (counting from LSB to MSB),
// and 1 would set the 22nd bit.
// Indices past the low end of the significand contribute nothing.
uint32_t bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t top_bit = 1u << 22u;
  uint32_t val = 0;
  for (const uint32_t i : bits) {
    // Shifting a 32-bit value by 32 or more is undefined behaviour, so skip
    // such indices explicitly; the result for them is "no bit set" anyway.
    if (i < 32u) {
      val |= top_bit >> i;
    }
  }
  return val;
}
651
// Sets the bits from MSB to LSB of the significand part of a Float16 value:
// 0 sets the top significand bit (bit 9 counting from zero at the LSB), 1 the
// next one down, and so on.
// Indices past the low end of the significand contribute nothing.
uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t top_bit = 1u << 9u;
  uint32_t val = 0;
  for (const uint32_t i : bits) {
    // Shifting a 32-bit value by 32 or more is undefined behaviour, so skip
    // such indices explicitly; the result for them is "no bit set" anyway.
    if (i < 32u) {
      val |= top_bit >> i;
    }
  }
  return static_cast<uint16_t>(val);
}
662
663TEST(HexFloatOperationTest, NormalizedSignificand) {
664  // For normalized numbers (the following) it should be a simple matter
665  // of getting rid of the top implicit bit
666  EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
667  EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
668  EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
669  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
670  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
671  EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));
672
673  // For denormalized numbers we expect the normalized significand to
674  // shift as if it were normalized. This means, in practice that the
675  // top_most set bit will be cut off. Looks very similar to above (on purpose)
676  EXPECT_EQ(bits_set({}),
677            normalized_significand({0}, static_cast<uint32_t>(-127)));
678  EXPECT_EQ(bits_set({3}),
679            normalized_significand({0, 4}, static_cast<uint32_t>(-128)));
680  EXPECT_EQ(bits_set({3}),
681            normalized_significand({0, 4}, static_cast<uint32_t>(-127)));
682  EXPECT_EQ(bits_set({}),
683            normalized_significand({22}, static_cast<uint32_t>(-127)));
684  EXPECT_EQ(bits_set({0}),
685            normalized_significand({21, 22}, static_cast<uint32_t>(-127)));
686}
687
688// Returns the 32-bit floating point value created by
689// calling setFromSignUnbiasedExponentAndNormalizedSignificand
690// on a HexFloat<FloatProxy<float>>
691float set_from_sign(bool negative, int32_t unbiased_exponent,
692                    uint32_t significand, bool round_denorm_up) {
693  HexFloat<FloatProxy<float>> f(0.f);
694  f.setFromSignUnbiasedExponentAndNormalizedSignificand(
695      negative, unbiased_exponent, significand, round_denorm_up);
696  return f.value().getAsFloat();
697}
698
699TEST(HexFloatOperationTests,
700     SetFromSignUnbiasedExponentAndNormalizedSignificand) {
701  EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));
702
703  // Tests insertion of various denormalized numbers with and without round up.
704  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
705            set_from_sign(false, -149, 0, false));
706  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
707            set_from_sign(false, -149, 0, true));
708  EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
709  EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)),
710            set_from_sign(false, -150, 1, true));
711
712  EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
713  EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
714  EXPECT_EQ(float_fractions({0, 1, 2, 5}),
715            set_from_sign(false, 0, bits_set({0, 1, 4}), false));
716  EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
717            set_from_sign(false, -32, bits_set({0, 1, 4}), false));
718  EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
719            set_from_sign(false, -128, bits_set({0, 1, 4}), false));
720
721  // The negative cases from above.
722  EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
723  EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
724  EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
725  EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
726            set_from_sign(true, 0, bits_set({0, 1, 4}), false));
727  EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
728            set_from_sign(true, -32, bits_set({0, 1, 4}), false));
729  EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
730            set_from_sign(true, -128, bits_set({0, 1, 4}), false));
731}
732
733TEST(HexFloatOperationTests, NonRounding) {
734  // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
735  // except in the denorm case which is a bit more complex.
736  using HF = HexFloat<FloatProxy<float>>;
737  bool carry_bit = false;
738
739  round_direction rounding[] = {round_direction::kToZero,
740                                round_direction::kToNearestEven,
741                                round_direction::kToPositiveInfinity,
742                                round_direction::kToNegativeInfinity};
743
744  // Everything fits, so this should be straight-forward
745  for (round_direction round : rounding) {
746    EXPECT_EQ(bits_set({}),
747              HF(0.f).getRoundedNormalizedSignificand<HF>(round, &carry_bit));
748    EXPECT_FALSE(carry_bit);
749
750    EXPECT_EQ(bits_set({0}),
751              HF(float_fractions({0, 1}))
752                  .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
753    EXPECT_FALSE(carry_bit);
754
755    EXPECT_EQ(bits_set({1, 3}),
756              HF(float_fractions({0, 2, 4}))
757                  .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
758    EXPECT_FALSE(carry_bit);
759
760    EXPECT_EQ(
761        bits_set({0, 1, 4}),
762        HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
763            .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
764    EXPECT_FALSE(carry_bit);
765
766    EXPECT_EQ(bits_set({0, 1, 4, 22}),
767              HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
768                  .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
769    EXPECT_FALSE(carry_bit);
770  }
771}
772
// Shorthand for round_direction, used by the parameter tables below.
using RD = round_direction;
// One parameterized case for rounding the significand of a 32-bit float.
struct RoundSignificandCase {
  // The float whose significand is rounded.
  float source_float;
  // Expected outcome: .first is the rounded significand, .second is the
  // expected value of the carry flag.
  std::pair<int16_t, bool> expected_results;
  // The rounding mode to apply.
  round_direction round;
};
779
780using HexFloatRoundTest = ::testing::TestWithParam<RoundSignificandCase>;
781
782TEST_P(HexFloatRoundTest, RoundDownToFP16) {
783  using HF = HexFloat<FloatProxy<float>>;
784  using HF16 = HexFloat<FloatProxy<Float16>>;
785
786  HF input_value(GetParam().source_float);
787  bool carry_bit = false;
788  EXPECT_EQ(GetParam().expected_results.first,
789            input_value.getRoundedNormalizedSignificand<HF16>(GetParam().round,
790                                                              &carry_bit));
791  EXPECT_EQ(carry_bit, GetParam().expected_results.second);
792}
793
// clang-format off
// Cases for HexFloatRoundTest.RoundDownToFP16. Each entry is
// {source float, {expected significand bits, expected carry}, rounding mode}.
// float_fractions() and half_bits_set() are helpers defined elsewhere in this
// file.
INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatRoundTest,
  ::testing::ValuesIn(std::vector<RoundSignificandCase>(
  {
    // No rounding mode is expected to alter these.
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero},

    // Only kToPositiveInfinity is expected to change the result.
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven},

    // kToPositiveInfinity and kToNearestEven are expected to agree on one
    // result, kToZero and kToNegativeInfinity on the other.
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    // Negated input: the expectations for kToPositiveInfinity and
    // kToNegativeInfinity swap relative to the group above.
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    // Carries
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven},

    // Cases where the original number was a denorm. Note: this should have no
    // effect, since the number is pre-normalized.
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
  })));
// clang-format on
843
// One parameterized case for widening a half-precision significand to the
// width used by a 32-bit float.
struct UpCastSignificandCase {
  // Bit pattern of the half-precision source value.
  uint16_t source_half;
  // Expected significand after widening.
  uint32_t expected_result;
};
848
849using HexFloatRoundUpSignificandTest =
850    ::testing::TestWithParam<UpCastSignificandCase>;
851TEST_P(HexFloatRoundUpSignificandTest, Widening) {
852  using HF = HexFloat<FloatProxy<float>>;
853  using HF16 = HexFloat<FloatProxy<Float16>>;
854  bool carry_bit = false;
855
856  round_direction rounding[] = {round_direction::kToZero,
857                                round_direction::kToNearestEven,
858                                round_direction::kToPositiveInfinity,
859                                round_direction::kToNegativeInfinity};
860
861  // Everything fits, so everything should just be bit-shifts.
862  for (round_direction round : rounding) {
863    carry_bit = false;
864    HF16 input_value(GetParam().source_half);
865    EXPECT_EQ(
866        GetParam().expected_result,
867        input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
868        << std::hex << "0x"
869        << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
870        << "  0x" << GetParam().expected_result;
871    EXPECT_FALSE(carry_bit);
872  }
873}
874
// Cases for HexFloatRoundUpSignificandTest.Widening. Each entry is
// {half-precision bit pattern, expected widened significand}.
INSTANTIATE_TEST_SUITE_P(
    F16toF32, HexFloatRoundUpSignificandTest,
    // The bits in 0xFC00 of the 16-bit source value cover the sign and the
    // exponent. They are ignored for this test.
    ::testing::ValuesIn(std::vector<UpCastSignificandCase>({
        {0x3F00, 0x600000},
        // Differs from the entry above only inside the 0xFC00 mask, so the
        // same result is expected.
        {0x0F00, 0x600000},
        {0x0F01, 0x602000},
        {0x0FFF, 0x7FE000},
    })));
885
// One parameterized case for casting a 32-bit float down to half precision.
struct DownCastTest {
  // The float to cast.
  float source_float;
  // Expected half-precision bit pattern.
  uint16_t expected_half;
  // Every rounding mode under which expected_half must be produced.
  std::vector<round_direction> directions;
};
891
892std::string get_round_text(round_direction direction) {
893#define CASE(round_direction) \
894  case round_direction:       \
895    return #round_direction
896
897  switch (direction) {
898    CASE(round_direction::kToZero);
899    CASE(round_direction::kToPositiveInfinity);
900    CASE(round_direction::kToNegativeInfinity);
901    CASE(round_direction::kToNearestEven);
902  }
903#undef CASE
904  return "";
905}
906
907using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
908
909TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
910  using HF = HexFloat<FloatProxy<float>>;
911  using HF16 = HexFloat<FloatProxy<Float16>>;
912  HF f(GetParam().source_float);
913  for (auto round : GetParam().directions) {
914    HF16 half(0);
915    f.castTo(half, round);
916    EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
917        << get_round_text(round) << "  " << std::hex
918        << BitwiseCast<uint32_t>(GetParam().source_float)
919        << " cast to: " << half.value().getAsFloat().get_value();
920  }
921}
922
// Half-precision bit patterns expected when a cast produces +infinity and
// -infinity respectively.
constexpr uint16_t positive_infinity = 0x7C00;
constexpr uint16_t negative_infinity = 0xFC00;
925
// Cases for HexFloatFP32To16Tests.NarrowingCasts. Each entry is
// {source float, expected half bit pattern, rounding modes under which that
// result is expected}.
INSTANTIATE_TEST_SUITE_P(
    F32ToF16, HexFloatFP32To16Tests,
    ::testing::ValuesIn(std::vector<DownCastTest>({
        // Exactly representable as half.
        {0.f,
         0x0,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-0.f,
         0x8000,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {1.0f,
         0x3C00,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-1.0f,
         0xBC00,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        {float_fractions({0, 1, 10}),
         0x3E01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-float_fractions({0, 1, 10}),
         0xBE01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)),
         0x4A01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)),
         0xCA01,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // Underflow
        // 2^-25 is half of the 2^-24 value that maps to 0x1 at the end of
        // this group. Only the mode that rounds away from zero for the given
        // sign is expected to produce a non-zero result.
        {static_cast<float>(ldexp(1.0f, -25)),
         0x0,
         {RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}},
        {static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}},
        {static_cast<float>(-ldexp(1.0f, -25)),
         0x8000,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.0f, -25)),
         0x8001,
         {RD::kToNegativeInfinity}},
        {static_cast<float>(ldexp(1.0f, -24)),
         0x1,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // Overflow
        // Every rounding mode is expected to produce an infinity of the
        // matching sign.
        {static_cast<float>(ldexp(1.0f, 16)),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(ldexp(1.0f, 18)),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(ldexp(1.3f, 16)),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.0f, 16)),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.0f, 18)),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {static_cast<float>(-ldexp(1.3f, 16)),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // Transfer of Infinities
        {std::numeric_limits<float>::infinity(),
         positive_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},
        {-std::numeric_limits<float>::infinity(),
         negative_infinity,
         {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
          RD::kToNearestEven}},

        // NaNs are tested separately below because we cannot test them for
        // equality.
    })));
1018
// One parameterized case for casting a half-precision value up to a 32-bit
// float.
struct UpCastCase {
  // Bit pattern of the half-precision source value.
  uint16_t source_half;
  // Expected float after the cast.
  float expected_float;
};
1023
1024using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
1025TEST_P(HexFloatFP16To32Tests, WideningCasts) {
1026  using HF = HexFloat<FloatProxy<float>>;
1027  using HF16 = HexFloat<FloatProxy<Float16>>;
1028  HF16 f(GetParam().source_half);
1029
1030  round_direction rounding[] = {round_direction::kToZero,
1031                                round_direction::kToNearestEven,
1032                                round_direction::kToPositiveInfinity,
1033                                round_direction::kToNegativeInfinity};
1034
1035  // Everything fits, so everything should just be bit-shifts.
1036  for (round_direction round : rounding) {
1037    HF flt(0.f);
1038    f.castTo(flt, round);
1039    EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
1040        << get_round_text(round) << "  " << std::hex
1041        << BitwiseCast<uint16_t>(GetParam().source_half)
1042        << " cast to: " << flt.value().getAsFloat();
1043  }
1044}
1045
// Cases for HexFloatFP16To32Tests.WideningCasts. Each entry is
// {half-precision bit pattern, expected float}.
INSTANTIATE_TEST_SUITE_P(
    F16ToF32, HexFloatFP16To32Tests,
    ::testing::ValuesIn(std::vector<UpCastCase>({
        {0x0000, 0.f},
        {0x8000, -0.f},
        {0x3C00, 1.0f},
        {0xBC00, -1.0f},
        {0x3F00, float_fractions({0, 1, 2})},
        {0xBF00, -float_fractions({0, 1, 2})},
        {0x3F01, float_fractions({0, 1, 2, 10})},
        {0xBF01, -float_fractions({0, 1, 2, 10})},

        // denorm
        {0x0001, static_cast<float>(ldexp(1.0, -24))},
        {0x0002, static_cast<float>(ldexp(1.0, -23))},
        {0x8001, static_cast<float>(-ldexp(1.0, -24))},
        // Sign bit plus two low bits set (0x0010 and 0x0001).
        {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},

        // inf
        {0x7C00, std::numeric_limits<float>::infinity()},
        {0xFC00, -std::numeric_limits<float>::infinity()},
    })));
1068
1069TEST(HexFloatOperationTests, NanTests) {
1070  using HF = HexFloat<FloatProxy<float>>;
1071  using HF16 = HexFloat<FloatProxy<Float16>>;
1072  round_direction rounding[] = {round_direction::kToZero,
1073                                round_direction::kToNearestEven,
1074                                round_direction::kToPositiveInfinity,
1075                                round_direction::kToNegativeInfinity};
1076
1077  // Everything fits, so everything should just be bit-shifts.
1078  for (round_direction round : rounding) {
1079    HF16 f16(0);
1080    HF f(0.f);
1081    HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
1082    EXPECT_TRUE(f16.value().isNan());
1083    HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
1084    EXPECT_TRUE(f16.value().isNan());
1085
1086    HF16(0x7C01).castTo(f, round);
1087    EXPECT_TRUE(f.value().isNan());
1088    HF16(0x7C11).castTo(f, round);
1089    EXPECT_TRUE(f.value().isNan());
1090    HF16(0xFC01).castTo(f, round);
1091    EXPECT_TRUE(f.value().isNan());
1092    HF16(0x7C10).castTo(f, round);
1093    EXPECT_TRUE(f.value().isNan());
1094    HF16(0xFF00).castTo(f, round);
1095    EXPECT_TRUE(f.value().isNan());
1096  }
1097}
1098
// A test case for parsing good and bad HexFloat<FloatProxy<T>> literals with
// ParseNormalFloat.
template <typename T>
struct FloatParseCase {
  // The text to parse.
  std::string literal;
  // Passed to ParseNormalFloat as its negate_value argument.
  bool negate_value;
  // Whether the parse is expected to leave the stream in a non-failed state.
  bool expect_success;
  // The expected parsed value. The tests compare it only when expect_success
  // is true.
  HexFloat<FloatProxy<T>> expected_value;
};
1107
1108using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
1109
1110TEST_P(ParseNormalFloatTest, Samples) {
1111  std::stringstream input(GetParam().literal);
1112  HexFloat<FloatProxy<float>> parsed_value(0.0f);
1113  ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1114  EXPECT_NE(GetParam().expect_success, input.fail())
1115      << " literal: " << GetParam().literal
1116      << " negate: " << GetParam().negate_value;
1117  if (GetParam().expect_success) {
1118    EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1119        << " literal: " << GetParam().literal
1120        << " negate: " << GetParam().negate_value;
1121  }
1122}
1123
1124// Returns a FloatParseCase with expected failure.
1125template <typename T>
1126FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
1127                                    T expected_value) {
1128  HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1129  return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
1130}
1131
1132// Returns a FloatParseCase that should successfully parse to a given value.
1133template <typename T>
1134FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
1135                                     T expected_value) {
1136  HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1137  return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
1138}
1139
// Cases for ParseNormalFloatTest.Samples (32-bit float). The arguments to the
// Good/BadFloatParseCase helpers are (literal, negate_value, expected value).
INSTANTIATE_TEST_SUITE_P(
    FloatParse, ParseNormalFloatTest,
    ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
        // Failing cases due to trivially incorrect syntax.
        BadFloatParseCase("abc", false, 0.0f),
        BadFloatParseCase("abc", true, 0.0f),

        // Valid cases.
        GoodFloatParseCase("0", false, 0.0f),
        GoodFloatParseCase("0.0", false, 0.0f),
        GoodFloatParseCase("-0.0", false, -0.0f),
        GoodFloatParseCase("2.0", false, 2.0f),
        GoodFloatParseCase("-2.0", false, -2.0f),
        GoodFloatParseCase("+2.0", false, 2.0f),
        // Cases with negate_value being true.
        GoodFloatParseCase("0.0", true, -0.0f),
        GoodFloatParseCase("2.0", true, -2.0f),

        // When negate_value is true, we should not accept a
        // leading minus or plus.
        BadFloatParseCase("-0.0", true, 0.0f),
        BadFloatParseCase("-2.0", true, 0.0f),
        BadFloatParseCase("+0.0", true, 0.0f),
        BadFloatParseCase("+2.0", true, 0.0f),

        // Overflow is an error for 32-bit float parsing.
        BadFloatParseCase("1e40", false, FLT_MAX),
        BadFloatParseCase("1e40", true, -FLT_MAX),
        BadFloatParseCase("-1e40", false, -FLT_MAX),
        // We can't have -1e40 and negate_value == true since
        // that represents an original case of "--1e40" which
        // is invalid.
    }));
1173
1174using ParseNormalFloat16Test =
1175    ::testing::TestWithParam<FloatParseCase<Float16>>;
1176
1177TEST_P(ParseNormalFloat16Test, Samples) {
1178  std::stringstream input(GetParam().literal);
1179  HexFloat<FloatProxy<Float16>> parsed_value(0);
1180  ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1181  EXPECT_NE(GetParam().expect_success, input.fail())
1182      << " literal: " << GetParam().literal
1183      << " negate: " << GetParam().negate_value;
1184  if (GetParam().expect_success) {
1185    EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1186        << " literal: " << GetParam().literal
1187        << " negate: " << GetParam().negate_value;
1188  }
1189}
1190
// Cases for ParseNormalFloat16Test.Samples (16-bit float). Expected values are
// given as 16-bit patterns. The arguments to the Good/BadFloatParseCase
// helpers are (literal, negate_value, expected value).
INSTANTIATE_TEST_SUITE_P(
    Float16Parse, ParseNormalFloat16Test,
    ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
        // Failing cases due to trivially incorrect syntax.
        BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
        BadFloatParseCase<Float16>("abc", true, uint16_t{0}),

        // Valid cases.
        GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
        GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
        GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
        GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
        GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
        GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
        // Cases with negate_value being true.
        GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
        GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),

        // When negate_value is true, we should not accept a leading minus or
        // plus.
        BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
    }));
1216
// A test case for parsing a literal that may be too large for the target
// floating point type.
template <typename T>
struct OverflowParseCase {
  // The text to parse.
  std::string input;
  // Whether the parse is expected to leave the stream in a non-failed state.
  bool expect_success;
  // The expected parsed value. The tests compare it only when expect_success
  // is true.
  T expected_value;
};
1224
1225using FloatProxyParseOverflowFloatTest =
1226    ::testing::TestWithParam<OverflowParseCase<float>>;
1227
1228TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
1229  std::istringstream input(GetParam().input);
1230  HexFloat<FloatProxy<float>> value(0.0f);
1231  input >> value;
1232  EXPECT_NE(GetParam().expect_success, input.fail());
1233  if (GetParam().expect_success) {
1234    EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
1235  }
1236}
1237
// Cases for FloatProxyParseOverflowFloatTest.Sample. Each entry is
// {input text, expect_success, expected value}. The expected value of an entry
// with expect_success == false is not compared by the test body.
INSTANTIATE_TEST_SUITE_P(
    FloatOverflow, FloatProxyParseOverflowFloatTest,
    ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
        {"0", true, 0.0f},
        {"0.0", true, 0.0f},
        {"1.0", true, 1.0f},
        {"1e38", true, 1e38f},
        {"-1e38", true, -1e38f},
        // Too large for a 32-bit float: the parse is expected to fail.
        {"1e40", false, FLT_MAX},
        {"-1e40", false, -FLT_MAX},
        {"1e400", false, FLT_MAX},
        {"-1e400", false, -FLT_MAX},
    })));
1251
1252using FloatProxyParseOverflowDoubleTest =
1253    ::testing::TestWithParam<OverflowParseCase<double>>;
1254
1255TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
1256  std::istringstream input(GetParam().input);
1257  HexFloat<FloatProxy<double>> value(0.0);
1258  input >> value;
1259  EXPECT_NE(GetParam().expect_success, input.fail());
1260  if (GetParam().expect_success) {
1261    EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
1262  }
1263}
1264
// Cases for FloatProxyParseOverflowDoubleTest.Sample. Each entry is
// {input text, expect_success, expected value}. The expected value of an entry
// with expect_success == false is not compared by the test body.
INSTANTIATE_TEST_SUITE_P(
    DoubleOverflow, FloatProxyParseOverflowDoubleTest,
    ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
        {"0", true, 0.0},
        {"0.0", true, 0.0},
        {"1.0", true, 1.0},
        {"1e38", true, 1e38},
        {"-1e38", true, -1e38},
        // Unlike the 32-bit table, 1e40 is expected to parse successfully
        // here.
        {"1e40", true, 1e40},
        {"-1e40", true, -1e40},
        // Too large for a 64-bit float: the parse is expected to fail.
        {"1e400", false, DBL_MAX},
        {"-1e400", false, -DBL_MAX},
    })));
1278
1279using FloatProxyParseOverflowFloat16Test =
1280    ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
1281
1282TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
1283  std::istringstream input(GetParam().input);
1284  HexFloat<FloatProxy<Float16>> value(0);
1285  input >> value;
1286  EXPECT_NE(GetParam().expect_success, input.fail())
1287      << " literal: " << GetParam().input;
1288  if (GetParam().expect_success) {
1289    EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
1290        << " literal: " << GetParam().input;
1291  }
1292}
1293
// Cases for FloatProxyParseOverflowFloat16Test.Sample. Each entry is
// {input text, expect_success, expected 16-bit pattern}. The expected pattern
// of an entry with expect_success == false is not compared by the test body.
INSTANTIATE_TEST_SUITE_P(
    Float16Overflow, FloatProxyParseOverflowFloat16Test,
    ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
        {"0", true, uint16_t{0}},
        {"0.0", true, uint16_t{0}},
        {"1.0", true, uint16_t{0x3c00}},
        // Overflow for 16-bit float is an error, and returns max or
        // lowest value.
        {"1e38", false, uint16_t{0x7bff}},
        {"1e40", false, uint16_t{0x7bff}},
        {"1e400", false, uint16_t{0x7bff}},
        {"-1e38", false, uint16_t{0xfbff}},
        {"-1e40", false, uint16_t{0xfbff}},
        {"-1e400", false, uint16_t{0xfbff}},
    })));
1309
1310TEST(FloatProxy, Max) {
1311  EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
1312              Eq(uint16_t{0x7bff}));
1313  EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
1314              Eq(std::numeric_limits<float>::max()));
1315  EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
1316              Eq(std::numeric_limits<double>::max()));
1317}
1318
1319TEST(FloatProxy, Lowest) {
1320  EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
1321              Eq(uint16_t{0xfbff}));
1322  EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
1323              Eq(std::numeric_limits<float>::lowest()));
1324  EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
1325              Eq(std::numeric_limits<double>::lowest()));
1326}
1327
1328template <typename T>
1329struct StreamParseCase {
1330  StreamParseCase(const std::string& lit, bool succ, const std::string& suffix,
1331                  T value)
1332      : literal(lit),
1333        expect_success(succ),
1334        expected_suffix(suffix),
1335        expected_value(HexFloat<FloatProxy<T>>(value)) {}
1336
1337  std::string literal;
1338  bool expect_success;
1339  std::string expected_suffix;
1340  HexFloat<FloatProxy<T>> expected_value;
1341};
1342
1343template <typename T>
1344std::ostream& operator<<(std::ostream& os, const StreamParseCase<T>& fspc) {
1345  os << "StreamParseCase(" << fspc.literal
1346     << ", expect_success:" << int(fspc.expect_success) << ","
1347     << fspc.expected_suffix << "," << fspc.expected_value << ")";
1348  return os;
1349}
1350
1351using Float32StreamParseTest = ::testing::TestWithParam<StreamParseCase<float>>;
1352using Float16StreamParseTest =
1353    ::testing::TestWithParam<StreamParseCase<Float16>>;
1354
1355TEST_P(Float32StreamParseTest, Samples) {
1356  std::stringstream input(GetParam().literal);
1357  HexFloat<FloatProxy<float>> parsed_value(0.0f);
1358  // Hex floats must be read with the stream input operator.
1359  input >> parsed_value;
1360  if (GetParam().expect_success) {
1361    EXPECT_FALSE(input.fail());
1362    std::string suffix;
1363    input >> suffix;
1364    // EXPECT_EQ(suffix, GetParam().expected_suffix);
1365    EXPECT_EQ(parsed_value.value().getAsFloat(),
1366              GetParam().expected_value.value().getAsFloat());
1367  } else {
1368    EXPECT_TRUE(input.fail());
1369  }
1370}
1371
1372// Returns a Float16 constructed from its sign bit, unbiased exponent, and
1373// mantissa.
1374Float16 makeF16(int sign_bit, int unbiased_exp, int mantissa) {
1375  EXPECT_LE(0, sign_bit);
1376  EXPECT_LE(sign_bit, 1);
1377  // Exponent is 5 bits, with bias of 15.
1378  EXPECT_LE(-15, unbiased_exp);  // -15 means zero or subnormal
1379  EXPECT_LE(unbiased_exp, 16);   // 16 means infinity or NaN
1380  EXPECT_LE(0, mantissa);
1381  EXPECT_LE(mantissa, 0x3ff);
1382  const unsigned biased_exp = 15 + unbiased_exp;
1383  const uint32_t as_bits = sign_bit << 15 | (biased_exp << 10) | mantissa;
1384  EXPECT_LE(as_bits, 0xffffu);
1385  return Float16(static_cast<uint16_t>(as_bits));
1386}
1387
1388TEST_P(Float16StreamParseTest, Samples) {
1389  std::stringstream input(GetParam().literal);
1390  HexFloat<FloatProxy<Float16>> parsed_value(makeF16(0, 0, 0));
1391  // Hex floats must be read with the stream input operator.
1392  input >> parsed_value;
1393  if (GetParam().expect_success) {
1394    EXPECT_FALSE(input.fail());
1395    std::string suffix;
1396    input >> suffix;
1397    const auto got = parsed_value.value();
1398    const auto expected = GetParam().expected_value.value();
1399    EXPECT_EQ(got.data(), expected.data())
1400        << "got: " << got << " expected: " << expected;
1401  } else {
1402    EXPECT_TRUE(input.fail());
1403  }
1404}
1405
// Cases for Float32StreamParseTest.Samples. Each entry is
// {literal, expect_success, expected suffix, expected value}.
INSTANTIATE_TEST_SUITE_P(
    HexFloat32FillSignificantDigits, Float32StreamParseTest,
    ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
        {"0x123456p0", true, "", ldexpf(0x123456, 0)},
        // Patterns that fill all mantissa bits
        {"0x1.fffffep+23", true, "", ldexpf(0x1fffffe, -1)},
        {"0x1f.ffffep+19", true, "", ldexpf(0x1fffffe, -1)},
        {"0x1ff.fffep+15", true, "", ldexpf(0x1fffffe, -1)},
        {"0x1fff.ffep+11", true, "", ldexpf(0x1fffffe, -1)},
        {"0x1ffff.fep+7", true, "", ldexpf(0x1fffffe, -1)},
        {"0x1fffff.ep+3", true, "", ldexpf(0x1fffffe, -1)},
        {"0x1fffffe.p-1", true, "", ldexpf(0x1fffffe, -1)},
        {"0xffffff.p+0", true, "", ldexpf(0x1fffffe, -1)},
        // Same literal as the entry above; the expected value is written in a
        // different but equal form (0x1fffffe / 2 == 0xffffff).
        {"0xffffff.p+0", true, "", ldexpf(0xffffff, 0)},
        // Now drop some bits in the middle
        {"0xa5a5a5.p+0", true, "", ldexpf(0xa5a5a5, 0)},
        {"0x5a5a5a.p+0", true, "", ldexpf(0x5a5a5a, 0)}}));
1423
// Cases for Float32StreamParseTest.Samples whose literals carry more hex
// digits than the base cases. Each entry is
// {literal, expect_success, expected suffix, expected value}.
INSTANTIATE_TEST_SUITE_P(
    HexFloat32ExcessSignificantDigits, Float32StreamParseTest,
    ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
        // Base cases
        {"0x1.fffffep0", true, "", ldexpf(0xffffff, -23)},
        {"0xa5a5a5p0", true, "", ldexpf(0xa5a5a5, 0)},
        {"0xa.5a5a5p+9", true, "", ldexpf(0xa5a5a5, -11)},
        {"0x5a5a5ap0", true, "", ldexpf(0x5a5a5a, 0)},
        {"0x5.a5a5ap+9", true, "", ldexpf(0x5a5a5a, -11)},
        // Truncate extra bits: zeroes
        {"0x1.fffffe0p0", true, "", ldexpf(0xffffff, -23)},
        {"0xa5a5a5000p0", true, "", ldexpf(0xa5a5a5, 12)},
        {"0xa.5a5a5000p+9", true, "", ldexpf(0xa5a5a5, -11)},
        {"0x5a5a5a000p0", true, "", ldexpf(0x5a5a5a, 12)},
        {"0x5.a5a5a000p+9", true, "", ldexpf(0x5a5a5a, -11)},
        // Truncate extra bits: ones
        {"0x1.ffffffp0",  // Extra bits in the last nibble
         true, "", ldexpf(0xffffff, -23)},
        {"0x1.fffffffp0", true, "", ldexpf(0xffffff, -23)},
        {"0xa5a5a5fffp0", true, "", ldexpf(0xa5a5a5, 12)},
        {"0xa.5a5a5fffp+9", true, "", ldexpf(0xa5a5a5, -11)},
        {"0x5a5a5afffp0",
         // The 5 nibble (0101), leads with 0, so the result can fit a leading
         // 1 bit , yielding 8 (1000).
         true, "", ldexpf(0x5a5a5a8, 8)},
        {"0x5.a5a5afffp+9", true, "", ldexpf(0x5a5a5a8, 8 - 32 + 9)}}));
1450
// Cases for Float32StreamParseTest.Samples covering the binary exponent
// digits. Each entry is {literal, expect_success, expected suffix, expected
// value}. Every literal that has no digit after the 'p' and its optional sign
// is expected to fail.
INSTANTIATE_TEST_SUITE_P(
    HexFloat32ExponentMissingDigits, Float32StreamParseTest,
    ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
        {"0x1.0p1", true, "", 2.0f},
        {"0x1.0p1a", true, "a", 2.0f},
        {"-0x1.0p1f", true, "f", -2.0f},
        {"0x1.0p", false, "", 0.0f},
        {"0x1.0pa", false, "", 0.0f},
        {"0x1.0p!", false, "", 0.0f},
        {"0x1.0p+", false, "", 0.0f},
        {"0x1.0p+a", false, "", 0.0f},
        {"0x1.0p+!", false, "", 0.0f},
        {"0x1.0p-", false, "", 0.0f},
        {"0x1.0p-a", false, "", 0.0f},
        {"0x1.0p-!", false, "", 0.0f},
        {"0x1.0p++", false, "", 0.0f},
        {"0x1.0p+-", false, "", 0.0f},
        {"0x1.0p-+", false, "", 0.0f},
        {"0x1.0p--", false, "", 0.0f}}));
1470
// Cases for Float32StreamParseTest.Samples where a sign character follows the
// exponent digits. Each entry is {literal, expect_success, expected suffix,
// expected value}.
INSTANTIATE_TEST_SUITE_P(
    HexFloat32ExponentTrailingSign, Float32StreamParseTest,
    ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
        // Don't consume a sign after the binary exponent digits.
        {"0x1.0p1", true, "", 2.0f},
        {"0x1.0p1+", true, "+", 2.0f},
        {"0x1.0p1-", true, "-", 2.0f}}));
1478
// Cases for Float32StreamParseTest.Samples with large positive exponents.
// Each entry is {literal, expect_success, expected suffix, expected value}.
// Every case is expected to parse successfully, including those whose
// expected value is infinity.
INSTANTIATE_TEST_SUITE_P(
    HexFloat32PositiveExponentOverflow, Float32StreamParseTest,
    ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
        // Positive exponents
        {"0x1.0p1", true, "", 2.0f},       // fine, a normal number
        {"0x1.0p15", true, "", 32768.0f},  // fine, a normal number
        {"0x1.0p127", true, "", float(ldexp(1.0f, 127))},   // good large number
        {"0x0.8p128", true, "", float(ldexp(1.0f, 127))},   // good large number
        {"0x0.1p131", true, "", float(ldexp(1.0f, 127))},   // good large number
        {"0x0.01p135", true, "", float(ldexp(1.0f, 127))},  // good large number
        {"0x1.0p128", true, "", float(ldexp(1.0f, 128))},   // infinity
        {"0x1.0p4294967295", true, "", float(ldexp(1.0f, 128))},  // infinity
        {"0x1.0p5000000000", true, "", float(ldexp(1.0f, 128))},  // infinity
        {"0x0.0p5000000000", true, "", 0.0f},  // zero mantissa, zero result
    }));
1494
1495INSTANTIATE_TEST_SUITE_P(
1496    HexFloat32NegativeExponentOverflow, Float32StreamParseTest,
1497    ::testing::ValuesIn(std::vector<StreamParseCase<float>>{
1498        // Positive results, digits before '.'
1499        {"0x1.0p-126", true, "",
1500         float(ldexp(1.0f, -126))},  // fine, a small normal number
1501        {"0x1.0p-127", true, "", float(ldexp(1.0f, -127))},  // denorm number
1502        {"0x1.0p-149", true, "",
1503         float(ldexp(1.0f, -149))},  // smallest positive denormal
1504        {"0x0.8p-148", true, "",
1505         float(ldexp(1.0f, -149))},  // smallest positive denormal
1506        {"0x0.1p-145", true, "",
1507         float(ldexp(1.0f, -149))},  // smallest positive denormal
1508        {"0x0.01p-141", true, "",
1509         float(ldexp(1.0f, -149))},  // smallest positive denormal
1510
1511        // underflow rounds down to zero
1512        {"0x1.0p-150", true, "", 0.0f},
1513        {"0x1.0p-4294967296", true, "",
1514         0.0f},  // avoid exponent overflow in parser
1515        {"0x1.0p-5000000000", true, "",
1516         0.0f},  // avoid exponent overflow in parser
1517        {"0x0.0p-5000000000", true, "", 0.0f},  // zero mantissa, zero result
1518    }));
1519
1520INSTANTIATE_TEST_SUITE_P(
1521    HexFloat16ExcessSignificantDigits, Float16StreamParseTest,
1522    ::testing::ValuesIn(std::vector<StreamParseCase<Float16>>{
1523        // Zero
1524        {"0x1.c00p0", true, "", makeF16(0, 0, 0x300)},
1525        {"0x0p0", true, "", makeF16(0, -15, 0x0)},
1526        {"0x000.0000p0", true, "", makeF16(0, -15, 0x0)},
1527        // All leading 1s
1528        {"0x1p0", true, "", makeF16(0, 0, 0x0)},
1529        {"0x1.8p0", true, "", makeF16(0, 0, 0x200)},
1530        {"0x1.cp0", true, "", makeF16(0, 0, 0x300)},
1531        {"0x1.ep0", true, "", makeF16(0, 0, 0x380)},
1532        {"0x1.fp0", true, "", makeF16(0, 0, 0x3c0)},
1533        {"0x1.f8p0", true, "", makeF16(0, 0, 0x3e0)},
1534        {"0x1.fcp0", true, "", makeF16(0, 0, 0x3f0)},
1535        {"0x1.fep0", true, "", makeF16(0, 0, 0x3f8)},
1536        {"0x1.ffp0", true, "", makeF16(0, 0, 0x3fc)},
1537        // Fill trailing zeros to all significant places
1538        // that might be used for significant digits.
1539        {"0x1.ff8p0", true, "", makeF16(0, 0, 0x3fe)},
1540        {"0x1.ffcp0", true, "", makeF16(0, 0, 0x3ff)},
1541        {"0x1.800p0", true, "", makeF16(0, 0, 0x200)},
1542        {"0x1.c00p0", true, "", makeF16(0, 0, 0x300)},
1543        {"0x1.e00p0", true, "", makeF16(0, 0, 0x380)},
1544        {"0x1.f00p0", true, "", makeF16(0, 0, 0x3c0)},
1545        {"0x1.f80p0", true, "", makeF16(0, 0, 0x3e0)},
1546        {"0x1.fc0p0", true, "", makeF16(0, 0, 0x3f0)},
1547        {"0x1.fe0p0", true, "", makeF16(0, 0, 0x3f8)},
1548        {"0x1.ff0p0", true, "", makeF16(0, 0, 0x3fc)},
1549        {"0x1.ff8p0", true, "", makeF16(0, 0, 0x3fe)},
1550        {"0x1.ffcp0", true, "", makeF16(0, 0, 0x3ff)},
1551        // Add several trailing zeros
1552        {"0x1.c00000p0", true, "", makeF16(0, 0, 0x300)},
1553        {"0x1.e00000p0", true, "", makeF16(0, 0, 0x380)},
1554        {"0x1.f00000p0", true, "", makeF16(0, 0, 0x3c0)},
1555        {"0x1.f80000p0", true, "", makeF16(0, 0, 0x3e0)},
1556        {"0x1.fc0000p0", true, "", makeF16(0, 0, 0x3f0)},
1557        {"0x1.fe0000p0", true, "", makeF16(0, 0, 0x3f8)},
1558        {"0x1.ff0000p0", true, "", makeF16(0, 0, 0x3fc)},
1559        {"0x1.ff8000p0", true, "", makeF16(0, 0, 0x3fe)},
1560        {"0x1.ffcp0000", true, "", makeF16(0, 0, 0x3ff)},
1561        // Samples that drop out bits in the middle.
1562        //   5 = 0101    4 = 0100
1563        //   a = 1010    8 = 1000
1564        {"0x1.5a4p0", true, "", makeF16(0, 0, 0x169)},
1565        {"0x1.a58p0", true, "", makeF16(0, 0, 0x296)},
1566        // Samples that drop out bits *and* truncate significant bits
1567        // that can't be represented.
1568        {"0x1.5a40000p0", true, "", makeF16(0, 0, 0x169)},
1569        {"0x1.5a7ffffp0", true, "", makeF16(0, 0, 0x169)},
1570        {"0x1.a580000p0", true, "", makeF16(0, 0, 0x296)},
1571        {"0x1.a5bffffp0", true, "", makeF16(0, 0, 0x296)},
1572        // Try some negations.
1573        {"-0x0p0", true, "", makeF16(1, -15, 0x0)},
1574        {"-0x000.0000p0", true, "", makeF16(1, -15, 0x0)},
1575        {"-0x1.5a40000p0", true, "", makeF16(1, 0, 0x169)},
1576        {"-0x1.5a7ffffp0", true, "", makeF16(1, 0, 0x169)},
1577        {"-0x1.a580000p0", true, "", makeF16(1, 0, 0x296)},
1578        {"-0x1.a5bffffp0", true, "", makeF16(1, 0, 0x296)}}));
1579
// Parses 16-bit float literals while varying the leading hex digit and the
// binary exponent together. Every case is marked as succeeding with nothing
// left over. Within each "Leading N" group the literal's exponent is chosen
// so that the second makeF16 argument covers values between -14 and 16.
// NOTE(review): makeF16's arguments look like (sign, unbiased exponent,
// mantissa bits), and an exponent argument of 16 is presumably the
// overflow/infinity encoding -- confirm against makeF16's definition.
INSTANTIATE_TEST_SUITE_P(
    HexFloat16IncreasingExponentsAndMantissa, Float16StreamParseTest,
    ::testing::ValuesIn(std::vector<StreamParseCase<Float16>>{
        // Zero
        // A zero significand with a huge exponent has the same expected value
        // as plain zero; only the sign argument differs for the negated form.
        {"0x0p0", true, "", makeF16(0, -15, 0x0)},
        {"0x0p5000000000000", true, "", makeF16(0, -15, 0x0)},
        {"-0x0p5000000000000", true, "", makeF16(1, -15, 0x0)},
        // Leading 1
        {"0x1p0", true, "", makeF16(0, 0, 0x0)},
        {"0x1p1", true, "", makeF16(0, 1, 0x0)},
        {"0x1p16", true, "", makeF16(0, 16, 0x0)},
        {"0x1p-1", true, "", makeF16(0, -1, 0x0)},
        {"0x1p-14", true, "", makeF16(0, -14, 0x0)},
        // Leading 2
        // 0x2 is 1 * 2^1, so the expected exponent is one more than written.
        {"0x2p0", true, "", makeF16(0, 1, 0x0)},
        {"0x2p1", true, "", makeF16(0, 2, 0x0)},
        {"0x2p15", true, "", makeF16(0, 16, 0x0)},
        {"0x2p-1", true, "", makeF16(0, 0, 0x0)},
        {"0x2p-15", true, "", makeF16(0, -14, 0x0)},
        // Leading 8
        // 0x8 is 1 * 2^3, so the expected exponent is three more than written.
        {"0x8p0", true, "", makeF16(0, 3, 0x0)},
        {"0x8p1", true, "", makeF16(0, 4, 0x0)},
        {"0x8p13", true, "", makeF16(0, 16, 0x0)},
        {"0x8p-3", true, "", makeF16(0, 0, 0x0)},
        {"0x8p-17", true, "", makeF16(0, -14, 0x0)},
        // Leading 10
        // 0x10 is 1 * 2^4, so the expected exponent is four more than written.
        {"0x10.0p0", true, "", makeF16(0, 4, 0x0)},
        {"0x10.0p1", true, "", makeF16(0, 5, 0x0)},
        {"0x10.0p12", true, "", makeF16(0, 16, 0x0)},
        {"0x10.0p-5", true, "", makeF16(0, -1, 0x0)},
        {"0x10.0p-18", true, "", makeF16(0, -14, 0x0)},
        // Samples that drop out bits *and* truncate significant bits
        // that can't be represented.
        // Progressively increase the leading digit.
        // Each doubling of the leading digit raises the expected exponent by
        // one and leaves one bit fewer of the fraction in the mantissa.
        {"0x1.5a40000p0", true, "", makeF16(0, 0, 0x169)},
        {"0x1.5a7ffffp0", true, "", makeF16(0, 0, 0x169)},
        {"0x2.5a40000p0", true, "", makeF16(0, 1, 0x0b4)},
        {"0x2.5a7ffffp0", true, "", makeF16(0, 1, 0x0b4)},
        {"0x4.5a40000p0", true, "", makeF16(0, 2, 0x05a)},
        {"0x4.5a7ffffp0", true, "", makeF16(0, 2, 0x05a)},
        {"0x8.5a40000p0", true, "", makeF16(0, 3, 0x02d)},
        {"0x8.5a7ffffp0", true, "", makeF16(0, 3, 0x02d)}}));
1622
1623}  // namespace
1624}  // namespace utils
1625}  // namespace spvtools
1626