1// Copyright (c) 2015-2016 The Khronos Group Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include <cfloat> 16#include <cmath> 17#include <cstdio> 18#include <limits> 19#include <sstream> 20#include <string> 21#include <tuple> 22#include <utility> 23#include <vector> 24 25#include "gmock/gmock.h" 26#include "source/util/hex_float.h" 27#include "test/unit_spirv.h" 28 29namespace spvtools { 30namespace utils { 31namespace { 32 33using ::testing::Eq; 34 35// In this file "encode" means converting a number into a string, 36// and "decode" means converting a string into a number. 37 38using HexFloatTest = 39 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>; 40using DecodeHexFloatTest = 41 ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>; 42using HexDoubleTest = 43 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>; 44using DecodeHexDoubleTest = 45 ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>; 46using RoundTripFloatTest = ::testing::TestWithParam<float>; 47using RoundTripDoubleTest = ::testing::TestWithParam<double>; 48 49// Hex-encodes a float value. 50template <typename T> 51std::string EncodeViaHexFloat(const T& value) { 52 std::stringstream ss; 53 ss << HexFloat<T>(value); 54 return ss.str(); 55} 56 57// The following two tests can't be DRY because they take different parameter 58// types. 59 60TEST_P(HexFloatTest, EncodeCorrectly) { 61 EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second)); 62} 63 64TEST_P(HexDoubleTest, EncodeCorrectly) { 65 EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second)); 66} 67 68// Decodes a hex-float string. 69template <typename T> 70FloatProxy<T> Decode(const std::string& str) { 71 HexFloat<FloatProxy<T>> decoded(0.f); 72 EXPECT_TRUE((std::stringstream(str) >> decoded).eof()); 73 return decoded.value(); 74} 75 76TEST_P(HexFloatTest, DecodeCorrectly) { 77 EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first)); 78} 79 80TEST_P(HexDoubleTest, DecodeCorrectly) { 81 EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first)); 82} 83 84INSTANTIATE_TEST_SUITE_P( 85 Float32Tests, HexFloatTest, 86 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({ 87 {0.f, "0x0p+0"}, 88 {1.f, "0x1p+0"}, 89 {2.f, "0x1p+1"}, 90 {3.f, "0x1.8p+1"}, 91 {0.5f, "0x1p-1"}, 92 {0.25f, "0x1p-2"}, 93 {0.75f, "0x1.8p-1"}, 94 {-0.f, "-0x0p+0"}, 95 {-1.f, "-0x1p+0"}, 96 {-0.5f, "-0x1p-1"}, 97 {-0.25f, "-0x1p-2"}, 98 {-0.75f, "-0x1.8p-1"}, 99 100 // Larger numbers 101 {512.f, "0x1p+9"}, 102 {-512.f, "-0x1p+9"}, 103 {1024.f, "0x1p+10"}, 104 {-1024.f, "-0x1p+10"}, 105 {1024.f + 8.f, "0x1.02p+10"}, 106 {-1024.f - 8.f, "-0x1.02p+10"}, 107 108 // Small numbers 109 {1.0f / 512.f, "0x1p-9"}, 110 {1.0f / -512.f, "-0x1p-9"}, 111 {1.0f / 1024.f, "0x1p-10"}, 112 {1.0f / -1024.f, "-0x1p-10"}, 113 {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"}, 114 {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"}, 115 116 // lowest non-denorm 117 {float(ldexp(1.0f, -126)), "0x1p-126"}, 118 {float(ldexp(-1.0f, -126)), "-0x1p-126"}, 119 120 // Denormalized values 121 {float(ldexp(1.0f, -127)), "0x1p-127"}, 122 {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"}, 123 {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"}, 124 {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"}, 125 {float(ldexp(-1.0f, -127)), "-0x1p-127"}, 126 {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"}, 127 {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"}, 128 {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"}, 129 130 {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"}, 131 {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)), 132 "0x1.8p-128"}, 133 134 }))); 135 136INSTANTIATE_TEST_SUITE_P( 137 Float32NanTests, HexFloatTest, 138 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({ 139 // Various NAN and INF cases 140 {uint32_t(0xFF800000), "-0x1p+128"}, // -inf 141 {uint32_t(0x7F800000), "0x1p+128"}, // inf 142 {uint32_t(0xFFC00000), "-0x1.8p+128"}, // -nan 143 {uint32_t(0xFF800100), "-0x1.0002p+128"}, // -nan 144 {uint32_t(0xFF800c00), "-0x1.0018p+128"}, // -nan 145 {uint32_t(0xFF80F000), "-0x1.01ep+128"}, // -nan 146 {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"}, // -nan 147 {uint32_t(0x7FC00000), "0x1.8p+128"}, // +nan 148 {uint32_t(0x7F800100), "0x1.0002p+128"}, // +nan 149 {uint32_t(0x7f800c00), "0x1.0018p+128"}, // +nan 150 {uint32_t(0x7F80F000), "0x1.01ep+128"}, // +nan 151 {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"}, // +nan 152 }))); 153 154INSTANTIATE_TEST_SUITE_P( 155 Float64Tests, HexDoubleTest, 156 ::testing::ValuesIn( 157 std::vector<std::pair<FloatProxy<double>, std::string>>({ 158 {0., "0x0p+0"}, 159 {1., "0x1p+0"}, 160 {2., "0x1p+1"}, 161 {3., "0x1.8p+1"}, 162 {0.5, "0x1p-1"}, 163 {0.25, "0x1p-2"}, 164 {0.75, "0x1.8p-1"}, 165 {-0., "-0x0p+0"}, 166 {-1., "-0x1p+0"}, 167 {-0.5, "-0x1p-1"}, 168 {-0.25, "-0x1p-2"}, 169 {-0.75, "-0x1.8p-1"}, 170 171 // Larger numbers 172 {512., "0x1p+9"}, 173 {-512., "-0x1p+9"}, 174 {1024., "0x1p+10"}, 175 {-1024., "-0x1p+10"}, 176 {1024. + 8., "0x1.02p+10"}, 177 {-1024. - 8., "-0x1.02p+10"}, 178 179 // Large outside the range of normal floats 180 {ldexp(1.0, 128), "0x1p+128"}, 181 {ldexp(1.0, 129), "0x1p+129"}, 182 {ldexp(-1.0, 128), "-0x1p+128"}, 183 {ldexp(-1.0, 129), "-0x1p+129"}, 184 {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"}, 185 {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"}, 186 {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"}, 187 {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"}, 188 189 // Small numbers 190 {1.0 / 512., "0x1p-9"}, 191 {1.0 / -512., "-0x1p-9"}, 192 {1.0 / 1024., "0x1p-10"}, 193 {1.0 / -1024., "-0x1p-10"}, 194 {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"}, 195 {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"}, 196 197 // Small outside the range of normal floats 198 {ldexp(1.0, -128), "0x1p-128"}, 199 {ldexp(1.0, -129), "0x1p-129"}, 200 {ldexp(-1.0, -128), "-0x1p-128"}, 201 {ldexp(-1.0, -129), "-0x1p-129"}, 202 {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"}, 203 {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"}, 204 {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"}, 205 {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"}, 206 207 // lowest non-denorm 208 {ldexp(1.0, -1022), "0x1p-1022"}, 209 {ldexp(-1.0, -1022), "-0x1p-1022"}, 210 211 // Denormalized values 212 {ldexp(1.0, -1023), "0x1p-1023"}, 213 {ldexp(1.0, -1023) / 2.0, "0x1p-1024"}, 214 {ldexp(1.0, -1023) / 4.0, "0x1p-1025"}, 215 {ldexp(1.0, -1023) / 8.0, "0x1p-1026"}, 216 {ldexp(-1.0, -1024), "-0x1p-1024"}, 217 {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"}, 218 {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"}, 219 {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"}, 220 221 {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"}, 222 {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0), 223 "0x1.8p-1024"}, 224 225 }))); 226 227INSTANTIATE_TEST_SUITE_P( 228 Float64NanTests, HexDoubleTest, 229 ::testing::ValuesIn(std::vector< 230 std::pair<FloatProxy<double>, std::string>>({ 231 // Various NAN and INF cases 232 {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"}, // -inf 233 {uint64_t(0x7FF0000000000000LL), "0x1p+1024"}, // +inf 234 {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"}, // -nan 235 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, // -nan 236 {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"}, // -nan 237 {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"}, // -nan 238 {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"}, // -nan 239 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, // +nan 240 {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"}, // +nan 241 {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"}, // -nan 242 {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"}, // -nan 243 {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"}, // -nan 244 }))); 245 246// Tests that encoding a value and decoding it again restores 247// the same value. 248TEST_P(RoundTripFloatTest, CanStoreAccurately) { 249 std::stringstream ss; 250 ss << FloatProxy<float>(GetParam()); 251 ss.seekg(0); 252 FloatProxy<float> res; 253 ss >> res; 254 EXPECT_THAT(GetParam(), Eq(res.getAsFloat())); 255} 256 257TEST_P(RoundTripDoubleTest, CanStoreAccurately) { 258 std::stringstream ss; 259 ss << FloatProxy<double>(GetParam()); 260 ss.seekg(0); 261 FloatProxy<double> res; 262 ss >> res; 263 EXPECT_THAT(GetParam(), Eq(res.getAsFloat())); 264} 265 266INSTANTIATE_TEST_SUITE_P( 267 Float32StoreTests, RoundTripFloatTest, 268 ::testing::ValuesIn(std::vector<float>( 269 {// Value requiring more than 6 digits of precision to be 270 // represented accurately. 271 3.0000002f}))); 272 273INSTANTIATE_TEST_SUITE_P( 274 Float64StoreTests, RoundTripDoubleTest, 275 ::testing::ValuesIn(std::vector<double>( 276 {// Value requiring more than 15 digits of precision to be 277 // represented accurately. 278 1.5000000000000002}))); 279 280TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) { 281 std::stringstream s; 282 s << std::setw(4) << std::oct << std::setfill('x') << 8 << " " 283 << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9; 284 EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11"))); 285} 286 287TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) { 288 std::stringstream s; 289 s << std::setw(4) << std::oct << std::setfill('x') << 8 << " " 290 << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4) 291 << 9; 292 EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11"))); 293} 294 295TEST_P(DecodeHexFloatTest, DecodeCorrectly) { 296 EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second)); 297} 298 299TEST_P(DecodeHexDoubleTest, DecodeCorrectly) { 300 EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second)); 301} 302 303INSTANTIATE_TEST_SUITE_P( 304 Float32DecodeTests, DecodeHexFloatTest, 305 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({ 306 {"0x0p+000", 0.f}, 307 {"0x0p0", 0.f}, 308 {"0x0p-0", 0.f}, 309 310 // flush to zero cases 311 {"0x1p-500", 0.f}, // Exponent underflows. 312 {"-0x1p-500", -0.f}, 313 {"0x0.00000000001p-126", 0.f}, // Fraction causes underflow. 314 {"-0x0.0000000001p-127", -0.f}, 315 {"-0x0.01p-142", -0.f}, // Fraction causes additional underflow. 316 {"0x0.01p-142", 0.f}, 317 318 // Some floats that do not encode the same way as they decode. 319 {"0x2p+0", 2.f}, 320 {"0xFFp+0", 255.f}, 321 {"0x0.8p+0", 0.5f}, 322 {"0x0.4p+0", 0.25f}, 323 }))); 324 325INSTANTIATE_TEST_SUITE_P( 326 Float32DecodeInfTests, DecodeHexFloatTest, 327 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({ 328 // inf cases 329 {"-0x1p+128", uint32_t(0xFF800000)}, // -inf 330 {"0x32p+127", uint32_t(0x7F800000)}, // inf 331 {"0x32p+500", uint32_t(0x7F800000)}, // inf 332 {"-0x32p+127", uint32_t(0xFF800000)}, // -inf 333 }))); 334 335INSTANTIATE_TEST_SUITE_P( 336 Float64DecodeTests, DecodeHexDoubleTest, 337 ::testing::ValuesIn( 338 std::vector<std::pair<std::string, FloatProxy<double>>>({ 339 {"0x0p+000", 0.}, 340 {"0x0p0", 0.}, 341 {"0x0p-0", 0.}, 342 343 // flush to zero cases 344 {"0x1p-5000", 0.}, // Exponent underflows. 345 {"-0x1p-5000", -0.}, 346 {"0x0.0000000000000001p-1023", 0.}, // Fraction causes underflow. 347 {"-0x0.000000000000001p-1024", -0.}, 348 {"-0x0.01p-1090", -0.f}, // Fraction causes additional underflow. 349 {"0x0.01p-1090", 0.}, 350 351 // Some floats that do not encode the same way as they decode. 352 {"0x2p+0", 2.}, 353 {"0xFFp+0", 255.}, 354 {"0x0.8p+0", 0.5}, 355 {"0x0.4p+0", 0.25}, 356 }))); 357 358INSTANTIATE_TEST_SUITE_P( 359 Float64DecodeInfTests, DecodeHexDoubleTest, 360 ::testing::ValuesIn( 361 std::vector<std::pair<std::string, FloatProxy<double>>>({ 362 // inf cases 363 {"-0x1p+1024", uint64_t(0xFFF0000000000000)}, // -inf 364 {"0x32p+1023", uint64_t(0x7FF0000000000000)}, // inf 365 {"0x32p+5000", uint64_t(0x7FF0000000000000)}, // inf 366 {"-0x32p+1023", uint64_t(0xFFF0000000000000)}, // -inf 367 }))); 368 369TEST(FloatProxy, ValidConversion) { 370 EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f)); 371 EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f)); 372 EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f)); 373 EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f)); 374 EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f)); 375 EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f)); 376 377 EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat())); 378 EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat())); 379 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat())); 380 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat())); 381 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat())); 382 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat())); 383 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat())); 384 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat())); 385 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat())); 386 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat())); 387 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat())); 388 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat())); 389 390 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u)); 391 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u)); 392 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u)); 393 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u)); 394 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u)); 395 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u)); 396 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu)); 397 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u)); 398 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u)); 399 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u)); 400 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u)); 401 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu)); 402} 403 404TEST(FloatProxy, Nan) { 405 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan()); 406 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan()); 407 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan()); 408 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan()); 409 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan()); 410 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan()); 411 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan()); 412 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan()); 413 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan()); 414 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan()); 415} 416 417TEST(FloatProxy, Negation) { 418 EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f)); 419 EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f)); 420 421 EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f)); 422 EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f)); 423 424 EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f)); 425 EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f)); 426 427 EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f)); 428 EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f)); 429 430 EXPECT_THAT( 431 (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(), 432 Eq(-std::numeric_limits<float>::infinity())); 433 EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity())) 434 .getAsFloat(), 435 Eq(std::numeric_limits<float>::infinity())); 436} 437 438// Test conversion of FloatProxy values to strings. 439// 440// In previous cases, we always wrapped the FloatProxy value in a HexFloat 441// before conversion to a string. In the following cases, the FloatProxy 442// decides for itself whether to print as a regular number or as a hex float. 443 444using FloatProxyFloatTest = 445 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>; 446using FloatProxyDoubleTest = 447 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>; 448 449// Converts a float value to a string via a FloatProxy. 450template <typename T> 451std::string EncodeViaFloatProxy(const T& value) { 452 std::stringstream ss; 453 ss << value; 454 return ss.str(); 455} 456 457// Converts a floating point string so that the exponent prefix 458// is 'e', and the exponent value does not have leading zeros. 459// The Microsoft runtime library likes to write things like "2.5E+010". 460// Convert that to "2.5e+10". 461// We don't care what happens to strings that are not floating point 462// strings. 463std::string NormalizeExponentInFloatString(std::string in) { 464 std::string result; 465 // Reserve one spot for the terminating null, even when the sscanf fails. 466 std::vector<char> prefix(in.size() + 1); 467 char e; 468 char plus_or_minus; 469 int exponent; // in base 10 470 if ((4 == std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", prefix.data(), &e, 471 &plus_or_minus, &exponent)) && 472 (e == 'e' || e == 'E') && 473 (plus_or_minus == '-' || plus_or_minus == '+')) { 474 // It looks like a floating point value with exponent. 475 std::stringstream out; 476 out << prefix.data() << 'e' << plus_or_minus << exponent; 477 result = out.str(); 478 } else { 479 result = in; 480 } 481 return result; 482} 483 484TEST(NormalizeFloat, Sample) { 485 EXPECT_THAT(NormalizeExponentInFloatString(""), Eq("")); 486 EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12")); 487 EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14")); 488 EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12")); 489 EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14")); 490} 491 492// The following two tests can't be DRY because they take different parameter 493// types. 494TEST_P(FloatProxyFloatTest, EncodeCorrectly) { 495 EXPECT_THAT( 496 NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)), 497 Eq(GetParam().second)); 498} 499 500TEST_P(FloatProxyDoubleTest, EncodeCorrectly) { 501 EXPECT_THAT( 502 NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)), 503 Eq(GetParam().second)); 504} 505 506INSTANTIATE_TEST_SUITE_P( 507 Float32Tests, FloatProxyFloatTest, 508 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({ 509 // Zero 510 {0.f, "0"}, 511 // Normal numbers 512 {1.f, "1"}, 513 {-0.25f, "-0.25"}, 514 {1000.0f, "1000"}, 515 516 // Still normal numbers, but with large magnitude exponents. 517 {float(ldexp(1.f, 126)), "8.50705917e+37"}, 518 {float(ldexp(-1.f, -126)), "-1.17549435e-38"}, 519 520 // denormalized values are printed as hex floats. 521 {float(ldexp(1.0f, -127)), "0x1p-127"}, 522 {float(ldexp(1.5f, -128)), "0x1.8p-128"}, 523 {float(ldexp(1.25, -129)), "0x1.4p-129"}, 524 {float(ldexp(1.125, -130)), "0x1.2p-130"}, 525 {float(ldexp(-1.0f, -127)), "-0x1p-127"}, 526 {float(ldexp(-1.0f, -128)), "-0x1p-128"}, 527 {float(ldexp(-1.0f, -129)), "-0x1p-129"}, 528 {float(ldexp(-1.5f, -130)), "-0x1.8p-130"}, 529 530 // NaNs 531 {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"}, 532 {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"}, 533 534 {std::numeric_limits<float>::infinity(), "0x1p+128"}, 535 {-std::numeric_limits<float>::infinity(), "-0x1p+128"}, 536 }))); 537 538INSTANTIATE_TEST_SUITE_P( 539 Float64Tests, FloatProxyDoubleTest, 540 ::testing::ValuesIn( 541 std::vector<std::pair<FloatProxy<double>, std::string>>({ 542 {0., "0"}, 543 {1., "1"}, 544 {-0.25, "-0.25"}, 545 {1000.0, "1000"}, 546 547 // Large outside the range of normal floats 548 {ldexp(1.0, 128), "3.4028236692093846e+38"}, 549 {ldexp(1.5, 129), "1.0208471007628154e+39"}, 550 {ldexp(-1.0, 128), "-3.4028236692093846e+38"}, 551 {ldexp(-1.5, 129), "-1.0208471007628154e+39"}, 552 553 // Small outside the range of normal floats 554 {ldexp(1.5, -129), "2.2040519077917891e-39"}, 555 {ldexp(-1.5, -129), "-2.2040519077917891e-39"}, 556 557 // lowest non-denorm 558 {ldexp(1.0, -1022), "2.2250738585072014e-308"}, 559 {ldexp(-1.0, -1022), "-2.2250738585072014e-308"}, 560 561 // Denormalized values 562 {ldexp(1.125, -1023), "0x1.2p-1023"}, 563 {ldexp(-1.375, -1024), "-0x1.6p-1024"}, 564 565 // NaNs 566 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, 567 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, 568 569 // Infinity 570 {std::numeric_limits<double>::infinity(), "0x1p+1024"}, 571 {-std::numeric_limits<double>::infinity(), "-0x1p+1024"}, 572 573 }))); 574 575// double is used so that unbiased_exponent can be used with the output 576// of ldexp directly. 577int32_t unbiased_exponent(double f) { 578 return HexFloat<FloatProxy<float>>(static_cast<float>(f)) 579 .getUnbiasedNormalizedExponent(); 580} 581 582int16_t unbiased_half_exponent(uint16_t f) { 583 return HexFloat<FloatProxy<Float16>>(f).getUnbiasedNormalizedExponent(); 584} 585 586TEST(HexFloatOperationTest, UnbiasedExponent) { 587 // Float cases 588 EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0))); 589 EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32))); 590 EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42))); 591 EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125))); 592 593 EXPECT_EQ(128, 594 HexFloat<FloatProxy<float>>(std::numeric_limits<float>::infinity()) 595 .getUnbiasedNormalizedExponent()); 596 597 EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100))); 598 EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm 599 EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128))); 600 EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129))); 601 EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140))); 602 // Smallest representable number 603 EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23))); 604 // Should get rounded to 0 first. 605 EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23))); 606 607 // Float16 cases 608 // The exponent is represented in the bits 0x7C00 609 // The offset is -15 610 EXPECT_EQ(0, unbiased_half_exponent(0x3C00)); 611 EXPECT_EQ(3, unbiased_half_exponent(0x4800)); 612 EXPECT_EQ(-1, unbiased_half_exponent(0x3800)); 613 EXPECT_EQ(-14, unbiased_half_exponent(0x0400)); 614 EXPECT_EQ(16, unbiased_half_exponent(0x7C00)); 615 EXPECT_EQ(10, unbiased_half_exponent(0x6400)); 616 617 // Smallest representable number 618 EXPECT_EQ(-24, unbiased_half_exponent(0x0001)); 619} 620 621// Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in factions 622float float_fractions(const std::vector<uint32_t>& fractions) { 623 float f = 0; 624 for (int32_t i : fractions) { 625 f += std::ldexp(1.0f, -i); 626 } 627 return f; 628} 629 630// Returns the normalized significand of a HexFloat<FloatProxy<float>> 631// that was created by calling float_fractions with the input fractions, 632// raised to the power of exp. 633uint32_t normalized_significand(const std::vector<uint32_t>& fractions, 634 uint32_t exp) { 635 return HexFloat<FloatProxy<float>>( 636 static_cast<float>(ldexp(float_fractions(fractions), exp))) 637 .getNormalizedSignificand(); 638} 639 640// Sets the bits from MSB to LSB of the significand part of a float. 641// For example 0 would set the bit 23 (counting from LSB to MSB), 642// and 1 would set the 22nd bit. 643uint32_t bits_set(const std::vector<uint32_t>& bits) { 644 const uint32_t top_bit = 1u << 22u; 645 uint32_t val = 0; 646 for (uint32_t i : bits) { 647 val |= top_bit >> i; 648 } 649 return val; 650} 651 652// The same as bits_set but for a Float16 value instead of 32-bit floating 653// point. 654uint16_t half_bits_set(const std::vector<uint32_t>& bits) { 655 const uint32_t top_bit = 1u << 9u; 656 uint32_t val = 0; 657 for (uint32_t i : bits) { 658 val |= top_bit >> i; 659 } 660 return static_cast<uint16_t>(val); 661} 662 663TEST(HexFloatOperationTest, NormalizedSignificand) { 664 // For normalized numbers (the following) it should be a simple matter 665 // of getting rid of the top implicit bit 666 EXPECT_EQ(bits_set({}), normalized_significand({0}, 0)); 667 EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0)); 668 EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0)); 669 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0)); 670 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32)); 671 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126)); 672 673 // For denormalized numbers we expect the normalized significand to 674 // shift as if it were normalized. This means, in practice that the 675 // top_most set bit will be cut off. Looks very similar to above (on purpose) 676 EXPECT_EQ(bits_set({}), 677 normalized_significand({0}, static_cast<uint32_t>(-127))); 678 EXPECT_EQ(bits_set({3}), 679 normalized_significand({0, 4}, static_cast<uint32_t>(-128))); 680 EXPECT_EQ(bits_set({3}), 681 normalized_significand({0, 4}, static_cast<uint32_t>(-127))); 682 EXPECT_EQ(bits_set({}), 683 normalized_significand({22}, static_cast<uint32_t>(-127))); 684 EXPECT_EQ(bits_set({0}), 685 normalized_significand({21, 22}, static_cast<uint32_t>(-127))); 686} 687 688// Returns the 32-bit floating point value created by 689// calling setFromSignUnbiasedExponentAndNormalizedSignificand 690// on a HexFloat<FloatProxy<float>> 691float set_from_sign(bool negative, int32_t unbiased_exponent, 692 uint32_t significand, bool round_denorm_up) { 693 HexFloat<FloatProxy<float>> f(0.f); 694 f.setFromSignUnbiasedExponentAndNormalizedSignificand( 695 negative, unbiased_exponent, significand, round_denorm_up); 696 return f.value().getAsFloat(); 697} 698 699TEST(HexFloatOperationTests, 700 SetFromSignUnbiasedExponentAndNormalizedSignificand) { 701 EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false)); 702 703 // Tests insertion of various denormalized numbers with and without round up. 704 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), 705 set_from_sign(false, -149, 0, false)); 706 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), 707 set_from_sign(false, -149, 0, true)); 708 EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false)); 709 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), 710 set_from_sign(false, -150, 1, true)); 711 712 EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false)); 713 EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false)); 714 EXPECT_EQ(float_fractions({0, 1, 2, 5}), 715 set_from_sign(false, 0, bits_set({0, 1, 4}), false)); 716 EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32), 717 set_from_sign(false, -32, bits_set({0, 1, 4}), false)); 718 EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128), 719 set_from_sign(false, -128, bits_set({0, 1, 4}), false)); 720 721 // The negative cases from above. 722 EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false)); 723 EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false)); 724 EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false)); 725 EXPECT_EQ(-float_fractions({0, 1, 2, 5}), 726 set_from_sign(true, 0, bits_set({0, 1, 4}), false)); 727 EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32), 728 set_from_sign(true, -32, bits_set({0, 1, 4}), false)); 729 EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128), 730 set_from_sign(true, -128, bits_set({0, 1, 4}), false)); 731} 732 733TEST(HexFloatOperationTests, NonRounding) { 734 // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial, 735 // except in the denorm case which is a bit more complex. 736 using HF = HexFloat<FloatProxy<float>>; 737 bool carry_bit = false; 738 739 round_direction rounding[] = {round_direction::kToZero, 740 round_direction::kToNearestEven, 741 round_direction::kToPositiveInfinity, 742 round_direction::kToNegativeInfinity}; 743 744 // Everything fits, so this should be straight-forward 745 for (round_direction round : rounding) { 746 EXPECT_EQ(bits_set({}), 747 HF(0.f).getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 748 EXPECT_FALSE(carry_bit); 749 750 EXPECT_EQ(bits_set({0}), 751 HF(float_fractions({0, 1})) 752 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 753 EXPECT_FALSE(carry_bit); 754 755 EXPECT_EQ(bits_set({1, 3}), 756 HF(float_fractions({0, 2, 4})) 757 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 758 EXPECT_FALSE(carry_bit); 759 760 EXPECT_EQ( 761 bits_set({0, 1, 4}), 762 HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128))) 763 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 764 EXPECT_FALSE(carry_bit); 765 766 EXPECT_EQ(bits_set({0, 1, 4, 22}), 767 HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23}))) 768 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 769 EXPECT_FALSE(carry_bit); 770 } 771} 772 773using RD = round_direction; 774struct RoundSignificandCase { 775 float source_float; 776 std::pair<int16_t, bool> expected_results; 777 round_direction round; 778}; 779 780using HexFloatRoundTest = ::testing::TestWithParam<RoundSignificandCase>; 781 782TEST_P(HexFloatRoundTest, RoundDownToFP16) { 783 using HF = HexFloat<FloatProxy<float>>; 784 using HF16 = HexFloat<FloatProxy<Float16>>; 785 786 HF input_value(GetParam().source_float); 787 bool carry_bit = false; 788 EXPECT_EQ(GetParam().expected_results.first, 789 input_value.getRoundedNormalizedSignificand<HF16>(GetParam().round, 790 &carry_bit)); 791 EXPECT_EQ(carry_bit, GetParam().expected_results.second); 792} 793 794// clang-format off 795INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatRoundTest, 796 ::testing::ValuesIn(std::vector<RoundSignificandCase>( 797 { 798 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero}, 799 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven}, 800 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity}, 801 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity}, 802 {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero}, 803 804 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero}, 805 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity}, 806 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity}, 807 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven}, 808 809 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero}, 810 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity}, 811 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity}, 812 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven}, 813 814 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero}, 815 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity}, 816 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity}, 817 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven}, 818 819 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero}, 820 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity}, 821 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity}, 822 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven}, 823 824 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero}, 825 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity}, 826 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity}, 827 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven}, 828 829 // Carries 830 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero}, 831 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity}, 832 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity}, 833 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven}, 834 835 // Cases where original number was denorm. Note: this should have no effect 836 // the number is pre-normalized. 837 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero}, 838 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity}, 839 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity}, 840 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven}, 841 }))); 842// clang-format on 843 844struct UpCastSignificandCase { 845 uint16_t source_half; 846 uint32_t expected_result; 847}; 848 849using HexFloatRoundUpSignificandTest = 850 ::testing::TestWithParam<UpCastSignificandCase>; 851TEST_P(HexFloatRoundUpSignificandTest, Widening) { 852 using HF = HexFloat<FloatProxy<float>>; 853 using HF16 = HexFloat<FloatProxy<Float16>>; 854 bool carry_bit = false; 855 856 round_direction rounding[] = {round_direction::kToZero, 857 round_direction::kToNearestEven, 858 round_direction::kToPositiveInfinity, 859 round_direction::kToNegativeInfinity}; 860 861 // Everything fits, so everything should just be bit-shifts. 862 for (round_direction round : rounding) { 863 carry_bit = false; 864 HF16 input_value(GetParam().source_half); 865 EXPECT_EQ( 866 GetParam().expected_result, 867 input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)) 868 << std::hex << "0x" 869 << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit) 870 << " 0x" << GetParam().expected_result; 871 EXPECT_FALSE(carry_bit); 872 } 873} 874 875INSTANTIATE_TEST_SUITE_P( 876 F16toF32, HexFloatRoundUpSignificandTest, 877 // 0xFC00 of the source 16-bit hex value cover the sign and the exponent. 878 // They are ignored for this test. 879 ::testing::ValuesIn(std::vector<UpCastSignificandCase>({ 880 {0x3F00, 0x600000}, 881 {0x0F00, 0x600000}, 882 {0x0F01, 0x602000}, 883 {0x0FFF, 0x7FE000}, 884 }))); 885 886struct DownCastTest { 887 float source_float; 888 uint16_t expected_half; 889 std::vector<round_direction> directions; 890}; 891 892std::string get_round_text(round_direction direction) { 893#define CASE(round_direction) \ 894 case round_direction: \ 895 return #round_direction 896 897 switch (direction) { 898 CASE(round_direction::kToZero); 899 CASE(round_direction::kToPositiveInfinity); 900 CASE(round_direction::kToNegativeInfinity); 901 CASE(round_direction::kToNearestEven); 902 } 903#undef CASE 904 return ""; 905} 906 907using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>; 908 909TEST_P(HexFloatFP32To16Tests, NarrowingCasts) { 910 using HF = HexFloat<FloatProxy<float>>; 911 using HF16 = HexFloat<FloatProxy<Float16>>; 912 HF f(GetParam().source_float); 913 for (auto round : GetParam().directions) { 914 HF16 half(0); 915 f.castTo(half, round); 916 EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value()) 917 << get_round_text(round) << " " << std::hex 918 << BitwiseCast<uint32_t>(GetParam().source_float) 919 << " cast to: " << half.value().getAsFloat().get_value(); 920 } 921} 922 923const uint16_t positive_infinity = 0x7C00; 924const uint16_t negative_infinity = 0xFC00; 925 926INSTANTIATE_TEST_SUITE_P( 927 F32ToF16, HexFloatFP32To16Tests, 928 ::testing::ValuesIn(std::vector<DownCastTest>({ 929 // Exactly representable as half. 930 {0.f, 931 0x0, 932 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 933 RD::kToNearestEven}}, 934 {-0.f, 935 0x8000, 936 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 937 RD::kToNearestEven}}, 938 {1.0f, 939 0x3C00, 940 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 941 RD::kToNearestEven}}, 942 {-1.0f, 943 0xBC00, 944 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 945 RD::kToNearestEven}}, 946 947 {float_fractions({0, 1, 10}), 948 0x3E01, 949 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 950 RD::kToNearestEven}}, 951 {-float_fractions({0, 1, 10}), 952 0xBE01, 953 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 954 RD::kToNearestEven}}, 955 {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)), 956 0x4A01, 957 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 958 RD::kToNearestEven}}, 959 {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)), 960 0xCA01, 961 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 962 RD::kToNearestEven}}, 963 964 // Underflow 965 {static_cast<float>(ldexp(1.0f, -25)), 966 0x0, 967 {RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}}, 968 {static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}}, 969 {static_cast<float>(-ldexp(1.0f, -25)), 970 0x8000, 971 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}}, 972 {static_cast<float>(-ldexp(1.0f, -25)), 973 0x8001, 974 {RD::kToNegativeInfinity}}, 975 {static_cast<float>(ldexp(1.0f, -24)), 976 0x1, 977 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 978 RD::kToNearestEven}}, 979 980 // Overflow 981 {static_cast<float>(ldexp(1.0f, 16)), 982 positive_infinity, 983 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 984 RD::kToNearestEven}}, 985 {static_cast<float>(ldexp(1.0f, 18)), 986 positive_infinity, 987 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 988 RD::kToNearestEven}}, 989 {static_cast<float>(ldexp(1.3f, 16)), 990 positive_infinity, 991 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 992 RD::kToNearestEven}}, 993 {static_cast<float>(-ldexp(1.0f, 16)), 994 negative_infinity, 995 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 996 RD::kToNearestEven}}, 997 {static_cast<float>(-ldexp(1.0f, 18)), 998 negative_infinity, 999 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 1000 RD::kToNearestEven}}, 1001 {static_cast<float>(-ldexp(1.3f, 16)), 1002 negative_infinity, 1003 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 1004 RD::kToNearestEven}}, 1005 1006 // Transfer of Infinities 1007 {std::numeric_limits<float>::infinity(), 1008 positive_infinity, 1009 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 1010 RD::kToNearestEven}}, 1011 {-std::numeric_limits<float>::infinity(), 1012 negative_infinity, 1013 {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, 1014 RD::kToNearestEven}}, 1015 1016 // Nans are below because we cannot test for equality. 1017 }))); 1018 1019struct UpCastCase { 1020 uint16_t source_half; 1021 float expected_float; 1022}; 1023 1024using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>; 1025TEST_P(HexFloatFP16To32Tests, WideningCasts) { 1026 using HF = HexFloat<FloatProxy<float>>; 1027 using HF16 = HexFloat<FloatProxy<Float16>>; 1028 HF16 f(GetParam().source_half); 1029 1030 round_direction rounding[] = {round_direction::kToZero, 1031 round_direction::kToNearestEven, 1032 round_direction::kToPositiveInfinity, 1033 round_direction::kToNegativeInfinity}; 1034 1035 // Everything fits, so everything should just be bit-shifts. 1036 for (round_direction round : rounding) { 1037 HF flt(0.f); 1038 f.castTo(flt, round); 1039 EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat()) 1040 << get_round_text(round) << " " << std::hex 1041 << BitwiseCast<uint16_t>(GetParam().source_half) 1042 << " cast to: " << flt.value().getAsFloat(); 1043 } 1044} 1045 1046INSTANTIATE_TEST_SUITE_P( 1047 F16ToF32, HexFloatFP16To32Tests, 1048 ::testing::ValuesIn(std::vector<UpCastCase>({ 1049 {0x0000, 0.f}, 1050 {0x8000, -0.f}, 1051 {0x3C00, 1.0f}, 1052 {0xBC00, -1.0f}, 1053 {0x3F00, float_fractions({0, 1, 2})}, 1054 {0xBF00, -float_fractions({0, 1, 2})}, 1055 {0x3F01, float_fractions({0, 1, 2, 10})}, 1056 {0xBF01, -float_fractions({0, 1, 2, 10})}, 1057 1058 // denorm 1059 {0x0001, static_cast<float>(ldexp(1.0, -24))}, 1060 {0x0002, static_cast<float>(ldexp(1.0, -23))}, 1061 {0x8001, static_cast<float>(-ldexp(1.0, -24))}, 1062 {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))}, 1063 1064 // inf 1065 {0x7C00, std::numeric_limits<float>::infinity()}, 1066 {0xFC00, -std::numeric_limits<float>::infinity()}, 1067 }))); 1068 1069TEST(HexFloatOperationTests, NanTests) { 1070 using HF = HexFloat<FloatProxy<float>>; 1071 using HF16 = HexFloat<FloatProxy<Float16>>; 1072 round_direction rounding[] = {round_direction::kToZero, 1073 round_direction::kToNearestEven, 1074 round_direction::kToPositiveInfinity, 1075 round_direction::kToNegativeInfinity}; 1076 1077 // Everything fits, so everything should just be bit-shifts. 1078 for (round_direction round : rounding) { 1079 HF16 f16(0); 1080 HF f(0.f); 1081 HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round); 1082 EXPECT_TRUE(f16.value().isNan()); 1083 HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round); 1084 EXPECT_TRUE(f16.value().isNan()); 1085 1086 HF16(0x7C01).castTo(f, round); 1087 EXPECT_TRUE(f.value().isNan()); 1088 HF16(0x7C11).castTo(f, round); 1089 EXPECT_TRUE(f.value().isNan()); 1090 HF16(0xFC01).castTo(f, round); 1091 EXPECT_TRUE(f.value().isNan()); 1092 HF16(0x7C10).castTo(f, round); 1093 EXPECT_TRUE(f.value().isNan()); 1094 HF16(0xFF00).castTo(f, round); 1095 EXPECT_TRUE(f.value().isNan()); 1096 } 1097} 1098 1099// A test case for parsing good and bad HexFloat<FloatProxy<T>> literals. 1100template <typename T> 1101struct FloatParseCase { 1102 std::string literal; 1103 bool negate_value; 1104 bool expect_success; 1105 HexFloat<FloatProxy<T>> expected_value; 1106}; 1107 1108using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>; 1109 1110TEST_P(ParseNormalFloatTest, Samples) { 1111 std::stringstream input(GetParam().literal); 1112 HexFloat<FloatProxy<float>> parsed_value(0.0f); 1113 ParseNormalFloat(input, GetParam().negate_value, parsed_value); 1114 EXPECT_NE(GetParam().expect_success, input.fail()) 1115 << " literal: " << GetParam().literal 1116 << " negate: " << GetParam().negate_value; 1117 if (GetParam().expect_success) { 1118 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value())) 1119 << " literal: " << GetParam().literal 1120 << " negate: " << GetParam().negate_value; 1121 } 1122} 1123 1124// Returns a FloatParseCase with expected failure. 1125template <typename T> 1126FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value, 1127 T expected_value) { 1128 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value); 1129 return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value}; 1130} 1131 1132// Returns a FloatParseCase that should successfully parse to a given value. 1133template <typename T> 1134FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value, 1135 T expected_value) { 1136 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value); 1137 return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value}; 1138} 1139 1140INSTANTIATE_TEST_SUITE_P( 1141 FloatParse, ParseNormalFloatTest, 1142 ::testing::ValuesIn(std::vector<FloatParseCase<float>>{ 1143 // Failing cases due to trivially incorrect syntax. 1144 BadFloatParseCase("abc", false, 0.0f), 1145 BadFloatParseCase("abc", true, 0.0f), 1146 1147 // Valid cases. 1148 GoodFloatParseCase("0", false, 0.0f), 1149 GoodFloatParseCase("0.0", false, 0.0f), 1150 GoodFloatParseCase("-0.0", false, -0.0f), 1151 GoodFloatParseCase("2.0", false, 2.0f), 1152 GoodFloatParseCase("-2.0", false, -2.0f), 1153 GoodFloatParseCase("+2.0", false, 2.0f), 1154 // Cases with negate_value being true. 1155 GoodFloatParseCase("0.0", true, -0.0f), 1156 GoodFloatParseCase("2.0", true, -2.0f), 1157 1158 // When negate_value is true, we should not accept a 1159 // leading minus or plus. 1160 BadFloatParseCase("-0.0", true, 0.0f), 1161 BadFloatParseCase("-2.0", true, 0.0f), 1162 BadFloatParseCase("+0.0", true, 0.0f), 1163 BadFloatParseCase("+2.0", true, 0.0f), 1164 1165 // Overflow is an error for 32-bit float parsing. 1166 BadFloatParseCase("1e40", false, FLT_MAX), 1167 BadFloatParseCase("1e40", true, -FLT_MAX), 1168 BadFloatParseCase("-1e40", false, -FLT_MAX), 1169 // We can't have -1e40 and negate_value == true since 1170 // that represents an original case of "--1e40" which 1171 // is invalid. 1172 })); 1173 1174using ParseNormalFloat16Test = 1175 ::testing::TestWithParam<FloatParseCase<Float16>>; 1176 1177TEST_P(ParseNormalFloat16Test, Samples) { 1178 std::stringstream input(GetParam().literal); 1179 HexFloat<FloatProxy<Float16>> parsed_value(0); 1180 ParseNormalFloat(input, GetParam().negate_value, parsed_value); 1181 EXPECT_NE(GetParam().expect_success, input.fail()) 1182 << " literal: " << GetParam().literal 1183 << " negate: " << GetParam().negate_value; 1184 if (GetParam().expect_success) { 1185 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value())) 1186 << " literal: " << GetParam().literal 1187 << " negate: " << GetParam().negate_value; 1188 } 1189} 1190 1191INSTANTIATE_TEST_SUITE_P( 1192 Float16Parse, ParseNormalFloat16Test, 1193 ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{ 1194 // Failing cases due to trivially incorrect syntax. 1195 BadFloatParseCase<Float16>("abc", false, uint16_t{0}), 1196 BadFloatParseCase<Float16>("abc", true, uint16_t{0}), 1197 1198 // Valid cases. 1199 GoodFloatParseCase<Float16>("0", false, uint16_t{0}), 1200 GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}), 1201 GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}), 1202 GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}), 1203 GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}), 1204 GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}), 1205 // Cases with negate_value being true. 1206 GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}), 1207 GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}), 1208 1209 // When negate_value is true, we should not accept a leading minus or 1210 // plus. 1211 BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}), 1212 BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}), 1213 BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}), 1214 BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}), 1215 })); 1216 1217// A test case for detecting infinities. 1218template <typename T> 1219struct OverflowParseCase { 1220 std::string input; 1221 bool expect_success; 1222 T expected_value; 1223}; 1224 1225using FloatProxyParseOverflowFloatTest = 1226 ::testing::TestWithParam<OverflowParseCase<float>>; 1227 1228TEST_P(FloatProxyParseOverflowFloatTest, Sample) { 1229 std::istringstream input(GetParam().input); 1230 HexFloat<FloatProxy<float>> value(0.0f); 1231 input >> value; 1232 EXPECT_NE(GetParam().expect_success, input.fail()); 1233 if (GetParam().expect_success) { 1234 EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value); 1235 } 1236} 1237 1238INSTANTIATE_TEST_SUITE_P( 1239 FloatOverflow, FloatProxyParseOverflowFloatTest, 1240 ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({ 1241 {"0", true, 0.0f}, 1242 {"0.0", true, 0.0f}, 1243 {"1.0", true, 1.0f}, 1244 {"1e38", true, 1e38f}, 1245 {"-1e38", true, -1e38f}, 1246 {"1e40", false, FLT_MAX}, 1247 {"-1e40", false, -FLT_MAX}, 1248 {"1e400", false, FLT_MAX}, 1249 {"-1e400", false, -FLT_MAX}, 1250 }))); 1251 1252using FloatProxyParseOverflowDoubleTest = 1253 ::testing::TestWithParam<OverflowParseCase<double>>; 1254 1255TEST_P(FloatProxyParseOverflowDoubleTest, Sample) { 1256 std::istringstream input(GetParam().input); 1257 HexFloat<FloatProxy<double>> value(0.0); 1258 input >> value; 1259 EXPECT_NE(GetParam().expect_success, input.fail()); 1260 if (GetParam().expect_success) { 1261 EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value)); 1262 } 1263} 1264 1265INSTANTIATE_TEST_SUITE_P( 1266 DoubleOverflow, FloatProxyParseOverflowDoubleTest, 1267 ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({ 1268 {"0", true, 0.0}, 1269 {"0.0", true, 0.0}, 1270 {"1.0", true, 1.0}, 1271 {"1e38", true, 1e38}, 1272 {"-1e38", true, -1e38}, 1273 {"1e40", true, 1e40}, 1274 {"-1e40", true, -1e40}, 1275 {"1e400", false, DBL_MAX}, 1276 {"-1e400", false, -DBL_MAX}, 1277 }))); 1278 1279using FloatProxyParseOverflowFloat16Test = 1280 ::testing::TestWithParam<OverflowParseCase<uint16_t>>; 1281 1282TEST_P(FloatProxyParseOverflowFloat16Test, Sample) { 1283 std::istringstream input(GetParam().input); 1284 HexFloat<FloatProxy<Float16>> value(0); 1285 input >> value; 1286 EXPECT_NE(GetParam().expect_success, input.fail()) 1287 << " literal: " << GetParam().input; 1288 if (GetParam().expect_success) { 1289 EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value)) 1290 << " literal: " << GetParam().input; 1291 } 1292} 1293 1294INSTANTIATE_TEST_SUITE_P( 1295 Float16Overflow, FloatProxyParseOverflowFloat16Test, 1296 ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({ 1297 {"0", true, uint16_t{0}}, 1298 {"0.0", true, uint16_t{0}}, 1299 {"1.0", true, uint16_t{0x3c00}}, 1300 // Overflow for 16-bit float is an error, and returns max or 1301 // lowest value. 1302 {"1e38", false, uint16_t{0x7bff}}, 1303 {"1e40", false, uint16_t{0x7bff}}, 1304 {"1e400", false, uint16_t{0x7bff}}, 1305 {"-1e38", false, uint16_t{0xfbff}}, 1306 {"-1e40", false, uint16_t{0xfbff}}, 1307 {"-1e400", false, uint16_t{0xfbff}}, 1308 }))); 1309 1310TEST(FloatProxy, Max) { 1311 EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(), 1312 Eq(uint16_t{0x7bff})); 1313 EXPECT_THAT(FloatProxy<float>::max().getAsFloat(), 1314 Eq(std::numeric_limits<float>::max())); 1315 EXPECT_THAT(FloatProxy<double>::max().getAsFloat(), 1316 Eq(std::numeric_limits<double>::max())); 1317} 1318 1319TEST(FloatProxy, Lowest) { 1320 EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(), 1321 Eq(uint16_t{0xfbff})); 1322 EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(), 1323 Eq(std::numeric_limits<float>::lowest())); 1324 EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(), 1325 Eq(std::numeric_limits<double>::lowest())); 1326} 1327 1328template <typename T> 1329struct StreamParseCase { 1330 StreamParseCase(const std::string& lit, bool succ, const std::string& suffix, 1331 T value) 1332 : literal(lit), 1333 expect_success(succ), 1334 expected_suffix(suffix), 1335 expected_value(HexFloat<FloatProxy<T>>(value)) {} 1336 1337 std::string literal; 1338 bool expect_success; 1339 std::string expected_suffix; 1340 HexFloat<FloatProxy<T>> expected_value; 1341}; 1342 1343template <typename T> 1344std::ostream& operator<<(std::ostream& os, const StreamParseCase<T>& fspc) { 1345 os << "StreamParseCase(" << fspc.literal 1346 << ", expect_success:" << int(fspc.expect_success) << "," 1347 << fspc.expected_suffix << "," << fspc.expected_value << ")"; 1348 return os; 1349} 1350 1351using Float32StreamParseTest = ::testing::TestWithParam<StreamParseCase<float>>; 1352using Float16StreamParseTest = 1353 ::testing::TestWithParam<StreamParseCase<Float16>>; 1354 1355TEST_P(Float32StreamParseTest, Samples) { 1356 std::stringstream input(GetParam().literal); 1357 HexFloat<FloatProxy<float>> parsed_value(0.0f); 1358 // Hex floats must be read with the stream input operator. 1359 input >> parsed_value; 1360 if (GetParam().expect_success) { 1361 EXPECT_FALSE(input.fail()); 1362 std::string suffix; 1363 input >> suffix; 1364 // EXPECT_EQ(suffix, GetParam().expected_suffix); 1365 EXPECT_EQ(parsed_value.value().getAsFloat(), 1366 GetParam().expected_value.value().getAsFloat()); 1367 } else { 1368 EXPECT_TRUE(input.fail()); 1369 } 1370} 1371 1372// Returns a Float16 constructed from its sign bit, unbiased exponent, and 1373// mantissa. 1374Float16 makeF16(int sign_bit, int unbiased_exp, int mantissa) { 1375 EXPECT_LE(0, sign_bit); 1376 EXPECT_LE(sign_bit, 1); 1377 // Exponent is 5 bits, with bias of 15. 1378 EXPECT_LE(-15, unbiased_exp); // -15 means zero or subnormal 1379 EXPECT_LE(unbiased_exp, 16); // 16 means infinity or NaN 1380 EXPECT_LE(0, mantissa); 1381 EXPECT_LE(mantissa, 0x3ff); 1382 const unsigned biased_exp = 15 + unbiased_exp; 1383 const uint32_t as_bits = sign_bit << 15 | (biased_exp << 10) | mantissa; 1384 EXPECT_LE(as_bits, 0xffffu); 1385 return Float16(static_cast<uint16_t>(as_bits)); 1386} 1387 1388TEST_P(Float16StreamParseTest, Samples) { 1389 std::stringstream input(GetParam().literal); 1390 HexFloat<FloatProxy<Float16>> parsed_value(makeF16(0, 0, 0)); 1391 // Hex floats must be read with the stream input operator. 1392 input >> parsed_value; 1393 if (GetParam().expect_success) { 1394 EXPECT_FALSE(input.fail()); 1395 std::string suffix; 1396 input >> suffix; 1397 const auto got = parsed_value.value(); 1398 const auto expected = GetParam().expected_value.value(); 1399 EXPECT_EQ(got.data(), expected.data()) 1400 << "got: " << got << " expected: " << expected; 1401 } else { 1402 EXPECT_TRUE(input.fail()); 1403 } 1404} 1405 1406INSTANTIATE_TEST_SUITE_P( 1407 HexFloat32FillSignificantDigits, Float32StreamParseTest, 1408 ::testing::ValuesIn(std::vector<StreamParseCase<float>>{ 1409 {"0x123456p0", true, "", ldexpf(0x123456, 0)}, 1410 // Patterns that fill all mantissa bits 1411 {"0x1.fffffep+23", true, "", ldexpf(0x1fffffe, -1)}, 1412 {"0x1f.ffffep+19", true, "", ldexpf(0x1fffffe, -1)}, 1413 {"0x1ff.fffep+15", true, "", ldexpf(0x1fffffe, -1)}, 1414 {"0x1fff.ffep+11", true, "", ldexpf(0x1fffffe, -1)}, 1415 {"0x1ffff.fep+7", true, "", ldexpf(0x1fffffe, -1)}, 1416 {"0x1fffff.ep+3", true, "", ldexpf(0x1fffffe, -1)}, 1417 {"0x1fffffe.p-1", true, "", ldexpf(0x1fffffe, -1)}, 1418 {"0xffffff.p+0", true, "", ldexpf(0x1fffffe, -1)}, 1419 {"0xffffff.p+0", true, "", ldexpf(0xffffff, 0)}, 1420 // Now drop some bits in the middle 1421 {"0xa5a5a5.p+0", true, "", ldexpf(0xa5a5a5, 0)}, 1422 {"0x5a5a5a.p+0", true, "", ldexpf(0x5a5a5a, 0)}})); 1423 1424INSTANTIATE_TEST_SUITE_P( 1425 HexFloat32ExcessSignificantDigits, Float32StreamParseTest, 1426 ::testing::ValuesIn(std::vector<StreamParseCase<float>>{ 1427 // Base cases 1428 {"0x1.fffffep0", true, "", ldexpf(0xffffff, -23)}, 1429 {"0xa5a5a5p0", true, "", ldexpf(0xa5a5a5, 0)}, 1430 {"0xa.5a5a5p+9", true, "", ldexpf(0xa5a5a5, -11)}, 1431 {"0x5a5a5ap0", true, "", ldexpf(0x5a5a5a, 0)}, 1432 {"0x5.a5a5ap+9", true, "", ldexpf(0x5a5a5a, -11)}, 1433 // Truncate extra bits: zeroes 1434 {"0x1.fffffe0p0", true, "", ldexpf(0xffffff, -23)}, 1435 {"0xa5a5a5000p0", true, "", ldexpf(0xa5a5a5, 12)}, 1436 {"0xa.5a5a5000p+9", true, "", ldexpf(0xa5a5a5, -11)}, 1437 {"0x5a5a5a000p0", true, "", ldexpf(0x5a5a5a, 12)}, 1438 {"0x5.a5a5a000p+9", true, "", ldexpf(0x5a5a5a, -11)}, 1439 // Truncate extra bits: ones 1440 {"0x1.ffffffp0", // Extra bits in the last nibble 1441 true, "", ldexpf(0xffffff, -23)}, 1442 {"0x1.fffffffp0", true, "", ldexpf(0xffffff, -23)}, 1443 {"0xa5a5a5fffp0", true, "", ldexpf(0xa5a5a5, 12)}, 1444 {"0xa.5a5a5fffp+9", true, "", ldexpf(0xa5a5a5, -11)}, 1445 {"0x5a5a5afffp0", 1446 // The 5 nibble (0101), leads with 0, so the result can fit a leading 1447 // 1 bit , yielding 8 (1000). 1448 true, "", ldexpf(0x5a5a5a8, 8)}, 1449 {"0x5.a5a5afffp+9", true, "", ldexpf(0x5a5a5a8, 8 - 32 + 9)}})); 1450 1451INSTANTIATE_TEST_SUITE_P( 1452 HexFloat32ExponentMissingDigits, Float32StreamParseTest, 1453 ::testing::ValuesIn(std::vector<StreamParseCase<float>>{ 1454 {"0x1.0p1", true, "", 2.0f}, 1455 {"0x1.0p1a", true, "a", 2.0f}, 1456 {"-0x1.0p1f", true, "f", -2.0f}, 1457 {"0x1.0p", false, "", 0.0f}, 1458 {"0x1.0pa", false, "", 0.0f}, 1459 {"0x1.0p!", false, "", 0.0f}, 1460 {"0x1.0p+", false, "", 0.0f}, 1461 {"0x1.0p+a", false, "", 0.0f}, 1462 {"0x1.0p+!", false, "", 0.0f}, 1463 {"0x1.0p-", false, "", 0.0f}, 1464 {"0x1.0p-a", false, "", 0.0f}, 1465 {"0x1.0p-!", false, "", 0.0f}, 1466 {"0x1.0p++", false, "", 0.0f}, 1467 {"0x1.0p+-", false, "", 0.0f}, 1468 {"0x1.0p-+", false, "", 0.0f}, 1469 {"0x1.0p--", false, "", 0.0f}})); 1470 1471INSTANTIATE_TEST_SUITE_P( 1472 HexFloat32ExponentTrailingSign, Float32StreamParseTest, 1473 ::testing::ValuesIn(std::vector<StreamParseCase<float>>{ 1474 // Don't consume a sign after the binary exponent digits. 1475 {"0x1.0p1", true, "", 2.0f}, 1476 {"0x1.0p1+", true, "+", 2.0f}, 1477 {"0x1.0p1-", true, "-", 2.0f}})); 1478 1479INSTANTIATE_TEST_SUITE_P( 1480 HexFloat32PositiveExponentOverflow, Float32StreamParseTest, 1481 ::testing::ValuesIn(std::vector<StreamParseCase<float>>{ 1482 // Positive exponents 1483 {"0x1.0p1", true, "", 2.0f}, // fine, a normal number 1484 {"0x1.0p15", true, "", 32768.0f}, // fine, a normal number 1485 {"0x1.0p127", true, "", float(ldexp(1.0f, 127))}, // good large number 1486 {"0x0.8p128", true, "", float(ldexp(1.0f, 127))}, // good large number 1487 {"0x0.1p131", true, "", float(ldexp(1.0f, 127))}, // good large number 1488 {"0x0.01p135", true, "", float(ldexp(1.0f, 127))}, // good large number 1489 {"0x1.0p128", true, "", float(ldexp(1.0f, 128))}, // infinity 1490 {"0x1.0p4294967295", true, "", float(ldexp(1.0f, 128))}, // infinity 1491 {"0x1.0p5000000000", true, "", float(ldexp(1.0f, 128))}, // infinity 1492 {"0x0.0p5000000000", true, "", 0.0f}, // zero mantissa, zero result 1493 })); 1494 1495INSTANTIATE_TEST_SUITE_P( 1496 HexFloat32NegativeExponentOverflow, Float32StreamParseTest, 1497 ::testing::ValuesIn(std::vector<StreamParseCase<float>>{ 1498 // Positive results, digits before '.' 1499 {"0x1.0p-126", true, "", 1500 float(ldexp(1.0f, -126))}, // fine, a small normal number 1501 {"0x1.0p-127", true, "", float(ldexp(1.0f, -127))}, // denorm number 1502 {"0x1.0p-149", true, "", 1503 float(ldexp(1.0f, -149))}, // smallest positive denormal 1504 {"0x0.8p-148", true, "", 1505 float(ldexp(1.0f, -149))}, // smallest positive denormal 1506 {"0x0.1p-145", true, "", 1507 float(ldexp(1.0f, -149))}, // smallest positive denormal 1508 {"0x0.01p-141", true, "", 1509 float(ldexp(1.0f, -149))}, // smallest positive denormal 1510 1511 // underflow rounds down to zero 1512 {"0x1.0p-150", true, "", 0.0f}, 1513 {"0x1.0p-4294967296", true, "", 1514 0.0f}, // avoid exponent overflow in parser 1515 {"0x1.0p-5000000000", true, "", 1516 0.0f}, // avoid exponent overflow in parser 1517 {"0x0.0p-5000000000", true, "", 0.0f}, // zero mantissa, zero result 1518 })); 1519 1520INSTANTIATE_TEST_SUITE_P( 1521 HexFloat16ExcessSignificantDigits, Float16StreamParseTest, 1522 ::testing::ValuesIn(std::vector<StreamParseCase<Float16>>{ 1523 // Zero 1524 {"0x1.c00p0", true, "", makeF16(0, 0, 0x300)}, 1525 {"0x0p0", true, "", makeF16(0, -15, 0x0)}, 1526 {"0x000.0000p0", true, "", makeF16(0, -15, 0x0)}, 1527 // All leading 1s 1528 {"0x1p0", true, "", makeF16(0, 0, 0x0)}, 1529 {"0x1.8p0", true, "", makeF16(0, 0, 0x200)}, 1530 {"0x1.cp0", true, "", makeF16(0, 0, 0x300)}, 1531 {"0x1.ep0", true, "", makeF16(0, 0, 0x380)}, 1532 {"0x1.fp0", true, "", makeF16(0, 0, 0x3c0)}, 1533 {"0x1.f8p0", true, "", makeF16(0, 0, 0x3e0)}, 1534 {"0x1.fcp0", true, "", makeF16(0, 0, 0x3f0)}, 1535 {"0x1.fep0", true, "", makeF16(0, 0, 0x3f8)}, 1536 {"0x1.ffp0", true, "", makeF16(0, 0, 0x3fc)}, 1537 // Fill trailing zeros to all significant places 1538 // that might be used for significant digits. 1539 {"0x1.ff8p0", true, "", makeF16(0, 0, 0x3fe)}, 1540 {"0x1.ffcp0", true, "", makeF16(0, 0, 0x3ff)}, 1541 {"0x1.800p0", true, "", makeF16(0, 0, 0x200)}, 1542 {"0x1.c00p0", true, "", makeF16(0, 0, 0x300)}, 1543 {"0x1.e00p0", true, "", makeF16(0, 0, 0x380)}, 1544 {"0x1.f00p0", true, "", makeF16(0, 0, 0x3c0)}, 1545 {"0x1.f80p0", true, "", makeF16(0, 0, 0x3e0)}, 1546 {"0x1.fc0p0", true, "", makeF16(0, 0, 0x3f0)}, 1547 {"0x1.fe0p0", true, "", makeF16(0, 0, 0x3f8)}, 1548 {"0x1.ff0p0", true, "", makeF16(0, 0, 0x3fc)}, 1549 {"0x1.ff8p0", true, "", makeF16(0, 0, 0x3fe)}, 1550 {"0x1.ffcp0", true, "", makeF16(0, 0, 0x3ff)}, 1551 // Add several trailing zeros 1552 {"0x1.c00000p0", true, "", makeF16(0, 0, 0x300)}, 1553 {"0x1.e00000p0", true, "", makeF16(0, 0, 0x380)}, 1554 {"0x1.f00000p0", true, "", makeF16(0, 0, 0x3c0)}, 1555 {"0x1.f80000p0", true, "", makeF16(0, 0, 0x3e0)}, 1556 {"0x1.fc0000p0", true, "", makeF16(0, 0, 0x3f0)}, 1557 {"0x1.fe0000p0", true, "", makeF16(0, 0, 0x3f8)}, 1558 {"0x1.ff0000p0", true, "", makeF16(0, 0, 0x3fc)}, 1559 {"0x1.ff8000p0", true, "", makeF16(0, 0, 0x3fe)}, 1560 {"0x1.ffcp0000", true, "", makeF16(0, 0, 0x3ff)}, 1561 // Samples that drop out bits in the middle. 1562 // 5 = 0101 4 = 0100 1563 // a = 1010 8 = 1000 1564 {"0x1.5a4p0", true, "", makeF16(0, 0, 0x169)}, 1565 {"0x1.a58p0", true, "", makeF16(0, 0, 0x296)}, 1566 // Samples that drop out bits *and* truncate significant bits 1567 // that can't be represented. 1568 {"0x1.5a40000p0", true, "", makeF16(0, 0, 0x169)}, 1569 {"0x1.5a7ffffp0", true, "", makeF16(0, 0, 0x169)}, 1570 {"0x1.a580000p0", true, "", makeF16(0, 0, 0x296)}, 1571 {"0x1.a5bffffp0", true, "", makeF16(0, 0, 0x296)}, 1572 // Try some negations. 1573 {"-0x0p0", true, "", makeF16(1, -15, 0x0)}, 1574 {"-0x000.0000p0", true, "", makeF16(1, -15, 0x0)}, 1575 {"-0x1.5a40000p0", true, "", makeF16(1, 0, 0x169)}, 1576 {"-0x1.5a7ffffp0", true, "", makeF16(1, 0, 0x169)}, 1577 {"-0x1.a580000p0", true, "", makeF16(1, 0, 0x296)}, 1578 {"-0x1.a5bffffp0", true, "", makeF16(1, 0, 0x296)}})); 1579 1580INSTANTIATE_TEST_SUITE_P( 1581 HexFloat16IncreasingExponentsAndMantissa, Float16StreamParseTest, 1582 ::testing::ValuesIn(std::vector<StreamParseCase<Float16>>{ 1583 // Zero 1584 {"0x0p0", true, "", makeF16(0, -15, 0x0)}, 1585 {"0x0p5000000000000", true, "", makeF16(0, -15, 0x0)}, 1586 {"-0x0p5000000000000", true, "", makeF16(1, -15, 0x0)}, 1587 // Leading 1 1588 {"0x1p0", true, "", makeF16(0, 0, 0x0)}, 1589 {"0x1p1", true, "", makeF16(0, 1, 0x0)}, 1590 {"0x1p16", true, "", makeF16(0, 16, 0x0)}, 1591 {"0x1p-1", true, "", makeF16(0, -1, 0x0)}, 1592 {"0x1p-14", true, "", makeF16(0, -14, 0x0)}, 1593 // Leading 2 1594 {"0x2p0", true, "", makeF16(0, 1, 0x0)}, 1595 {"0x2p1", true, "", makeF16(0, 2, 0x0)}, 1596 {"0x2p15", true, "", makeF16(0, 16, 0x0)}, 1597 {"0x2p-1", true, "", makeF16(0, 0, 0x0)}, 1598 {"0x2p-15", true, "", makeF16(0, -14, 0x0)}, 1599 // Leading 8 1600 {"0x8p0", true, "", makeF16(0, 3, 0x0)}, 1601 {"0x8p1", true, "", makeF16(0, 4, 0x0)}, 1602 {"0x8p13", true, "", makeF16(0, 16, 0x0)}, 1603 {"0x8p-3", true, "", makeF16(0, 0, 0x0)}, 1604 {"0x8p-17", true, "", makeF16(0, -14, 0x0)}, 1605 // Leading 10 1606 {"0x10.0p0", true, "", makeF16(0, 4, 0x0)}, 1607 {"0x10.0p1", true, "", makeF16(0, 5, 0x0)}, 1608 {"0x10.0p12", true, "", makeF16(0, 16, 0x0)}, 1609 {"0x10.0p-5", true, "", makeF16(0, -1, 0x0)}, 1610 {"0x10.0p-18", true, "", makeF16(0, -14, 0x0)}, 1611 // Samples that drop out bits *and* truncate significant bits 1612 // that can't be represented. 1613 // Progressively increase the leading digit. 1614 {"0x1.5a40000p0", true, "", makeF16(0, 0, 0x169)}, 1615 {"0x1.5a7ffffp0", true, "", makeF16(0, 0, 0x169)}, 1616 {"0x2.5a40000p0", true, "", makeF16(0, 1, 0x0b4)}, 1617 {"0x2.5a7ffffp0", true, "", makeF16(0, 1, 0x0b4)}, 1618 {"0x4.5a40000p0", true, "", makeF16(0, 2, 0x05a)}, 1619 {"0x4.5a7ffffp0", true, "", makeF16(0, 2, 0x05a)}, 1620 {"0x8.5a40000p0", true, "", makeF16(0, 3, 0x02d)}, 1621 {"0x8.5a7ffffp0", true, "", makeF16(0, 3, 0x02d)}})); 1622 1623} // namespace 1624} // namespace utils 1625} // namespace spvtools 1626