1// Copyright (c) 2015-2016 The Khronos Group Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include <cfloat> 16#include <cmath> 17#include <cstdio> 18#include <sstream> 19#include <string> 20#include <tuple> 21 22#include <gmock/gmock.h> 23#include "SPIRV/hex_float.h" 24 25namespace { 26using ::testing::Eq; 27using spvutils::BitwiseCast; 28using spvutils::Float16; 29using spvutils::FloatProxy; 30using spvutils::HexFloat; 31using spvutils::ParseNormalFloat; 32 33// In this file "encode" means converting a number into a string, 34// and "decode" means converting a string into a number. 35 36using HexFloatTest = 37 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>; 38using DecodeHexFloatTest = 39 ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>; 40using HexDoubleTest = 41 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>; 42using DecodeHexDoubleTest = 43 ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>; 44 45// Hex-encodes a float value. 46template <typename T> 47std::string EncodeViaHexFloat(const T& value) { 48 std::stringstream ss; 49 ss << spvutils::HexFloat<T>(value); 50 return ss.str(); 51} 52 53// The following two tests can't be DRY because they take different parameter 54// types. 55 56TEST_P(HexFloatTest, EncodeCorrectly) { 57 EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second)); 58} 59 60TEST_P(HexDoubleTest, EncodeCorrectly) { 61 EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second)); 62} 63 64// Decodes a hex-float string. 65template <typename T> 66FloatProxy<T> Decode(const std::string& str) { 67 spvutils::HexFloat<FloatProxy<T>> decoded(0.f); 68 EXPECT_TRUE((std::stringstream(str) >> decoded).eof()); 69 return decoded.value(); 70} 71 72TEST_P(HexFloatTest, DecodeCorrectly) { 73 EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first)); 74} 75 76TEST_P(HexDoubleTest, DecodeCorrectly) { 77 EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first)); 78} 79 80INSTANTIATE_TEST_SUITE_P( 81 Float32Tests, HexFloatTest, 82 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({ 83 {0.f, "0x0p+0"}, 84 {1.f, "0x1p+0"}, 85 {2.f, "0x1p+1"}, 86 {3.f, "0x1.8p+1"}, 87 {0.5f, "0x1p-1"}, 88 {0.25f, "0x1p-2"}, 89 {0.75f, "0x1.8p-1"}, 90 {-0.f, "-0x0p+0"}, 91 {-1.f, "-0x1p+0"}, 92 {-0.5f, "-0x1p-1"}, 93 {-0.25f, "-0x1p-2"}, 94 {-0.75f, "-0x1.8p-1"}, 95 96 // Larger numbers 97 {512.f, "0x1p+9"}, 98 {-512.f, "-0x1p+9"}, 99 {1024.f, "0x1p+10"}, 100 {-1024.f, "-0x1p+10"}, 101 {1024.f + 8.f, "0x1.02p+10"}, 102 {-1024.f - 8.f, "-0x1.02p+10"}, 103 104 // Small numbers 105 {1.0f / 512.f, "0x1p-9"}, 106 {1.0f / -512.f, "-0x1p-9"}, 107 {1.0f / 1024.f, "0x1p-10"}, 108 {1.0f / -1024.f, "-0x1p-10"}, 109 {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"}, 110 {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"}, 111 112 // lowest non-denorm 113 {float(ldexp(1.0f, -126)), "0x1p-126"}, 114 {float(ldexp(-1.0f, -126)), "-0x1p-126"}, 115 116 // Denormalized values 117 {float(ldexp(1.0f, -127)), "0x1p-127"}, 118 {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"}, 119 {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"}, 120 {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"}, 121 {float(ldexp(-1.0f, -127)), "-0x1p-127"}, 122 {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"}, 123 {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"}, 124 {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"}, 125 126 {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"}, 127 {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)), 128 "0x1.8p-128"}, 129 130 }))); 131 132INSTANTIATE_TEST_SUITE_P( 133 Float32NanTests, HexFloatTest, 134 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({ 135 // Various NAN and INF cases 136 {uint32_t(0xFF800000), "-0x1p+128"}, // -inf 137 {uint32_t(0x7F800000), "0x1p+128"}, // inf 138 {uint32_t(0xFFC00000), "-0x1.8p+128"}, // -nan 139 {uint32_t(0xFF800100), "-0x1.0002p+128"}, // -nan 140 {uint32_t(0xFF800c00), "-0x1.0018p+128"}, // -nan 141 {uint32_t(0xFF80F000), "-0x1.01ep+128"}, // -nan 142 {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"}, // -nan 143 {uint32_t(0x7FC00000), "0x1.8p+128"}, // +nan 144 {uint32_t(0x7F800100), "0x1.0002p+128"}, // +nan 145 {uint32_t(0x7f800c00), "0x1.0018p+128"}, // +nan 146 {uint32_t(0x7F80F000), "0x1.01ep+128"}, // +nan 147 {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"}, // +nan 148 }))); 149 150INSTANTIATE_TEST_SUITE_P( 151 Float64Tests, HexDoubleTest, 152 ::testing::ValuesIn( 153 std::vector<std::pair<FloatProxy<double>, std::string>>({ 154 {0., "0x0p+0"}, 155 {1., "0x1p+0"}, 156 {2., "0x1p+1"}, 157 {3., "0x1.8p+1"}, 158 {0.5, "0x1p-1"}, 159 {0.25, "0x1p-2"}, 160 {0.75, "0x1.8p-1"}, 161 {-0., "-0x0p+0"}, 162 {-1., "-0x1p+0"}, 163 {-0.5, "-0x1p-1"}, 164 {-0.25, "-0x1p-2"}, 165 {-0.75, "-0x1.8p-1"}, 166 167 // Larger numbers 168 {512., "0x1p+9"}, 169 {-512., "-0x1p+9"}, 170 {1024., "0x1p+10"}, 171 {-1024., "-0x1p+10"}, 172 {1024. + 8., "0x1.02p+10"}, 173 {-1024. - 8., "-0x1.02p+10"}, 174 175 // Large outside the range of normal floats 176 {ldexp(1.0, 128), "0x1p+128"}, 177 {ldexp(1.0, 129), "0x1p+129"}, 178 {ldexp(-1.0, 128), "-0x1p+128"}, 179 {ldexp(-1.0, 129), "-0x1p+129"}, 180 {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"}, 181 {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"}, 182 {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"}, 183 {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"}, 184 185 // Small numbers 186 {1.0 / 512., "0x1p-9"}, 187 {1.0 / -512., "-0x1p-9"}, 188 {1.0 / 1024., "0x1p-10"}, 189 {1.0 / -1024., "-0x1p-10"}, 190 {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"}, 191 {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"}, 192 193 // Small outside the range of normal floats 194 {ldexp(1.0, -128), "0x1p-128"}, 195 {ldexp(1.0, -129), "0x1p-129"}, 196 {ldexp(-1.0, -128), "-0x1p-128"}, 197 {ldexp(-1.0, -129), "-0x1p-129"}, 198 {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"}, 199 {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"}, 200 {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"}, 201 {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"}, 202 203 // lowest non-denorm 204 {ldexp(1.0, -1022), "0x1p-1022"}, 205 {ldexp(-1.0, -1022), "-0x1p-1022"}, 206 207 // Denormalized values 208 {ldexp(1.0, -1023), "0x1p-1023"}, 209 {ldexp(1.0, -1023) / 2.0, "0x1p-1024"}, 210 {ldexp(1.0, -1023) / 4.0, "0x1p-1025"}, 211 {ldexp(1.0, -1023) / 8.0, "0x1p-1026"}, 212 {ldexp(-1.0, -1024), "-0x1p-1024"}, 213 {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"}, 214 {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"}, 215 {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"}, 216 217 {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"}, 218 {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0), 219 "0x1.8p-1024"}, 220 221 }))); 222 223INSTANTIATE_TEST_SUITE_P( 224 Float64NanTests, HexDoubleTest, 225 ::testing::ValuesIn(std::vector< 226 std::pair<FloatProxy<double>, std::string>>({ 227 // Various NAN and INF cases 228 {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"}, //-inf 229 {uint64_t(0x7FF0000000000000LL), "0x1p+1024"}, //+inf 230 {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"}, // -nan 231 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, // -nan 232 {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"}, // -nan 233 {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"}, // -nan 234 {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"}, // -nan 235 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, // +nan 236 {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"}, // +nan 237 {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"}, // -nan 238 {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"}, // -nan 239 {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"}, // -nan 240 }))); 241 242TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) { 243 std::stringstream s; 244 s << std::setw(4) << std::oct << std::setfill('x') << 8 << " " 245 << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9; 246 EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11"))); 247} 248 249TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) { 250 std::stringstream s; 251 s << std::setw(4) << std::oct << std::setfill('x') << 8 << " " 252 << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4) 253 << 9; 254 EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11"))); 255} 256 257TEST_P(DecodeHexFloatTest, DecodeCorrectly) { 258 EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second)); 259} 260 261TEST_P(DecodeHexDoubleTest, DecodeCorrectly) { 262 EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second)); 263} 264 265INSTANTIATE_TEST_SUITE_P( 266 Float32DecodeTests, DecodeHexFloatTest, 267 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({ 268 {"0x0p+000", 0.f}, 269 {"0x0p0", 0.f}, 270 {"0x0p-0", 0.f}, 271 272 // flush to zero cases 273 {"0x1p-500", 0.f}, // Exponent underflows. 274 {"-0x1p-500", -0.f}, 275 {"0x0.00000000001p-126", 0.f}, // Fraction causes underflow. 276 {"-0x0.0000000001p-127", -0.f}, 277 {"-0x0.01p-142", -0.f}, // Fraction causes additional underflow. 278 {"0x0.01p-142", 0.f}, 279 280 // Some floats that do not encode the same way as they decode. 281 {"0x2p+0", 2.f}, 282 {"0xFFp+0", 255.f}, 283 {"0x0.8p+0", 0.5f}, 284 {"0x0.4p+0", 0.25f}, 285 }))); 286 287INSTANTIATE_TEST_SUITE_P( 288 Float32DecodeInfTests, DecodeHexFloatTest, 289 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({ 290 // inf cases 291 {"-0x1p+128", uint32_t(0xFF800000)}, // -inf 292 {"0x32p+127", uint32_t(0x7F800000)}, // inf 293 {"0x32p+500", uint32_t(0x7F800000)}, // inf 294 {"-0x32p+127", uint32_t(0xFF800000)}, // -inf 295 }))); 296 297INSTANTIATE_TEST_SUITE_P( 298 Float64DecodeTests, DecodeHexDoubleTest, 299 ::testing::ValuesIn( 300 std::vector<std::pair<std::string, FloatProxy<double>>>({ 301 {"0x0p+000", 0.}, 302 {"0x0p0", 0.}, 303 {"0x0p-0", 0.}, 304 305 // flush to zero cases 306 {"0x1p-5000", 0.}, // Exponent underflows. 307 {"-0x1p-5000", -0.}, 308 {"0x0.0000000000000001p-1023", 0.}, // Fraction causes underflow. 309 {"-0x0.000000000000001p-1024", -0.}, 310 {"-0x0.01p-1090", -0.f}, // Fraction causes additional underflow. 311 {"0x0.01p-1090", 0.}, 312 313 // Some floats that do not encode the same way as they decode. 314 {"0x2p+0", 2.}, 315 {"0xFFp+0", 255.}, 316 {"0x0.8p+0", 0.5}, 317 {"0x0.4p+0", 0.25}, 318 }))); 319 320INSTANTIATE_TEST_SUITE_P( 321 Float64DecodeInfTests, DecodeHexDoubleTest, 322 ::testing::ValuesIn( 323 std::vector<std::pair<std::string, FloatProxy<double>>>({ 324 // inf cases 325 {"-0x1p+1024", uint64_t(0xFFF0000000000000)}, // -inf 326 {"0x32p+1023", uint64_t(0x7FF0000000000000)}, // inf 327 {"0x32p+5000", uint64_t(0x7FF0000000000000)}, // inf 328 {"-0x32p+1023", uint64_t(0xFFF0000000000000)}, // -inf 329 }))); 330 331TEST(FloatProxy, ValidConversion) { 332 EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f)); 333 EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f)); 334 EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f)); 335 EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f)); 336 EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f)); 337 EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f)); 338 339 EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat())); 340 EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat())); 341 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat())); 342 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat())); 343 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat())); 344 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat())); 345 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat())); 346 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat())); 347 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat())); 348 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat())); 349 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat())); 350 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat())); 351 352 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u)); 353 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u)); 354 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u)); 355 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u)); 356 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u)); 357 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u)); 358 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu)); 359 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u)); 360 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u)); 361 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u)); 362 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u)); 363 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu)); 364} 365 366TEST(FloatProxy, Nan) { 367 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan()); 368 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan()); 369 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan()); 370 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan()); 371 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan()); 372 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan()); 373 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan()); 374 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan()); 375 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan()); 376 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan()); 377} 378 379TEST(FloatProxy, Negation) { 380 EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f)); 381 EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f)); 382 383 EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f)); 384 EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f)); 385 386 EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f)); 387 EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f)); 388 389 EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f)); 390 EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f)); 391 392 EXPECT_THAT( 393 (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(), 394 Eq(-std::numeric_limits<float>::infinity())); 395 EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity())) 396 .getAsFloat(), 397 Eq(std::numeric_limits<float>::infinity())); 398} 399 400// Test conversion of FloatProxy values to strings. 401// 402// In previous cases, we always wrapped the FloatProxy value in a HexFloat 403// before conversion to a string. In the following cases, the FloatProxy 404// decides for itself whether to print as a regular number or as a hex float. 405 406using FloatProxyFloatTest = 407 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>; 408using FloatProxyDoubleTest = 409 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>; 410 411// Converts a float value to a string via a FloatProxy. 412template <typename T> 413std::string EncodeViaFloatProxy(const T& value) { 414 std::stringstream ss; 415 ss << value; 416 return ss.str(); 417} 418 419// Converts a floating point string so that the exponent prefix 420// is 'e', and the exponent value does not have leading zeros. 421// The Microsoft runtime library likes to write things like "2.5E+010". 422// Convert that to "2.5e+10". 423// We don't care what happens to strings that are not floating point 424// strings. 425std::string NormalizeExponentInFloatString(std::string in) { 426 std::string result; 427 // Reserve one spot for the terminating null, even when the sscanf fails. 428 std::vector<char> prefix(in.size() + 1); 429 char e; 430 char plus_or_minus; 431 int exponent; // in base 10 432 if ((4 == std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", prefix.data(), &e, 433 &plus_or_minus, &exponent)) && 434 (e == 'e' || e == 'E') && 435 (plus_or_minus == '-' || plus_or_minus == '+')) { 436 // It looks like a floating point value with exponent. 437 std::stringstream out; 438 out << prefix.data() << 'e' << plus_or_minus << exponent; 439 result = out.str(); 440 } else { 441 result = in; 442 } 443 return result; 444} 445 446TEST(NormalizeFloat, Sample) { 447 EXPECT_THAT(NormalizeExponentInFloatString(""), Eq("")); 448 EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12")); 449 EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14")); 450 EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12")); 451 EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14")); 452} 453 454// The following two tests can't be DRY because they take different parameter 455// types. 456TEST_P(FloatProxyFloatTest, EncodeCorrectly) { 457 EXPECT_THAT( 458 NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)), 459 Eq(GetParam().second)); 460} 461 462TEST_P(FloatProxyDoubleTest, EncodeCorrectly) { 463 EXPECT_THAT( 464 NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)), 465 Eq(GetParam().second)); 466} 467 468INSTANTIATE_TEST_SUITE_P( 469 Float32Tests, FloatProxyFloatTest, 470 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({ 471 // Zero 472 {0.f, "0"}, 473 // Normal numbers 474 {1.f, "1"}, 475 {-0.25f, "-0.25"}, 476 {1000.0f, "1000"}, 477 478 // Still normal numbers, but with large magnitude exponents. 479 {float(ldexp(1.f, 126)), "8.50706e+37"}, 480 {float(ldexp(-1.f, -126)), "-1.17549e-38"}, 481 482 // denormalized values are printed as hex floats. 483 {float(ldexp(1.0f, -127)), "0x1p-127"}, 484 {float(ldexp(1.5f, -128)), "0x1.8p-128"}, 485 {float(ldexp(1.25, -129)), "0x1.4p-129"}, 486 {float(ldexp(1.125, -130)), "0x1.2p-130"}, 487 {float(ldexp(-1.0f, -127)), "-0x1p-127"}, 488 {float(ldexp(-1.0f, -128)), "-0x1p-128"}, 489 {float(ldexp(-1.0f, -129)), "-0x1p-129"}, 490 {float(ldexp(-1.5f, -130)), "-0x1.8p-130"}, 491 492 // NaNs 493 {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"}, 494 {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"}, 495 496 {std::numeric_limits<float>::infinity(), "0x1p+128"}, 497 {-std::numeric_limits<float>::infinity(), "-0x1p+128"}, 498 }))); 499 500INSTANTIATE_TEST_SUITE_P( 501 Float64Tests, FloatProxyDoubleTest, 502 ::testing::ValuesIn( 503 std::vector<std::pair<FloatProxy<double>, std::string>>({ 504 {0., "0"}, 505 {1., "1"}, 506 {-0.25, "-0.25"}, 507 {1000.0, "1000"}, 508 509 // Large outside the range of normal floats 510 {ldexp(1.0, 128), "3.40282366920938e+38"}, 511 {ldexp(1.5, 129), "1.02084710076282e+39"}, 512 {ldexp(-1.0, 128), "-3.40282366920938e+38"}, 513 {ldexp(-1.5, 129), "-1.02084710076282e+39"}, 514 515 // Small outside the range of normal floats 516 {ldexp(1.5, -129), "2.20405190779179e-39"}, 517 {ldexp(-1.5, -129), "-2.20405190779179e-39"}, 518 519 // lowest non-denorm 520 {ldexp(1.0, -1022), "2.2250738585072e-308"}, 521 {ldexp(-1.0, -1022), "-2.2250738585072e-308"}, 522 523 // Denormalized values 524 {ldexp(1.125, -1023), "0x1.2p-1023"}, 525 {ldexp(-1.375, -1024), "-0x1.6p-1024"}, 526 527 // NaNs 528 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, 529 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, 530 531 // Infinity 532 {std::numeric_limits<double>::infinity(), "0x1p+1024"}, 533 {-std::numeric_limits<double>::infinity(), "-0x1p+1024"}, 534 535 }))); 536 537// double is used so that unbiased_exponent can be used with the output 538// of ldexp directly. 539int32_t unbiased_exponent(double f) { 540 return spvutils::HexFloat<spvutils::FloatProxy<float>>( 541 static_cast<float>(f)).getUnbiasedNormalizedExponent(); 542} 543 544int16_t unbiased_half_exponent(uint16_t f) { 545 return spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>(f) 546 .getUnbiasedNormalizedExponent(); 547} 548 549TEST(HexFloatOperationTest, UnbiasedExponent) { 550 // Float cases 551 EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0))); 552 EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32))); 553 EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42))); 554 EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125))); 555 // Saturates to 128 556 EXPECT_EQ(128, unbiased_exponent(ldexp(1.0f, 256))); 557 558 EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100))); 559 EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm 560 EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128))); 561 EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129))); 562 EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140))); 563 // Smallest representable number 564 EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23))); 565 // Should get rounded to 0 first. 566 EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23))); 567 568 // Float16 cases 569 // The exponent is represented in the bits 0x7C00 570 // The offset is -15 571 EXPECT_EQ(0, unbiased_half_exponent(0x3C00)); 572 EXPECT_EQ(3, unbiased_half_exponent(0x4800)); 573 EXPECT_EQ(-1, unbiased_half_exponent(0x3800)); 574 EXPECT_EQ(-14, unbiased_half_exponent(0x0400)); 575 EXPECT_EQ(16, unbiased_half_exponent(0x7C00)); 576 EXPECT_EQ(10, unbiased_half_exponent(0x6400)); 577 578 // Smallest representable number 579 EXPECT_EQ(-24, unbiased_half_exponent(0x0001)); 580} 581 582// Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in factions 583float float_fractions(const std::vector<uint32_t>& fractions) { 584 float f = 0; 585 for(int32_t i: fractions) { 586 f += std::ldexp(1.0f, -i); 587 } 588 return f; 589} 590 591// Returns the normalized significand of a HexFloat<FloatProxy<float>> 592// that was created by calling float_fractions with the input fractions, 593// raised to the power of exp. 594uint32_t normalized_significand(const std::vector<uint32_t>& fractions, uint32_t exp) { 595 return spvutils::HexFloat<spvutils::FloatProxy<float>>( 596 static_cast<float>(ldexp(float_fractions(fractions), exp))) 597 .getNormalizedSignificand(); 598} 599 600// Sets the bits from MSB to LSB of the significand part of a float. 601// For example 0 would set the bit 23 (counting from LSB to MSB), 602// and 1 would set the 22nd bit. 603uint32_t bits_set(const std::vector<uint32_t>& bits) { 604 const uint32_t top_bit = 1u << 22u; 605 uint32_t val= 0; 606 for(uint32_t i: bits) { 607 val |= top_bit >> i; 608 } 609 return val; 610} 611 612// The same as bits_set but for a Float16 value instead of 32-bit floating 613// point. 614uint16_t half_bits_set(const std::vector<uint32_t>& bits) { 615 const uint32_t top_bit = 1u << 9u; 616 uint32_t val= 0; 617 for(uint32_t i: bits) { 618 val |= top_bit >> i; 619 } 620 return static_cast<uint16_t>(val); 621} 622 623TEST(HexFloatOperationTest, NormalizedSignificand) { 624 // For normalized numbers (the following) it should be a simple matter 625 // of getting rid of the top implicit bit 626 EXPECT_EQ(bits_set({}), normalized_significand({0}, 0)); 627 EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0)); 628 EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0)); 629 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0)); 630 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32)); 631 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126)); 632 633 // For denormalized numbers we expect the normalized significand to 634 // shift as if it were normalized. This means, in practice that the 635 // top_most set bit will be cut off. Looks very similar to above (on purpose) 636 EXPECT_EQ(bits_set({}), normalized_significand({0}, -127)); 637 EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -128)); 638 EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -127)); 639 EXPECT_EQ(bits_set({}), normalized_significand({22}, -127)); 640 EXPECT_EQ(bits_set({0}), normalized_significand({21, 22}, -127)); 641} 642 643// Returns the 32-bit floating point value created by 644// calling setFromSignUnbiasedExponentAndNormalizedSignificand 645// on a HexFloat<FloatProxy<float>> 646float set_from_sign(bool negative, int32_t unbiased_exponent, 647 uint32_t significand, bool round_denorm_up) { 648 spvutils::HexFloat<spvutils::FloatProxy<float>> f(0.f); 649 f.setFromSignUnbiasedExponentAndNormalizedSignificand( 650 negative, unbiased_exponent, significand, round_denorm_up); 651 return f.value().getAsFloat(); 652} 653 654TEST(HexFloatOperationTests, 655 SetFromSignUnbiasedExponentAndNormalizedSignificand) { 656 657 EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false)); 658 659 // Tests insertion of various denormalized numbers with and without round up. 660 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, false)); 661 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, true)); 662 EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false)); 663 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -150, 1, true)); 664 665 EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false)); 666 EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false)); 667 EXPECT_EQ(float_fractions({0, 1, 2, 5}), 668 set_from_sign(false, 0, bits_set({0, 1, 4}), false)); 669 EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32), 670 set_from_sign(false, -32, bits_set({0, 1, 4}), false)); 671 EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128), 672 set_from_sign(false, -128, bits_set({0, 1, 4}), false)); 673 674 // The negative cases from above. 675 EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false)); 676 EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false)); 677 EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false)); 678 EXPECT_EQ(-float_fractions({0, 1, 2, 5}), 679 set_from_sign(true, 0, bits_set({0, 1, 4}), false)); 680 EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32), 681 set_from_sign(true, -32, bits_set({0, 1, 4}), false)); 682 EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128), 683 set_from_sign(true, -128, bits_set({0, 1, 4}), false)); 684} 685 686TEST(HexFloatOperationTests, NonRounding) { 687 // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial, 688 // except in the denorm case which is a bit more complex. 689 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>; 690 bool carry_bit = false; 691 692 spvutils::round_direction rounding[] = { 693 spvutils::kRoundToZero, 694 spvutils::kRoundToNearestEven, 695 spvutils::kRoundToPositiveInfinity, 696 spvutils::kRoundToNegativeInfinity}; 697 698 // Everything fits, so this should be straight-forward 699 for (spvutils::round_direction round : rounding) { 700 EXPECT_EQ(bits_set({}), HF(0.f).getRoundedNormalizedSignificand<HF>( 701 round, &carry_bit)); 702 EXPECT_FALSE(carry_bit); 703 704 EXPECT_EQ(bits_set({0}), 705 HF(float_fractions({0, 1})) 706 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 707 EXPECT_FALSE(carry_bit); 708 709 EXPECT_EQ(bits_set({1, 3}), 710 HF(float_fractions({0, 2, 4})) 711 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 712 EXPECT_FALSE(carry_bit); 713 714 EXPECT_EQ( 715 bits_set({0, 1, 4}), 716 HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128))) 717 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 718 EXPECT_FALSE(carry_bit); 719 720 EXPECT_EQ( 721 bits_set({0, 1, 4, 22}), 722 HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23}))) 723 .getRoundedNormalizedSignificand<HF>(round, &carry_bit)); 724 EXPECT_FALSE(carry_bit); 725 } 726} 727 728struct RoundSignificandCase { 729 float source_float; 730 std::pair<int16_t, bool> expected_results; 731 spvutils::round_direction round; 732}; 733 734using HexFloatRoundTest = 735 ::testing::TestWithParam<RoundSignificandCase>; 736 737TEST_P(HexFloatRoundTest, RoundDownToFP16) { 738 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>; 739 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>; 740 741 HF input_value(GetParam().source_float); 742 bool carry_bit = false; 743 EXPECT_EQ(GetParam().expected_results.first, 744 input_value.getRoundedNormalizedSignificand<HF16>( 745 GetParam().round, &carry_bit)); 746 EXPECT_EQ(carry_bit, GetParam().expected_results.second); 747} 748 749// clang-format off 750INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatRoundTest, 751 ::testing::ValuesIn(std::vector<RoundSignificandCase>( 752 { 753 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToZero}, 754 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNearestEven}, 755 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToPositiveInfinity}, 756 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNegativeInfinity}, 757 {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero}, 758 759 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero}, 760 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity}, 761 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity}, 762 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNearestEven}, 763 764 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToZero}, 765 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToPositiveInfinity}, 766 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity}, 767 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToNearestEven}, 768 769 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero}, 770 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity}, 771 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity}, 772 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven}, 773 774 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero}, 775 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToPositiveInfinity}, 776 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity}, 777 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven}, 778 779 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero}, 780 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity}, 781 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity}, 782 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven}, 783 784 // Carries 785 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToZero}, 786 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToPositiveInfinity}, 787 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToNegativeInfinity}, 788 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToNearestEven}, 789 790 // Cases where original number was denorm. Note: this should have no effect 791 // the number is pre-normalized. 792 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero}, 793 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity}, 794 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity}, 795 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven}, 796 }))); 797// clang-format on 798 799struct UpCastSignificandCase { 800 uint16_t source_half; 801 uint32_t expected_result; 802}; 803 804using HexFloatRoundUpSignificandTest = 805 ::testing::TestWithParam<UpCastSignificandCase>; 806TEST_P(HexFloatRoundUpSignificandTest, Widening) { 807 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>; 808 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>; 809 bool carry_bit = false; 810 811 spvutils::round_direction rounding[] = { 812 spvutils::kRoundToZero, 813 spvutils::kRoundToNearestEven, 814 spvutils::kRoundToPositiveInfinity, 815 spvutils::kRoundToNegativeInfinity}; 816 817 // Everything fits, so everything should just be bit-shifts. 818 for (spvutils::round_direction round : rounding) { 819 carry_bit = false; 820 HF16 input_value(GetParam().source_half); 821 EXPECT_EQ( 822 GetParam().expected_result, 823 input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)) 824 << std::hex << "0x" 825 << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit) 826 << " 0x" << GetParam().expected_result; 827 EXPECT_FALSE(carry_bit); 828 } 829} 830 831INSTANTIATE_TEST_SUITE_P(F16toF32, HexFloatRoundUpSignificandTest, 832 // 0xFC00 of the source 16-bit hex value cover the sign and the exponent. 833 // They are ignored for this test. 834 ::testing::ValuesIn(std::vector<UpCastSignificandCase>( 835 { 836 {0x3F00, 0x600000}, 837 {0x0F00, 0x600000}, 838 {0x0F01, 0x602000}, 839 {0x0FFF, 0x7FE000}, 840 }))); 841 842struct DownCastTest { 843 float source_float; 844 uint16_t expected_half; 845 std::vector<spvutils::round_direction> directions; 846}; 847 848std::string get_round_text(spvutils::round_direction direction) { 849#define CASE(round_direction) \ 850 case round_direction: \ 851 return #round_direction 852 853 switch (direction) { 854 CASE(spvutils::kRoundToZero); 855 CASE(spvutils::kRoundToPositiveInfinity); 856 CASE(spvutils::kRoundToNegativeInfinity); 857 CASE(spvutils::kRoundToNearestEven); 858 } 859#undef CASE 860 return ""; 861} 862 863using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>; 864 865TEST_P(HexFloatFP32To16Tests, NarrowingCasts) { 866 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>; 867 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>; 868 HF f(GetParam().source_float); 869 for (auto round : GetParam().directions) { 870 HF16 half(0); 871 f.castTo(half, round); 872 EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value()) 873 << get_round_text(round) << " " << std::hex 874 << spvutils::BitwiseCast<uint32_t>(GetParam().source_float) 875 << " cast to: " << half.value().getAsFloat().get_value(); 876 } 877} 878 879const uint16_t positive_infinity = 0x7C00; 880const uint16_t negative_infinity = 0xFC00; 881 882INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatFP32To16Tests, 883 ::testing::ValuesIn(std::vector<DownCastTest>( 884 { 885 // Exactly representable as half. 886 {0.f, 0x0, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 887 {-0.f, 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 888 {1.0f, 0x3C00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 889 {-1.0f, 0xBC00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 890 891 {float_fractions({0, 1, 10}) , 0x3E01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 892 {-float_fractions({0, 1, 10}) , 0xBE01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 893 {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)), 0x4A01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 894 {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)), 0xCA01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 895 896 897 // Underflow 898 {static_cast<float>(ldexp(1.0f, -25)), 0x0, {spvutils::kRoundToZero, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 899 {static_cast<float>(ldexp(1.0f, -25)), 0x1, {spvutils::kRoundToPositiveInfinity}}, 900 {static_cast<float>(-ldexp(1.0f, -25)), 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNearestEven}}, 901 {static_cast<float>(-ldexp(1.0f, -25)), 0x8001, {spvutils::kRoundToNegativeInfinity}}, 902 {static_cast<float>(ldexp(1.0f, -24)), 0x1, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 903 904 // Overflow 905 {static_cast<float>(ldexp(1.0f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 906 {static_cast<float>(ldexp(1.0f, 18)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 907 {static_cast<float>(ldexp(1.3f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 908 {static_cast<float>(-ldexp(1.0f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 909 {static_cast<float>(-ldexp(1.0f, 18)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 910 {static_cast<float>(-ldexp(1.3f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 911 912 // Transfer of Infinities 913 {std::numeric_limits<float>::infinity(), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 914 {-std::numeric_limits<float>::infinity(), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}}, 915 916 // Nans are below because we cannot test for equality. 917 }))); 918 919struct UpCastCase{ 920 uint16_t source_half; 921 float expected_float; 922}; 923 924using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>; 925TEST_P(HexFloatFP16To32Tests, WideningCasts) { 926 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>; 927 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>; 928 HF16 f(GetParam().source_half); 929 930 spvutils::round_direction rounding[] = { 931 spvutils::kRoundToZero, 932 spvutils::kRoundToNearestEven, 933 spvutils::kRoundToPositiveInfinity, 934 spvutils::kRoundToNegativeInfinity}; 935 936 // Everything fits, so everything should just be bit-shifts. 937 for (spvutils::round_direction round : rounding) { 938 HF flt(0.f); 939 f.castTo(flt, round); 940 EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat()) 941 << get_round_text(round) << " " << std::hex 942 << spvutils::BitwiseCast<uint16_t>(GetParam().source_half) 943 << " cast to: " << flt.value().getAsFloat(); 944 } 945} 946 947INSTANTIATE_TEST_SUITE_P(F16ToF32, HexFloatFP16To32Tests, 948 ::testing::ValuesIn(std::vector<UpCastCase>( 949 { 950 {0x0000, 0.f}, 951 {0x8000, -0.f}, 952 {0x3C00, 1.0f}, 953 {0xBC00, -1.0f}, 954 {0x3F00, float_fractions({0, 1, 2})}, 955 {0xBF00, -float_fractions({0, 1, 2})}, 956 {0x3F01, float_fractions({0, 1, 2, 10})}, 957 {0xBF01, -float_fractions({0, 1, 2, 10})}, 958 959 // denorm 960 {0x0001, static_cast<float>(ldexp(1.0, -24))}, 961 {0x0002, static_cast<float>(ldexp(1.0, -23))}, 962 {0x8001, static_cast<float>(-ldexp(1.0, -24))}, 963 {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))}, 964 965 // inf 966 {0x7C00, std::numeric_limits<float>::infinity()}, 967 {0xFC00, -std::numeric_limits<float>::infinity()}, 968 }))); 969 970TEST(HexFloatOperationTests, NanTests) { 971 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>; 972 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>; 973 spvutils::round_direction rounding[] = { 974 spvutils::kRoundToZero, 975 spvutils::kRoundToNearestEven, 976 spvutils::kRoundToPositiveInfinity, 977 spvutils::kRoundToNegativeInfinity}; 978 979 // Everything fits, so everything should just be bit-shifts. 980 for (spvutils::round_direction round : rounding) { 981 HF16 f16(0); 982 HF f(0.f); 983 HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round); 984 EXPECT_TRUE(f16.value().isNan()); 985 HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round); 986 EXPECT_TRUE(f16.value().isNan()); 987 988 HF16(0x7C01).castTo(f, round); 989 EXPECT_TRUE(f.value().isNan()); 990 HF16(0x7C11).castTo(f, round); 991 EXPECT_TRUE(f.value().isNan()); 992 HF16(0xFC01).castTo(f, round); 993 EXPECT_TRUE(f.value().isNan()); 994 HF16(0x7C10).castTo(f, round); 995 EXPECT_TRUE(f.value().isNan()); 996 HF16(0xFF00).castTo(f, round); 997 EXPECT_TRUE(f.value().isNan()); 998 } 999} 1000 1001// A test case for parsing good and bad HexFloat<FloatProxy<T>> literals. 1002template <typename T> 1003struct FloatParseCase { 1004 std::string literal; 1005 bool negate_value; 1006 bool expect_success; 1007 HexFloat<FloatProxy<T>> expected_value; 1008}; 1009 1010using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>; 1011 1012TEST_P(ParseNormalFloatTest, Samples) { 1013 std::stringstream input(GetParam().literal); 1014 HexFloat<FloatProxy<float>> parsed_value(0.0f); 1015 ParseNormalFloat(input, GetParam().negate_value, parsed_value); 1016 EXPECT_NE(GetParam().expect_success, input.fail()) 1017 << " literal: " << GetParam().literal 1018 << " negate: " << GetParam().negate_value; 1019 if (GetParam().expect_success) { 1020 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value())) 1021 << " literal: " << GetParam().literal 1022 << " negate: " << GetParam().negate_value; 1023 } 1024} 1025 1026// Returns a FloatParseCase with expected failure. 1027template <typename T> 1028FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value, 1029 T expected_value) { 1030 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value); 1031 return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value}; 1032} 1033 1034// Returns a FloatParseCase that should successfully parse to a given value. 1035template <typename T> 1036FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value, 1037 T expected_value) { 1038 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value); 1039 return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value}; 1040} 1041 1042INSTANTIATE_TEST_SUITE_P( 1043 FloatParse, ParseNormalFloatTest, 1044 ::testing::ValuesIn(std::vector<FloatParseCase<float>>{ 1045 // Failing cases due to trivially incorrect syntax. 1046 BadFloatParseCase("abc", false, 0.0f), 1047 BadFloatParseCase("abc", true, 0.0f), 1048 1049 // Valid cases. 1050 GoodFloatParseCase("0", false, 0.0f), 1051 GoodFloatParseCase("0.0", false, 0.0f), 1052 GoodFloatParseCase("-0.0", false, -0.0f), 1053 GoodFloatParseCase("2.0", false, 2.0f), 1054 GoodFloatParseCase("-2.0", false, -2.0f), 1055 GoodFloatParseCase("+2.0", false, 2.0f), 1056 // Cases with negate_value being true. 1057 GoodFloatParseCase("0.0", true, -0.0f), 1058 GoodFloatParseCase("2.0", true, -2.0f), 1059 1060 // When negate_value is true, we should not accept a 1061 // leading minus or plus. 1062 BadFloatParseCase("-0.0", true, 0.0f), 1063 BadFloatParseCase("-2.0", true, 0.0f), 1064 BadFloatParseCase("+0.0", true, 0.0f), 1065 BadFloatParseCase("+2.0", true, 0.0f), 1066 1067 // Overflow is an error for 32-bit float parsing. 1068 BadFloatParseCase("1e40", false, FLT_MAX), 1069 BadFloatParseCase("1e40", true, -FLT_MAX), 1070 BadFloatParseCase("-1e40", false, -FLT_MAX), 1071 // We can't have -1e40 and negate_value == true since 1072 // that represents an original case of "--1e40" which 1073 // is invalid. 1074 })); 1075 1076using ParseNormalFloat16Test = 1077 ::testing::TestWithParam<FloatParseCase<Float16>>; 1078 1079TEST_P(ParseNormalFloat16Test, Samples) { 1080 std::stringstream input(GetParam().literal); 1081 HexFloat<FloatProxy<Float16>> parsed_value(0); 1082 ParseNormalFloat(input, GetParam().negate_value, parsed_value); 1083 EXPECT_NE(GetParam().expect_success, input.fail()) 1084 << " literal: " << GetParam().literal 1085 << " negate: " << GetParam().negate_value; 1086 if (GetParam().expect_success) { 1087 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value())) 1088 << " literal: " << GetParam().literal 1089 << " negate: " << GetParam().negate_value; 1090 } 1091} 1092 1093INSTANTIATE_TEST_SUITE_P( 1094 Float16Parse, ParseNormalFloat16Test, 1095 ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{ 1096 // Failing cases due to trivially incorrect syntax. 1097 BadFloatParseCase<Float16>("abc", false, uint16_t{0}), 1098 BadFloatParseCase<Float16>("abc", true, uint16_t{0}), 1099 1100 // Valid cases. 1101 GoodFloatParseCase<Float16>("0", false, uint16_t{0}), 1102 GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}), 1103 GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}), 1104 GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}), 1105 GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}), 1106 GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}), 1107 // Cases with negate_value being true. 1108 GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}), 1109 GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}), 1110 1111 // When negate_value is true, we should not accept a leading minus or 1112 // plus. 1113 BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}), 1114 BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}), 1115 BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}), 1116 BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}), 1117 })); 1118 1119// A test case for detecting infinities. 1120template <typename T> 1121struct OverflowParseCase { 1122 std::string input; 1123 bool expect_success; 1124 T expected_value; 1125}; 1126 1127using FloatProxyParseOverflowFloatTest = 1128 ::testing::TestWithParam<OverflowParseCase<float>>; 1129 1130TEST_P(FloatProxyParseOverflowFloatTest, Sample) { 1131 std::istringstream input(GetParam().input); 1132 HexFloat<FloatProxy<float>> value(0.0f); 1133 input >> value; 1134 EXPECT_NE(GetParam().expect_success, input.fail()); 1135 if (GetParam().expect_success) { 1136 EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value); 1137 } 1138} 1139 1140INSTANTIATE_TEST_SUITE_P( 1141 FloatOverflow, FloatProxyParseOverflowFloatTest, 1142 ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({ 1143 {"0", true, 0.0f}, 1144 {"0.0", true, 0.0f}, 1145 {"1.0", true, 1.0f}, 1146 {"1e38", true, 1e38f}, 1147 {"-1e38", true, -1e38f}, 1148 {"1e40", false, FLT_MAX}, 1149 {"-1e40", false, -FLT_MAX}, 1150 {"1e400", false, FLT_MAX}, 1151 {"-1e400", false, -FLT_MAX}, 1152 }))); 1153 1154using FloatProxyParseOverflowDoubleTest = 1155 ::testing::TestWithParam<OverflowParseCase<double>>; 1156 1157TEST_P(FloatProxyParseOverflowDoubleTest, Sample) { 1158 std::istringstream input(GetParam().input); 1159 HexFloat<FloatProxy<double>> value(0.0); 1160 input >> value; 1161 EXPECT_NE(GetParam().expect_success, input.fail()); 1162 if (GetParam().expect_success) { 1163 EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value)); 1164 } 1165} 1166 1167INSTANTIATE_TEST_SUITE_P( 1168 DoubleOverflow, FloatProxyParseOverflowDoubleTest, 1169 ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({ 1170 {"0", true, 0.0}, 1171 {"0.0", true, 0.0}, 1172 {"1.0", true, 1.0}, 1173 {"1e38", true, 1e38}, 1174 {"-1e38", true, -1e38}, 1175 {"1e40", true, 1e40}, 1176 {"-1e40", true, -1e40}, 1177 {"1e400", false, DBL_MAX}, 1178 {"-1e400", false, -DBL_MAX}, 1179 }))); 1180 1181using FloatProxyParseOverflowFloat16Test = 1182 ::testing::TestWithParam<OverflowParseCase<uint16_t>>; 1183 1184TEST_P(FloatProxyParseOverflowFloat16Test, Sample) { 1185 std::istringstream input(GetParam().input); 1186 HexFloat<FloatProxy<Float16>> value(0); 1187 input >> value; 1188 EXPECT_NE(GetParam().expect_success, input.fail()) << " literal: " 1189 << GetParam().input; 1190 if (GetParam().expect_success) { 1191 EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value)) 1192 << " literal: " << GetParam().input; 1193 } 1194} 1195 1196INSTANTIATE_TEST_SUITE_P( 1197 Float16Overflow, FloatProxyParseOverflowFloat16Test, 1198 ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({ 1199 {"0", true, uint16_t{0}}, 1200 {"0.0", true, uint16_t{0}}, 1201 {"1.0", true, uint16_t{0x3c00}}, 1202 // Overflow for 16-bit float is an error, and returns max or 1203 // lowest value. 1204 {"1e38", false, uint16_t{0x7bff}}, 1205 {"1e40", false, uint16_t{0x7bff}}, 1206 {"1e400", false, uint16_t{0x7bff}}, 1207 {"-1e38", false, uint16_t{0xfbff}}, 1208 {"-1e40", false, uint16_t{0xfbff}}, 1209 {"-1e400", false, uint16_t{0xfbff}}, 1210 }))); 1211 1212TEST(FloatProxy, Max) { 1213 EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(), 1214 Eq(uint16_t{0x7bff})); 1215 EXPECT_THAT(FloatProxy<float>::max().getAsFloat(), 1216 Eq(std::numeric_limits<float>::max())); 1217 EXPECT_THAT(FloatProxy<double>::max().getAsFloat(), 1218 Eq(std::numeric_limits<double>::max())); 1219} 1220 1221TEST(FloatProxy, Lowest) { 1222 EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(), 1223 Eq(uint16_t{0xfbff})); 1224 EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(), 1225 Eq(std::numeric_limits<float>::lowest())); 1226 EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(), 1227 Eq(std::numeric_limits<double>::lowest())); 1228} 1229 1230// TODO(awoloszyn): Add fp16 tests and HexFloatTraits. 1231} // anonymous namespace 1232