1/*------------------------------------------------------------------------- 2 * drawElements Base Portability Library 3 * ------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief 16-bit floating-point math. 22 *//*--------------------------------------------------------------------*/ 23 24#include "deFloat16.h" 25 26DE_BEGIN_EXTERN_C 27 28deFloat16 deFloat32To16 (float val32) 29{ 30 deUint32 sign; 31 int expotent; 32 deUint32 mantissa; 33 union 34 { 35 float f; 36 deUint32 u; 37 } x; 38 39 x.f = val32; 40 sign = (x.u >> 16u) & 0x00008000u; 41 expotent = (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15); 42 mantissa = x.u & 0x007fffffu; 43 44 if (expotent <= 0) 45 { 46 if (expotent < -10) 47 { 48 /* Rounds to zero. */ 49 return (deFloat16) sign; 50 } 51 52 /* Converted to denormalized half, add leading 1 to significand. */ 53 mantissa = mantissa | 0x00800000u; 54 55 /* Round mantissa to nearest (10+e) */ 56 { 57 deUint32 t = 14u - expotent; 58 deUint32 a = (1u << (t - 1u)) - 1u; 59 deUint32 b = (mantissa >> t) & 1u; 60 61 mantissa = (mantissa + a + b) >> t; 62 } 63 64 return (deFloat16) (sign | mantissa); 65 } 66 else if (expotent == 0xff - (127 - 15)) 67 { 68 if (mantissa == 0u) 69 { 70 /* InF */ 71 return (deFloat16) (sign | 0x7c00u); 72 } 73 else 74 { 75 /* NaN */ 76 mantissa >>= 13u; 77 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 78 } 79 } 80 else 81 { 82 /* Normalized float. */ 83 mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u); 84 85 if (mantissa & 0x00800000u) 86 { 87 /* Overflow in mantissa. */ 88 mantissa = 0u; 89 expotent += 1; 90 } 91 92 if (expotent > 30) 93 { 94 /* \todo [pyry] Cause hw fp overflow */ 95 return (deFloat16) (sign | 0x7c00u); 96 } 97 98 return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u)); 99 } 100} 101 102deFloat16 deFloat64To16 (double val64) 103{ 104 deUint64 sign; 105 long expotent; 106 deUint64 mantissa; 107 union 108 { 109 double f; 110 deUint64 u; 111 } x; 112 113 x.f = val64; 114 sign = (x.u >> 48u) & 0x00008000u; 115 expotent = (long int)((x.u >> 52u) & 0x000007ffu) - (1023 - 15); 116 mantissa = x.u & 0x00fffffffffffffu; 117 118 if (expotent <= 0) 119 { 120 if (expotent < -10) 121 { 122 /* Rounds to zero. */ 123 return (deFloat16) sign; 124 } 125 126 /* Converted to denormalized half, add leading 1 to significand. */ 127 mantissa = mantissa | 0x0010000000000000u; 128 129 /* Round mantissa to nearest (10+e) */ 130 { 131 deUint64 t = 43u - expotent; 132 deUint64 a = (1u << (t - 1u)) - 1u; 133 deUint64 b = (mantissa >> t) & 1u; 134 135 mantissa = (mantissa + a + b) >> t; 136 } 137 138 return (deFloat16) (sign | mantissa); 139 } 140 else if (expotent == 0x7ff - (1023 - 15)) 141 { 142 if (mantissa == 0u) 143 { 144 /* InF */ 145 return (deFloat16) (sign | 0x7c00u); 146 } 147 else 148 { 149 /* NaN */ 150 mantissa >>= 42u; 151 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 152 } 153 } 154 else 155 { 156 /* Normalized float. */ 157 mantissa = mantissa + 0x000001ffffffffffu + ((mantissa >> 42u) & 1u); 158 159 if (mantissa & 0x010000000000000u) 160 { 161 /* Overflow in mantissa. */ 162 mantissa = 0u; 163 expotent += 1; 164 } 165 166 if (expotent > 30) 167 { 168 return (deFloat16) (sign | 0x7c00u); 169 } 170 171 return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 42u)); 172 } 173} 174 175/*--------------------------------------------------------------------*//*! 176 * \brief Round the given number `val` to nearest even by discarding 177 * the last `numBitsToDiscard` bits. 178 * \param val value to round 179 * \param numBitsToDiscard number of (least significant) bits to discard 180 * \return The rounded value with the last `numBitsToDiscard` removed 181 *//*--------------------------------------------------------------------*/ 182static deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard) 183{ 184 const deUint32 lastBits = val & ((1 << numBitsToDiscard) - 1); 185 const deUint32 headBit = val & (1 << (numBitsToDiscard - 1)); 186 187 DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32); /* Make sure no overflow. */ 188 val >>= numBitsToDiscard; 189 190 if (headBit == 0) 191 { 192 return val; 193 } 194 else if (headBit == lastBits) 195 { 196 if ((val & 0x1) == 0x1) 197 { 198 return val + 1; 199 } 200 else 201 { 202 return val; 203 } 204 } 205 else 206 { 207 return val + 1; 208 } 209} 210 211deFloat16 deFloat32To16Round (float val32, deRoundingMode mode) 212{ 213 union 214 { 215 float f; /* Interpret as 32-bit float */ 216 deUint32 u; /* Interpret as 32-bit unsigned integer */ 217 } x; 218 deUint32 sign; /* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */ 219 deUint32 exp32; /* exp32: biased exponent for 32-bit floats */ 220 int exp16; /* exp16: biased exponent for 16-bit floats */ 221 deUint32 mantissa; 222 223 /* We only support these two rounding modes for now */ 224 DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN); 225 226 x.f = val32; 227 sign = (x.u >> 16u) & 0x00008000u; 228 exp32 = (x.u >> 23u) & 0x000000ffu; 229 exp16 = (int) (exp32) - 127 + 15; /* 15/127: exponent bias for 16-bit/32-bit floats */ 230 mantissa = x.u & 0x007fffffu; 231 232 /* Case: zero and denormalized floats */ 233 if (exp32 == 0) 234 { 235 /* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */ 236 return (deFloat16) sign; 237 } 238 /* Case: Inf and NaN */ 239 else if (exp32 == 0x000000ffu) 240 { 241 if (mantissa == 0u) 242 { 243 /* Inf */ 244 return (deFloat16) (sign | 0x7c00u); 245 } 246 else 247 { 248 /* NaN */ 249 mantissa >>= 13u; /* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */ 250 /* Make sure we don't turn NaN into zero by | (mantissa == 0). */ 251 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 252 } 253 } 254 /* The following are cases for normalized floats. 255 * 256 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent, 257 * we can only shift the mantissa further right. 258 * The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent. 259 * Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent. 260 * So, we just need to right shift the mantissa -exp16 bits. 261 * * If exp16 is 0, mantissa shifting requirement is similar to the above. 262 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats. 263 */ 264 /* Case: normalized floats -> zero */ 265 else if (exp16 < -10) 266 { 267 /* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */ 268 /* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */ 269 return (deFloat16) sign; 270 } 271 /* Case: normalized floats -> zero and denormalized halfs */ 272 else if (exp16 <= 0) 273 { 274 /* Add the implicit leading 1 in mormalized float to mantissa. */ 275 mantissa |= 0x00800000u; 276 /* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa. 277 * Need to discard the last 14-bits considering rounding mode. 278 * We also need to shift right -exp16 bits to encode the underflowed exponent. 279 */ 280 if (mode == DE_ROUNDINGMODE_TO_ZERO) 281 { 282 mantissa >>= (14 - exp16); 283 } 284 else 285 { 286 /* mantissa in the above may exceed 10-bits, in which case overflow happens. 287 * The overflowed bit is automatically carried to exponent then. 288 */ 289 mantissa = roundToNearestEven(mantissa, 14 - exp16); 290 } 291 return (deFloat16) (sign | mantissa); 292 } 293 /* Case: normalized floats -> normalized floats */ 294 else if (exp16 <= 30) 295 { 296 if (mode == DE_ROUNDINGMODE_TO_ZERO) 297 { 298 return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u)); 299 } 300 else 301 { 302 mantissa = roundToNearestEven(mantissa, 13); 303 /* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */ 304 exp16 = (exp16 << 10u) + (mantissa & (1 << 10)); 305 mantissa &= (1u << 10) - 1; 306 return (deFloat16) (sign | ((deUint32) exp16) | mantissa); 307 } 308 } 309 /* Case: normalized floats (too large to be representable as 16-bit floats) */ 310 else 311 { 312 /* According to IEEE Std 754-2008 Section 7.4, 313 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign 314 * of the intermediate result. 315 * * roundTowardZero carries all overflows to the format's largest finite number 316 * with the sign of the intermediate result. 317 */ 318 if (mode == DE_ROUNDINGMODE_TO_ZERO) 319 { 320 return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */ 321 } 322 else 323 { 324 return (deFloat16) (sign | (0x1f << 10)); 325 } 326 } 327 328 /* Make compiler happy */ 329 return (deFloat16) 0; 330} 331 332/*--------------------------------------------------------------------*//*! 333 * \brief Round the given number `val` to nearest even by discarding 334 * the last `numBitsToDiscard` bits. 335 * \param val value to round 336 * \param numBitsToDiscard number of (least significant) bits to discard 337 * \return The rounded value with the last `numBitsToDiscard` removed 338 *//*--------------------------------------------------------------------*/ 339static deUint64 roundToNearestEven64 (deUint64 val, const deUint64 numBitsToDiscard) 340{ 341 const deUint64 lastBits = val & (((deUint64)1 << numBitsToDiscard) - 1); 342 const deUint64 headBit = val & ((deUint64)1 << (numBitsToDiscard - 1)); 343 344 DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 64); /* Make sure no overflow. */ 345 val >>= numBitsToDiscard; 346 347 if (headBit == 0) 348 { 349 return val; 350 } 351 else if (headBit == lastBits) 352 { 353 if ((val & 0x1) == 0x1) 354 { 355 return val + 1; 356 } 357 else 358 { 359 return val; 360 } 361 } 362 else 363 { 364 return val + 1; 365 } 366} 367 368deFloat16 deFloat64To16Round (double val64, deRoundingMode mode) 369{ 370 union 371 { 372 double f; /* Interpret as 64-bit float */ 373 deUint64 u; /* Interpret as 64-bit unsigned integer */ 374 } x; 375 deUint64 sign; /* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */ 376 deUint64 exp64; /* exp32: biased exponent for 64-bit floats */ 377 int exp16; /* exp16: biased exponent for 16-bit floats */ 378 deUint64 mantissa; 379 380 /* We only support these two rounding modes for now */ 381 DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN); 382 383 x.f = val64; 384 sign = (x.u >> 48u) & 0x00008000u; 385 exp64 = (x.u >> 52u) & 0x000007ffu; 386 exp16 = (int) (exp64) - 1023 + 15; /* 15/127: exponent bias for 16-bit/32-bit floats */ 387 mantissa = x.u & 0x00fffffffffffffu; 388 389 /* Case: zero and denormalized floats */ 390 if (exp64 == 0) 391 { 392 /* Denormalized floats are < 2^(1-1023), not representable in 16-bit floats, rounding to zero. */ 393 return (deFloat16) sign; 394 } 395 /* Case: Inf and NaN */ 396 else if (exp64 == 0x000007ffu) 397 { 398 if (mantissa == 0u) 399 { 400 /* Inf */ 401 return (deFloat16) (sign | 0x7c00u); 402 } 403 else 404 { 405 /* NaN */ 406 mantissa >>= 42u; /* 16-bit floats has 10-bit for mantissa, 42-bit less than 64-bit floats. */ 407 /* Make sure we don't turn NaN into zero by | (mantissa == 0). */ 408 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 409 } 410 } 411 /* The following are cases for normalized floats. 412 * 413 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent, 414 * we can only shift the mantissa further right. 415 * The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent. 416 * Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent. 417 * So, we just need to right shift the mantissa -exp16 bits. 418 * * If exp16 is 0, mantissa shifting requirement is similar to the above. 419 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats. 420 */ 421 /* Case: normalized floats -> zero */ 422 else if (exp16 < -10) 423 { 424 /* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */ 425 /* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */ 426 return (deFloat16) sign; 427 } 428 /* Case: normalized floats -> zero and denormalized halfs */ 429 else if (exp16 <= 0) 430 { 431 /* Add the implicit leading 1 in mormalized float to mantissa. */ 432 mantissa |= 0x0010000000000000u; 433 /* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa. 434 * Need to discard the last 14-bits considering rounding mode. 435 * We also need to shift right -exp16 bits to encode the underflowed exponent. 436 */ 437 if (mode == DE_ROUNDINGMODE_TO_ZERO) 438 { 439 mantissa >>= (43 - exp16); 440 } 441 else 442 { 443 /* mantissa in the above may exceed 10-bits, in which case overflow happens. 444 * The overflowed bit is automatically carried to exponent then. 445 */ 446 mantissa = roundToNearestEven64(mantissa, 43 - exp16); 447 } 448 return (deFloat16) (sign | mantissa); 449 } 450 /* Case: normalized floats -> normalized floats */ 451 else if (exp16 <= 30) 452 { 453 if (mode == DE_ROUNDINGMODE_TO_ZERO) 454 { 455 return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 42u)); 456 } 457 else 458 { 459 mantissa = roundToNearestEven64(mantissa, 42); 460 /* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */ 461 exp16 = (exp16 << 10u) + (deFloat16)(mantissa & (1 << 10)); 462 mantissa &= (1u << 10) - 1; 463 return (deFloat16) (sign | ((deUint32) exp16) | mantissa); 464 } 465 } 466 /* Case: normalized floats (too large to be representable as 16-bit floats) */ 467 else 468 { 469 /* According to IEEE Std 754-2008 Section 7.4, 470 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign 471 * of the intermediate result. 472 * * roundTowardZero carries all overflows to the format's largest finite number 473 * with the sign of the intermediate result. 474 */ 475 if (mode == DE_ROUNDINGMODE_TO_ZERO) 476 { 477 return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */ 478 } 479 else 480 { 481 return (deFloat16) (sign | (0x1f << 10)); 482 } 483 } 484 485 /* Make compiler happy */ 486 return (deFloat16) 0; 487} 488 489float deFloat16To32 (deFloat16 val16) 490{ 491 deUint32 sign; 492 deUint32 expotent; 493 deUint32 mantissa; 494 union 495 { 496 float f; 497 deUint32 u; 498 } x; 499 500 x.u = 0u; 501 502 sign = ((deUint32)val16 >> 15u) & 0x00000001u; 503 expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; 504 mantissa = (deUint32)val16 & 0x000003ffu; 505 506 if (expotent == 0u) 507 { 508 if (mantissa == 0u) 509 { 510 /* +/- 0 */ 511 x.u = sign << 31u; 512 return x.f; 513 } 514 else 515 { 516 /* Denormalized, normalize it. */ 517 518 while (!(mantissa & 0x00000400u)) 519 { 520 mantissa <<= 1u; 521 expotent -= 1u; 522 } 523 524 expotent += 1u; 525 mantissa &= ~0x00000400u; 526 } 527 } 528 else if (expotent == 31u) 529 { 530 if (mantissa == 0u) 531 { 532 /* +/- InF */ 533 x.u = (sign << 31u) | 0x7f800000u; 534 return x.f; 535 } 536 else 537 { 538 /* +/- NaN */ 539 x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u); 540 return x.f; 541 } 542 } 543 544 expotent = expotent + (127u - 15u); 545 mantissa = mantissa << 13u; 546 547 x.u = (sign << 31u) | (expotent << 23u) | mantissa; 548 return x.f; 549} 550 551double deFloat16To64 (deFloat16 val16) 552{ 553 deUint64 sign; 554 deUint64 expotent; 555 deUint64 mantissa; 556 union 557 { 558 double f; 559 deUint64 u; 560 } x; 561 562 x.u = 0u; 563 564 sign = ((deUint32)val16 >> 15u) & 0x00000001u; 565 expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; 566 mantissa = (deUint32)val16 & 0x000003ffu; 567 568 if (expotent == 0u) 569 { 570 if (mantissa == 0u) 571 { 572 /* +/- 0 */ 573 x.u = sign << 63u; 574 return x.f; 575 } 576 else 577 { 578 /* Denormalized, normalize it. */ 579 580 while (!(mantissa & 0x00000400u)) 581 { 582 mantissa <<= 1u; 583 expotent -= 1u; 584 } 585 586 expotent += 1u; 587 mantissa &= ~0x00000400u; 588 } 589 } 590 else if (expotent == 31u) 591 { 592 if (mantissa == 0u) 593 { 594 /* +/- InF */ 595 x.u = (sign << 63u) | 0x7ff0000000000000u; 596 return x.f; 597 } 598 else 599 { 600 /* +/- NaN */ 601 x.u = (sign << 63u) | 0x7ff0000000000000u | (mantissa << 42u); 602 return x.f; 603 } 604 } 605 606 expotent = expotent + (1023u - 15u); 607 mantissa = mantissa << 42u; 608 609 x.u = (sign << 63u) | (expotent << 52u) | mantissa; 610 return x.f; 611} 612 613DE_END_EXTERN_C 614