1e5c31af7Sopenharmony_ci/*------------------------------------------------------------------------- 2e5c31af7Sopenharmony_ci * drawElements Base Portability Library 3e5c31af7Sopenharmony_ci * ------------------------------------- 4e5c31af7Sopenharmony_ci * 5e5c31af7Sopenharmony_ci * Copyright 2014 The Android Open Source Project 6e5c31af7Sopenharmony_ci * 7e5c31af7Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 8e5c31af7Sopenharmony_ci * you may not use this file except in compliance with the License. 9e5c31af7Sopenharmony_ci * You may obtain a copy of the License at 10e5c31af7Sopenharmony_ci * 11e5c31af7Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 12e5c31af7Sopenharmony_ci * 13e5c31af7Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 14e5c31af7Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 15e5c31af7Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16e5c31af7Sopenharmony_ci * See the License for the specific language governing permissions and 17e5c31af7Sopenharmony_ci * limitations under the License. 18e5c31af7Sopenharmony_ci * 19e5c31af7Sopenharmony_ci *//*! 20e5c31af7Sopenharmony_ci * \file 21e5c31af7Sopenharmony_ci * \brief 16-bit floating-point math. 22e5c31af7Sopenharmony_ci *//*--------------------------------------------------------------------*/ 23e5c31af7Sopenharmony_ci 24e5c31af7Sopenharmony_ci#include "deFloat16.h" 25e5c31af7Sopenharmony_ci 26e5c31af7Sopenharmony_ciDE_BEGIN_EXTERN_C 27e5c31af7Sopenharmony_ci 28e5c31af7Sopenharmony_cideFloat16 deFloat32To16 (float val32) 29e5c31af7Sopenharmony_ci{ 30e5c31af7Sopenharmony_ci deUint32 sign; 31e5c31af7Sopenharmony_ci int expotent; 32e5c31af7Sopenharmony_ci deUint32 mantissa; 33e5c31af7Sopenharmony_ci union 34e5c31af7Sopenharmony_ci { 35e5c31af7Sopenharmony_ci float f; 36e5c31af7Sopenharmony_ci deUint32 u; 37e5c31af7Sopenharmony_ci } x; 38e5c31af7Sopenharmony_ci 39e5c31af7Sopenharmony_ci x.f = val32; 40e5c31af7Sopenharmony_ci sign = (x.u >> 16u) & 0x00008000u; 41e5c31af7Sopenharmony_ci expotent = (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15); 42e5c31af7Sopenharmony_ci mantissa = x.u & 0x007fffffu; 43e5c31af7Sopenharmony_ci 44e5c31af7Sopenharmony_ci if (expotent <= 0) 45e5c31af7Sopenharmony_ci { 46e5c31af7Sopenharmony_ci if (expotent < -10) 47e5c31af7Sopenharmony_ci { 48e5c31af7Sopenharmony_ci /* Rounds to zero. */ 49e5c31af7Sopenharmony_ci return (deFloat16) sign; 50e5c31af7Sopenharmony_ci } 51e5c31af7Sopenharmony_ci 52e5c31af7Sopenharmony_ci /* Converted to denormalized half, add leading 1 to significand. */ 53e5c31af7Sopenharmony_ci mantissa = mantissa | 0x00800000u; 54e5c31af7Sopenharmony_ci 55e5c31af7Sopenharmony_ci /* Round mantissa to nearest (10+e) */ 56e5c31af7Sopenharmony_ci { 57e5c31af7Sopenharmony_ci deUint32 t = 14u - expotent; 58e5c31af7Sopenharmony_ci deUint32 a = (1u << (t - 1u)) - 1u; 59e5c31af7Sopenharmony_ci deUint32 b = (mantissa >> t) & 1u; 60e5c31af7Sopenharmony_ci 61e5c31af7Sopenharmony_ci mantissa = (mantissa + a + b) >> t; 62e5c31af7Sopenharmony_ci } 63e5c31af7Sopenharmony_ci 64e5c31af7Sopenharmony_ci return (deFloat16) (sign | mantissa); 65e5c31af7Sopenharmony_ci } 66e5c31af7Sopenharmony_ci else if (expotent == 0xff - (127 - 15)) 67e5c31af7Sopenharmony_ci { 68e5c31af7Sopenharmony_ci if (mantissa == 0u) 69e5c31af7Sopenharmony_ci { 70e5c31af7Sopenharmony_ci /* InF */ 71e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u); 72e5c31af7Sopenharmony_ci } 73e5c31af7Sopenharmony_ci else 74e5c31af7Sopenharmony_ci { 75e5c31af7Sopenharmony_ci /* NaN */ 76e5c31af7Sopenharmony_ci mantissa >>= 13u; 77e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 78e5c31af7Sopenharmony_ci } 79e5c31af7Sopenharmony_ci } 80e5c31af7Sopenharmony_ci else 81e5c31af7Sopenharmony_ci { 82e5c31af7Sopenharmony_ci /* Normalized float. */ 83e5c31af7Sopenharmony_ci mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u); 84e5c31af7Sopenharmony_ci 85e5c31af7Sopenharmony_ci if (mantissa & 0x00800000u) 86e5c31af7Sopenharmony_ci { 87e5c31af7Sopenharmony_ci /* Overflow in mantissa. */ 88e5c31af7Sopenharmony_ci mantissa = 0u; 89e5c31af7Sopenharmony_ci expotent += 1; 90e5c31af7Sopenharmony_ci } 91e5c31af7Sopenharmony_ci 92e5c31af7Sopenharmony_ci if (expotent > 30) 93e5c31af7Sopenharmony_ci { 94e5c31af7Sopenharmony_ci /* \todo [pyry] Cause hw fp overflow */ 95e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u); 96e5c31af7Sopenharmony_ci } 97e5c31af7Sopenharmony_ci 98e5c31af7Sopenharmony_ci return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u)); 99e5c31af7Sopenharmony_ci } 100e5c31af7Sopenharmony_ci} 101e5c31af7Sopenharmony_ci 102e5c31af7Sopenharmony_cideFloat16 deFloat64To16 (double val64) 103e5c31af7Sopenharmony_ci{ 104e5c31af7Sopenharmony_ci deUint64 sign; 105e5c31af7Sopenharmony_ci long expotent; 106e5c31af7Sopenharmony_ci deUint64 mantissa; 107e5c31af7Sopenharmony_ci union 108e5c31af7Sopenharmony_ci { 109e5c31af7Sopenharmony_ci double f; 110e5c31af7Sopenharmony_ci deUint64 u; 111e5c31af7Sopenharmony_ci } x; 112e5c31af7Sopenharmony_ci 113e5c31af7Sopenharmony_ci x.f = val64; 114e5c31af7Sopenharmony_ci sign = (x.u >> 48u) & 0x00008000u; 115e5c31af7Sopenharmony_ci expotent = (long int)((x.u >> 52u) & 0x000007ffu) - (1023 - 15); 116e5c31af7Sopenharmony_ci mantissa = x.u & 0x00fffffffffffffu; 117e5c31af7Sopenharmony_ci 118e5c31af7Sopenharmony_ci if (expotent <= 0) 119e5c31af7Sopenharmony_ci { 120e5c31af7Sopenharmony_ci if (expotent < -10) 121e5c31af7Sopenharmony_ci { 122e5c31af7Sopenharmony_ci /* Rounds to zero. */ 123e5c31af7Sopenharmony_ci return (deFloat16) sign; 124e5c31af7Sopenharmony_ci } 125e5c31af7Sopenharmony_ci 126e5c31af7Sopenharmony_ci /* Converted to denormalized half, add leading 1 to significand. */ 127e5c31af7Sopenharmony_ci mantissa = mantissa | 0x0010000000000000u; 128e5c31af7Sopenharmony_ci 129e5c31af7Sopenharmony_ci /* Round mantissa to nearest (10+e) */ 130e5c31af7Sopenharmony_ci { 131e5c31af7Sopenharmony_ci deUint64 t = 43u - expotent; 132e5c31af7Sopenharmony_ci deUint64 a = (1u << (t - 1u)) - 1u; 133e5c31af7Sopenharmony_ci deUint64 b = (mantissa >> t) & 1u; 134e5c31af7Sopenharmony_ci 135e5c31af7Sopenharmony_ci mantissa = (mantissa + a + b) >> t; 136e5c31af7Sopenharmony_ci } 137e5c31af7Sopenharmony_ci 138e5c31af7Sopenharmony_ci return (deFloat16) (sign | mantissa); 139e5c31af7Sopenharmony_ci } 140e5c31af7Sopenharmony_ci else if (expotent == 0x7ff - (1023 - 15)) 141e5c31af7Sopenharmony_ci { 142e5c31af7Sopenharmony_ci if (mantissa == 0u) 143e5c31af7Sopenharmony_ci { 144e5c31af7Sopenharmony_ci /* InF */ 145e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u); 146e5c31af7Sopenharmony_ci } 147e5c31af7Sopenharmony_ci else 148e5c31af7Sopenharmony_ci { 149e5c31af7Sopenharmony_ci /* NaN */ 150e5c31af7Sopenharmony_ci mantissa >>= 42u; 151e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 152e5c31af7Sopenharmony_ci } 153e5c31af7Sopenharmony_ci } 154e5c31af7Sopenharmony_ci else 155e5c31af7Sopenharmony_ci { 156e5c31af7Sopenharmony_ci /* Normalized float. */ 157e5c31af7Sopenharmony_ci mantissa = mantissa + 0x000001ffffffffffu + ((mantissa >> 42u) & 1u); 158e5c31af7Sopenharmony_ci 159e5c31af7Sopenharmony_ci if (mantissa & 0x010000000000000u) 160e5c31af7Sopenharmony_ci { 161e5c31af7Sopenharmony_ci /* Overflow in mantissa. */ 162e5c31af7Sopenharmony_ci mantissa = 0u; 163e5c31af7Sopenharmony_ci expotent += 1; 164e5c31af7Sopenharmony_ci } 165e5c31af7Sopenharmony_ci 166e5c31af7Sopenharmony_ci if (expotent > 30) 167e5c31af7Sopenharmony_ci { 168e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u); 169e5c31af7Sopenharmony_ci } 170e5c31af7Sopenharmony_ci 171e5c31af7Sopenharmony_ci return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 42u)); 172e5c31af7Sopenharmony_ci } 173e5c31af7Sopenharmony_ci} 174e5c31af7Sopenharmony_ci 175e5c31af7Sopenharmony_ci/*--------------------------------------------------------------------*//*! 176e5c31af7Sopenharmony_ci * \brief Round the given number `val` to nearest even by discarding 177e5c31af7Sopenharmony_ci * the last `numBitsToDiscard` bits. 178e5c31af7Sopenharmony_ci * \param val value to round 179e5c31af7Sopenharmony_ci * \param numBitsToDiscard number of (least significant) bits to discard 180e5c31af7Sopenharmony_ci * \return The rounded value with the last `numBitsToDiscard` removed 181e5c31af7Sopenharmony_ci *//*--------------------------------------------------------------------*/ 182e5c31af7Sopenharmony_cistatic deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard) 183e5c31af7Sopenharmony_ci{ 184e5c31af7Sopenharmony_ci const deUint32 lastBits = val & ((1 << numBitsToDiscard) - 1); 185e5c31af7Sopenharmony_ci const deUint32 headBit = val & (1 << (numBitsToDiscard - 1)); 186e5c31af7Sopenharmony_ci 187e5c31af7Sopenharmony_ci DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32); /* Make sure no overflow. */ 188e5c31af7Sopenharmony_ci val >>= numBitsToDiscard; 189e5c31af7Sopenharmony_ci 190e5c31af7Sopenharmony_ci if (headBit == 0) 191e5c31af7Sopenharmony_ci { 192e5c31af7Sopenharmony_ci return val; 193e5c31af7Sopenharmony_ci } 194e5c31af7Sopenharmony_ci else if (headBit == lastBits) 195e5c31af7Sopenharmony_ci { 196e5c31af7Sopenharmony_ci if ((val & 0x1) == 0x1) 197e5c31af7Sopenharmony_ci { 198e5c31af7Sopenharmony_ci return val + 1; 199e5c31af7Sopenharmony_ci } 200e5c31af7Sopenharmony_ci else 201e5c31af7Sopenharmony_ci { 202e5c31af7Sopenharmony_ci return val; 203e5c31af7Sopenharmony_ci } 204e5c31af7Sopenharmony_ci } 205e5c31af7Sopenharmony_ci else 206e5c31af7Sopenharmony_ci { 207e5c31af7Sopenharmony_ci return val + 1; 208e5c31af7Sopenharmony_ci } 209e5c31af7Sopenharmony_ci} 210e5c31af7Sopenharmony_ci 211e5c31af7Sopenharmony_cideFloat16 deFloat32To16Round (float val32, deRoundingMode mode) 212e5c31af7Sopenharmony_ci{ 213e5c31af7Sopenharmony_ci union 214e5c31af7Sopenharmony_ci { 215e5c31af7Sopenharmony_ci float f; /* Interpret as 32-bit float */ 216e5c31af7Sopenharmony_ci deUint32 u; /* Interpret as 32-bit unsigned integer */ 217e5c31af7Sopenharmony_ci } x; 218e5c31af7Sopenharmony_ci deUint32 sign; /* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */ 219e5c31af7Sopenharmony_ci deUint32 exp32; /* exp32: biased exponent for 32-bit floats */ 220e5c31af7Sopenharmony_ci int exp16; /* exp16: biased exponent for 16-bit floats */ 221e5c31af7Sopenharmony_ci deUint32 mantissa; 222e5c31af7Sopenharmony_ci 223e5c31af7Sopenharmony_ci /* We only support these two rounding modes for now */ 224e5c31af7Sopenharmony_ci DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN); 225e5c31af7Sopenharmony_ci 226e5c31af7Sopenharmony_ci x.f = val32; 227e5c31af7Sopenharmony_ci sign = (x.u >> 16u) & 0x00008000u; 228e5c31af7Sopenharmony_ci exp32 = (x.u >> 23u) & 0x000000ffu; 229e5c31af7Sopenharmony_ci exp16 = (int) (exp32) - 127 + 15; /* 15/127: exponent bias for 16-bit/32-bit floats */ 230e5c31af7Sopenharmony_ci mantissa = x.u & 0x007fffffu; 231e5c31af7Sopenharmony_ci 232e5c31af7Sopenharmony_ci /* Case: zero and denormalized floats */ 233e5c31af7Sopenharmony_ci if (exp32 == 0) 234e5c31af7Sopenharmony_ci { 235e5c31af7Sopenharmony_ci /* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */ 236e5c31af7Sopenharmony_ci return (deFloat16) sign; 237e5c31af7Sopenharmony_ci } 238e5c31af7Sopenharmony_ci /* Case: Inf and NaN */ 239e5c31af7Sopenharmony_ci else if (exp32 == 0x000000ffu) 240e5c31af7Sopenharmony_ci { 241e5c31af7Sopenharmony_ci if (mantissa == 0u) 242e5c31af7Sopenharmony_ci { 243e5c31af7Sopenharmony_ci /* Inf */ 244e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u); 245e5c31af7Sopenharmony_ci } 246e5c31af7Sopenharmony_ci else 247e5c31af7Sopenharmony_ci { 248e5c31af7Sopenharmony_ci /* NaN */ 249e5c31af7Sopenharmony_ci mantissa >>= 13u; /* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */ 250e5c31af7Sopenharmony_ci /* Make sure we don't turn NaN into zero by | (mantissa == 0). */ 251e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 252e5c31af7Sopenharmony_ci } 253e5c31af7Sopenharmony_ci } 254e5c31af7Sopenharmony_ci /* The following are cases for normalized floats. 255e5c31af7Sopenharmony_ci * 256e5c31af7Sopenharmony_ci * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent, 257e5c31af7Sopenharmony_ci * we can only shift the mantissa further right. 258e5c31af7Sopenharmony_ci * The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent. 259e5c31af7Sopenharmony_ci * Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent. 260e5c31af7Sopenharmony_ci * So, we just need to right shift the mantissa -exp16 bits. 261e5c31af7Sopenharmony_ci * * If exp16 is 0, mantissa shifting requirement is similar to the above. 262e5c31af7Sopenharmony_ci * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats. 263e5c31af7Sopenharmony_ci */ 264e5c31af7Sopenharmony_ci /* Case: normalized floats -> zero */ 265e5c31af7Sopenharmony_ci else if (exp16 < -10) 266e5c31af7Sopenharmony_ci { 267e5c31af7Sopenharmony_ci /* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */ 268e5c31af7Sopenharmony_ci /* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */ 269e5c31af7Sopenharmony_ci return (deFloat16) sign; 270e5c31af7Sopenharmony_ci } 271e5c31af7Sopenharmony_ci /* Case: normalized floats -> zero and denormalized halfs */ 272e5c31af7Sopenharmony_ci else if (exp16 <= 0) 273e5c31af7Sopenharmony_ci { 274e5c31af7Sopenharmony_ci /* Add the implicit leading 1 in mormalized float to mantissa. */ 275e5c31af7Sopenharmony_ci mantissa |= 0x00800000u; 276e5c31af7Sopenharmony_ci /* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa. 277e5c31af7Sopenharmony_ci * Need to discard the last 14-bits considering rounding mode. 278e5c31af7Sopenharmony_ci * We also need to shift right -exp16 bits to encode the underflowed exponent. 279e5c31af7Sopenharmony_ci */ 280e5c31af7Sopenharmony_ci if (mode == DE_ROUNDINGMODE_TO_ZERO) 281e5c31af7Sopenharmony_ci { 282e5c31af7Sopenharmony_ci mantissa >>= (14 - exp16); 283e5c31af7Sopenharmony_ci } 284e5c31af7Sopenharmony_ci else 285e5c31af7Sopenharmony_ci { 286e5c31af7Sopenharmony_ci /* mantissa in the above may exceed 10-bits, in which case overflow happens. 287e5c31af7Sopenharmony_ci * The overflowed bit is automatically carried to exponent then. 288e5c31af7Sopenharmony_ci */ 289e5c31af7Sopenharmony_ci mantissa = roundToNearestEven(mantissa, 14 - exp16); 290e5c31af7Sopenharmony_ci } 291e5c31af7Sopenharmony_ci return (deFloat16) (sign | mantissa); 292e5c31af7Sopenharmony_ci } 293e5c31af7Sopenharmony_ci /* Case: normalized floats -> normalized floats */ 294e5c31af7Sopenharmony_ci else if (exp16 <= 30) 295e5c31af7Sopenharmony_ci { 296e5c31af7Sopenharmony_ci if (mode == DE_ROUNDINGMODE_TO_ZERO) 297e5c31af7Sopenharmony_ci { 298e5c31af7Sopenharmony_ci return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u)); 299e5c31af7Sopenharmony_ci } 300e5c31af7Sopenharmony_ci else 301e5c31af7Sopenharmony_ci { 302e5c31af7Sopenharmony_ci mantissa = roundToNearestEven(mantissa, 13); 303e5c31af7Sopenharmony_ci /* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */ 304e5c31af7Sopenharmony_ci exp16 = (exp16 << 10u) + (mantissa & (1 << 10)); 305e5c31af7Sopenharmony_ci mantissa &= (1u << 10) - 1; 306e5c31af7Sopenharmony_ci return (deFloat16) (sign | ((deUint32) exp16) | mantissa); 307e5c31af7Sopenharmony_ci } 308e5c31af7Sopenharmony_ci } 309e5c31af7Sopenharmony_ci /* Case: normalized floats (too large to be representable as 16-bit floats) */ 310e5c31af7Sopenharmony_ci else 311e5c31af7Sopenharmony_ci { 312e5c31af7Sopenharmony_ci /* According to IEEE Std 754-2008 Section 7.4, 313e5c31af7Sopenharmony_ci * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign 314e5c31af7Sopenharmony_ci * of the intermediate result. 315e5c31af7Sopenharmony_ci * * roundTowardZero carries all overflows to the format's largest finite number 316e5c31af7Sopenharmony_ci * with the sign of the intermediate result. 317e5c31af7Sopenharmony_ci */ 318e5c31af7Sopenharmony_ci if (mode == DE_ROUNDINGMODE_TO_ZERO) 319e5c31af7Sopenharmony_ci { 320e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */ 321e5c31af7Sopenharmony_ci } 322e5c31af7Sopenharmony_ci else 323e5c31af7Sopenharmony_ci { 324e5c31af7Sopenharmony_ci return (deFloat16) (sign | (0x1f << 10)); 325e5c31af7Sopenharmony_ci } 326e5c31af7Sopenharmony_ci } 327e5c31af7Sopenharmony_ci 328e5c31af7Sopenharmony_ci /* Make compiler happy */ 329e5c31af7Sopenharmony_ci return (deFloat16) 0; 330e5c31af7Sopenharmony_ci} 331e5c31af7Sopenharmony_ci 332e5c31af7Sopenharmony_ci/*--------------------------------------------------------------------*//*! 333e5c31af7Sopenharmony_ci * \brief Round the given number `val` to nearest even by discarding 334e5c31af7Sopenharmony_ci * the last `numBitsToDiscard` bits. 335e5c31af7Sopenharmony_ci * \param val value to round 336e5c31af7Sopenharmony_ci * \param numBitsToDiscard number of (least significant) bits to discard 337e5c31af7Sopenharmony_ci * \return The rounded value with the last `numBitsToDiscard` removed 338e5c31af7Sopenharmony_ci *//*--------------------------------------------------------------------*/ 339e5c31af7Sopenharmony_cistatic deUint64 roundToNearestEven64 (deUint64 val, const deUint64 numBitsToDiscard) 340e5c31af7Sopenharmony_ci{ 341e5c31af7Sopenharmony_ci const deUint64 lastBits = val & (((deUint64)1 << numBitsToDiscard) - 1); 342e5c31af7Sopenharmony_ci const deUint64 headBit = val & ((deUint64)1 << (numBitsToDiscard - 1)); 343e5c31af7Sopenharmony_ci 344e5c31af7Sopenharmony_ci DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 64); /* Make sure no overflow. */ 345e5c31af7Sopenharmony_ci val >>= numBitsToDiscard; 346e5c31af7Sopenharmony_ci 347e5c31af7Sopenharmony_ci if (headBit == 0) 348e5c31af7Sopenharmony_ci { 349e5c31af7Sopenharmony_ci return val; 350e5c31af7Sopenharmony_ci } 351e5c31af7Sopenharmony_ci else if (headBit == lastBits) 352e5c31af7Sopenharmony_ci { 353e5c31af7Sopenharmony_ci if ((val & 0x1) == 0x1) 354e5c31af7Sopenharmony_ci { 355e5c31af7Sopenharmony_ci return val + 1; 356e5c31af7Sopenharmony_ci } 357e5c31af7Sopenharmony_ci else 358e5c31af7Sopenharmony_ci { 359e5c31af7Sopenharmony_ci return val; 360e5c31af7Sopenharmony_ci } 361e5c31af7Sopenharmony_ci } 362e5c31af7Sopenharmony_ci else 363e5c31af7Sopenharmony_ci { 364e5c31af7Sopenharmony_ci return val + 1; 365e5c31af7Sopenharmony_ci } 366e5c31af7Sopenharmony_ci} 367e5c31af7Sopenharmony_ci 368e5c31af7Sopenharmony_cideFloat16 deFloat64To16Round (double val64, deRoundingMode mode) 369e5c31af7Sopenharmony_ci{ 370e5c31af7Sopenharmony_ci union 371e5c31af7Sopenharmony_ci { 372e5c31af7Sopenharmony_ci double f; /* Interpret as 64-bit float */ 373e5c31af7Sopenharmony_ci deUint64 u; /* Interpret as 64-bit unsigned integer */ 374e5c31af7Sopenharmony_ci } x; 375e5c31af7Sopenharmony_ci deUint64 sign; /* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */ 376e5c31af7Sopenharmony_ci deUint64 exp64; /* exp32: biased exponent for 64-bit floats */ 377e5c31af7Sopenharmony_ci int exp16; /* exp16: biased exponent for 16-bit floats */ 378e5c31af7Sopenharmony_ci deUint64 mantissa; 379e5c31af7Sopenharmony_ci 380e5c31af7Sopenharmony_ci /* We only support these two rounding modes for now */ 381e5c31af7Sopenharmony_ci DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN); 382e5c31af7Sopenharmony_ci 383e5c31af7Sopenharmony_ci x.f = val64; 384e5c31af7Sopenharmony_ci sign = (x.u >> 48u) & 0x00008000u; 385e5c31af7Sopenharmony_ci exp64 = (x.u >> 52u) & 0x000007ffu; 386e5c31af7Sopenharmony_ci exp16 = (int) (exp64) - 1023 + 15; /* 15/127: exponent bias for 16-bit/32-bit floats */ 387e5c31af7Sopenharmony_ci mantissa = x.u & 0x00fffffffffffffu; 388e5c31af7Sopenharmony_ci 389e5c31af7Sopenharmony_ci /* Case: zero and denormalized floats */ 390e5c31af7Sopenharmony_ci if (exp64 == 0) 391e5c31af7Sopenharmony_ci { 392e5c31af7Sopenharmony_ci /* Denormalized floats are < 2^(1-1023), not representable in 16-bit floats, rounding to zero. */ 393e5c31af7Sopenharmony_ci return (deFloat16) sign; 394e5c31af7Sopenharmony_ci } 395e5c31af7Sopenharmony_ci /* Case: Inf and NaN */ 396e5c31af7Sopenharmony_ci else if (exp64 == 0x000007ffu) 397e5c31af7Sopenharmony_ci { 398e5c31af7Sopenharmony_ci if (mantissa == 0u) 399e5c31af7Sopenharmony_ci { 400e5c31af7Sopenharmony_ci /* Inf */ 401e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u); 402e5c31af7Sopenharmony_ci } 403e5c31af7Sopenharmony_ci else 404e5c31af7Sopenharmony_ci { 405e5c31af7Sopenharmony_ci /* NaN */ 406e5c31af7Sopenharmony_ci mantissa >>= 42u; /* 16-bit floats has 10-bit for mantissa, 42-bit less than 64-bit floats. */ 407e5c31af7Sopenharmony_ci /* Make sure we don't turn NaN into zero by | (mantissa == 0). */ 408e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u)); 409e5c31af7Sopenharmony_ci } 410e5c31af7Sopenharmony_ci } 411e5c31af7Sopenharmony_ci /* The following are cases for normalized floats. 412e5c31af7Sopenharmony_ci * 413e5c31af7Sopenharmony_ci * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent, 414e5c31af7Sopenharmony_ci * we can only shift the mantissa further right. 415e5c31af7Sopenharmony_ci * The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent. 416e5c31af7Sopenharmony_ci * Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent. 417e5c31af7Sopenharmony_ci * So, we just need to right shift the mantissa -exp16 bits. 418e5c31af7Sopenharmony_ci * * If exp16 is 0, mantissa shifting requirement is similar to the above. 419e5c31af7Sopenharmony_ci * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats. 420e5c31af7Sopenharmony_ci */ 421e5c31af7Sopenharmony_ci /* Case: normalized floats -> zero */ 422e5c31af7Sopenharmony_ci else if (exp16 < -10) 423e5c31af7Sopenharmony_ci { 424e5c31af7Sopenharmony_ci /* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */ 425e5c31af7Sopenharmony_ci /* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */ 426e5c31af7Sopenharmony_ci return (deFloat16) sign; 427e5c31af7Sopenharmony_ci } 428e5c31af7Sopenharmony_ci /* Case: normalized floats -> zero and denormalized halfs */ 429e5c31af7Sopenharmony_ci else if (exp16 <= 0) 430e5c31af7Sopenharmony_ci { 431e5c31af7Sopenharmony_ci /* Add the implicit leading 1 in mormalized float to mantissa. */ 432e5c31af7Sopenharmony_ci mantissa |= 0x0010000000000000u; 433e5c31af7Sopenharmony_ci /* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa. 434e5c31af7Sopenharmony_ci * Need to discard the last 14-bits considering rounding mode. 435e5c31af7Sopenharmony_ci * We also need to shift right -exp16 bits to encode the underflowed exponent. 436e5c31af7Sopenharmony_ci */ 437e5c31af7Sopenharmony_ci if (mode == DE_ROUNDINGMODE_TO_ZERO) 438e5c31af7Sopenharmony_ci { 439e5c31af7Sopenharmony_ci mantissa >>= (43 - exp16); 440e5c31af7Sopenharmony_ci } 441e5c31af7Sopenharmony_ci else 442e5c31af7Sopenharmony_ci { 443e5c31af7Sopenharmony_ci /* mantissa in the above may exceed 10-bits, in which case overflow happens. 444e5c31af7Sopenharmony_ci * The overflowed bit is automatically carried to exponent then. 445e5c31af7Sopenharmony_ci */ 446e5c31af7Sopenharmony_ci mantissa = roundToNearestEven64(mantissa, 43 - exp16); 447e5c31af7Sopenharmony_ci } 448e5c31af7Sopenharmony_ci return (deFloat16) (sign | mantissa); 449e5c31af7Sopenharmony_ci } 450e5c31af7Sopenharmony_ci /* Case: normalized floats -> normalized floats */ 451e5c31af7Sopenharmony_ci else if (exp16 <= 30) 452e5c31af7Sopenharmony_ci { 453e5c31af7Sopenharmony_ci if (mode == DE_ROUNDINGMODE_TO_ZERO) 454e5c31af7Sopenharmony_ci { 455e5c31af7Sopenharmony_ci return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 42u)); 456e5c31af7Sopenharmony_ci } 457e5c31af7Sopenharmony_ci else 458e5c31af7Sopenharmony_ci { 459e5c31af7Sopenharmony_ci mantissa = roundToNearestEven64(mantissa, 42); 460e5c31af7Sopenharmony_ci /* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */ 461e5c31af7Sopenharmony_ci exp16 = (exp16 << 10u) + (deFloat16)(mantissa & (1 << 10)); 462e5c31af7Sopenharmony_ci mantissa &= (1u << 10) - 1; 463e5c31af7Sopenharmony_ci return (deFloat16) (sign | ((deUint32) exp16) | mantissa); 464e5c31af7Sopenharmony_ci } 465e5c31af7Sopenharmony_ci } 466e5c31af7Sopenharmony_ci /* Case: normalized floats (too large to be representable as 16-bit floats) */ 467e5c31af7Sopenharmony_ci else 468e5c31af7Sopenharmony_ci { 469e5c31af7Sopenharmony_ci /* According to IEEE Std 754-2008 Section 7.4, 470e5c31af7Sopenharmony_ci * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign 471e5c31af7Sopenharmony_ci * of the intermediate result. 472e5c31af7Sopenharmony_ci * * roundTowardZero carries all overflows to the format's largest finite number 473e5c31af7Sopenharmony_ci * with the sign of the intermediate result. 474e5c31af7Sopenharmony_ci */ 475e5c31af7Sopenharmony_ci if (mode == DE_ROUNDINGMODE_TO_ZERO) 476e5c31af7Sopenharmony_ci { 477e5c31af7Sopenharmony_ci return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */ 478e5c31af7Sopenharmony_ci } 479e5c31af7Sopenharmony_ci else 480e5c31af7Sopenharmony_ci { 481e5c31af7Sopenharmony_ci return (deFloat16) (sign | (0x1f << 10)); 482e5c31af7Sopenharmony_ci } 483e5c31af7Sopenharmony_ci } 484e5c31af7Sopenharmony_ci 485e5c31af7Sopenharmony_ci /* Make compiler happy */ 486e5c31af7Sopenharmony_ci return (deFloat16) 0; 487e5c31af7Sopenharmony_ci} 488e5c31af7Sopenharmony_ci 489e5c31af7Sopenharmony_cifloat deFloat16To32 (deFloat16 val16) 490e5c31af7Sopenharmony_ci{ 491e5c31af7Sopenharmony_ci deUint32 sign; 492e5c31af7Sopenharmony_ci deUint32 expotent; 493e5c31af7Sopenharmony_ci deUint32 mantissa; 494e5c31af7Sopenharmony_ci union 495e5c31af7Sopenharmony_ci { 496e5c31af7Sopenharmony_ci float f; 497e5c31af7Sopenharmony_ci deUint32 u; 498e5c31af7Sopenharmony_ci } x; 499e5c31af7Sopenharmony_ci 500e5c31af7Sopenharmony_ci x.u = 0u; 501e5c31af7Sopenharmony_ci 502e5c31af7Sopenharmony_ci sign = ((deUint32)val16 >> 15u) & 0x00000001u; 503e5c31af7Sopenharmony_ci expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; 504e5c31af7Sopenharmony_ci mantissa = (deUint32)val16 & 0x000003ffu; 505e5c31af7Sopenharmony_ci 506e5c31af7Sopenharmony_ci if (expotent == 0u) 507e5c31af7Sopenharmony_ci { 508e5c31af7Sopenharmony_ci if (mantissa == 0u) 509e5c31af7Sopenharmony_ci { 510e5c31af7Sopenharmony_ci /* +/- 0 */ 511e5c31af7Sopenharmony_ci x.u = sign << 31u; 512e5c31af7Sopenharmony_ci return x.f; 513e5c31af7Sopenharmony_ci } 514e5c31af7Sopenharmony_ci else 515e5c31af7Sopenharmony_ci { 516e5c31af7Sopenharmony_ci /* Denormalized, normalize it. */ 517e5c31af7Sopenharmony_ci 518e5c31af7Sopenharmony_ci while (!(mantissa & 0x00000400u)) 519e5c31af7Sopenharmony_ci { 520e5c31af7Sopenharmony_ci mantissa <<= 1u; 521e5c31af7Sopenharmony_ci expotent -= 1u; 522e5c31af7Sopenharmony_ci } 523e5c31af7Sopenharmony_ci 524e5c31af7Sopenharmony_ci expotent += 1u; 525e5c31af7Sopenharmony_ci mantissa &= ~0x00000400u; 526e5c31af7Sopenharmony_ci } 527e5c31af7Sopenharmony_ci } 528e5c31af7Sopenharmony_ci else if (expotent == 31u) 529e5c31af7Sopenharmony_ci { 530e5c31af7Sopenharmony_ci if (mantissa == 0u) 531e5c31af7Sopenharmony_ci { 532e5c31af7Sopenharmony_ci /* +/- InF */ 533e5c31af7Sopenharmony_ci x.u = (sign << 31u) | 0x7f800000u; 534e5c31af7Sopenharmony_ci return x.f; 535e5c31af7Sopenharmony_ci } 536e5c31af7Sopenharmony_ci else 537e5c31af7Sopenharmony_ci { 538e5c31af7Sopenharmony_ci /* +/- NaN */ 539e5c31af7Sopenharmony_ci x.u = (sign << 31u) | 0x7f800000u | (mantissa << 13u); 540e5c31af7Sopenharmony_ci return x.f; 541e5c31af7Sopenharmony_ci } 542e5c31af7Sopenharmony_ci } 543e5c31af7Sopenharmony_ci 544e5c31af7Sopenharmony_ci expotent = expotent + (127u - 15u); 545e5c31af7Sopenharmony_ci mantissa = mantissa << 13u; 546e5c31af7Sopenharmony_ci 547e5c31af7Sopenharmony_ci x.u = (sign << 31u) | (expotent << 23u) | mantissa; 548e5c31af7Sopenharmony_ci return x.f; 549e5c31af7Sopenharmony_ci} 550e5c31af7Sopenharmony_ci 551e5c31af7Sopenharmony_cidouble deFloat16To64 (deFloat16 val16) 552e5c31af7Sopenharmony_ci{ 553e5c31af7Sopenharmony_ci deUint64 sign; 554e5c31af7Sopenharmony_ci deUint64 expotent; 555e5c31af7Sopenharmony_ci deUint64 mantissa; 556e5c31af7Sopenharmony_ci union 557e5c31af7Sopenharmony_ci { 558e5c31af7Sopenharmony_ci double f; 559e5c31af7Sopenharmony_ci deUint64 u; 560e5c31af7Sopenharmony_ci } x; 561e5c31af7Sopenharmony_ci 562e5c31af7Sopenharmony_ci x.u = 0u; 563e5c31af7Sopenharmony_ci 564e5c31af7Sopenharmony_ci sign = ((deUint32)val16 >> 15u) & 0x00000001u; 565e5c31af7Sopenharmony_ci expotent = ((deUint32)val16 >> 10u) & 0x0000001fu; 566e5c31af7Sopenharmony_ci mantissa = (deUint32)val16 & 0x000003ffu; 567e5c31af7Sopenharmony_ci 568e5c31af7Sopenharmony_ci if (expotent == 0u) 569e5c31af7Sopenharmony_ci { 570e5c31af7Sopenharmony_ci if (mantissa == 0u) 571e5c31af7Sopenharmony_ci { 572e5c31af7Sopenharmony_ci /* +/- 0 */ 573e5c31af7Sopenharmony_ci x.u = sign << 63u; 574e5c31af7Sopenharmony_ci return x.f; 575e5c31af7Sopenharmony_ci } 576e5c31af7Sopenharmony_ci else 577e5c31af7Sopenharmony_ci { 578e5c31af7Sopenharmony_ci /* Denormalized, normalize it. */ 579e5c31af7Sopenharmony_ci 580e5c31af7Sopenharmony_ci while (!(mantissa & 0x00000400u)) 581e5c31af7Sopenharmony_ci { 582e5c31af7Sopenharmony_ci mantissa <<= 1u; 583e5c31af7Sopenharmony_ci expotent -= 1u; 584e5c31af7Sopenharmony_ci } 585e5c31af7Sopenharmony_ci 586e5c31af7Sopenharmony_ci expotent += 1u; 587e5c31af7Sopenharmony_ci mantissa &= ~0x00000400u; 588e5c31af7Sopenharmony_ci } 589e5c31af7Sopenharmony_ci } 590e5c31af7Sopenharmony_ci else if (expotent == 31u) 591e5c31af7Sopenharmony_ci { 592e5c31af7Sopenharmony_ci if (mantissa == 0u) 593e5c31af7Sopenharmony_ci { 594e5c31af7Sopenharmony_ci /* +/- InF */ 595e5c31af7Sopenharmony_ci x.u = (sign << 63u) | 0x7ff0000000000000u; 596e5c31af7Sopenharmony_ci return x.f; 597e5c31af7Sopenharmony_ci } 598e5c31af7Sopenharmony_ci else 599e5c31af7Sopenharmony_ci { 600e5c31af7Sopenharmony_ci /* +/- NaN */ 601e5c31af7Sopenharmony_ci x.u = (sign << 63u) | 0x7ff0000000000000u | (mantissa << 42u); 602e5c31af7Sopenharmony_ci return x.f; 603e5c31af7Sopenharmony_ci } 604e5c31af7Sopenharmony_ci } 605e5c31af7Sopenharmony_ci 606e5c31af7Sopenharmony_ci expotent = expotent + (1023u - 15u); 607e5c31af7Sopenharmony_ci mantissa = mantissa << 42u; 608e5c31af7Sopenharmony_ci 609e5c31af7Sopenharmony_ci x.u = (sign << 63u) | (expotent << 52u) | mantissa; 610e5c31af7Sopenharmony_ci return x.f; 611e5c31af7Sopenharmony_ci} 612e5c31af7Sopenharmony_ci 613e5c31af7Sopenharmony_ciDE_END_EXTERN_C 614