1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Mesa 3-D graphics library 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 5bf215546Sopenharmony_ci * Copyright 2015 Philip Taylor <philip@zaynar.co.uk> 6bf215546Sopenharmony_ci * Copyright 2018 Advanced Micro Devices, Inc. 7bf215546Sopenharmony_ci * Copyright (C) 2018-2019 Intel Corporation 8bf215546Sopenharmony_ci * 9bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 10bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 11bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 12bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 13bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 14bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included 17bf215546Sopenharmony_ci * in all copies or substantial portions of the Software. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 23bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 24bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include <math.h> 29bf215546Sopenharmony_ci#include <assert.h> 30bf215546Sopenharmony_ci#include "half_float.h" 31bf215546Sopenharmony_ci#include "rounding.h" 32bf215546Sopenharmony_ci#include "softfloat.h" 33bf215546Sopenharmony_ci#include "macros.h" 34bf215546Sopenharmony_ci#include "u_math.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_citypedef union { float f; int32_t i; uint32_t u; } fi_type; 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci/** 39bf215546Sopenharmony_ci * Convert a 4-byte float to a 2-byte half float. 40bf215546Sopenharmony_ci * 41bf215546Sopenharmony_ci * Not all float32 values can be represented exactly as a float16 value. We 42bf215546Sopenharmony_ci * round such intermediate float32 values to the nearest float16. When the 43bf215546Sopenharmony_ci * float32 lies exactly between to float16 values, we round to the one with 44bf215546Sopenharmony_ci * an even mantissa. 45bf215546Sopenharmony_ci * 46bf215546Sopenharmony_ci * This rounding behavior has several benefits: 47bf215546Sopenharmony_ci * - It has no sign bias. 48bf215546Sopenharmony_ci * 49bf215546Sopenharmony_ci * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's 50bf215546Sopenharmony_ci * GPU ISA. 51bf215546Sopenharmony_ci * 52bf215546Sopenharmony_ci * - By reproducing the behavior of the GPU (at least on Intel hardware), 53bf215546Sopenharmony_ci * compile-time evaluation of constant packHalf2x16 GLSL expressions will 54bf215546Sopenharmony_ci * result in the same value as if the expression were executed on the GPU. 55bf215546Sopenharmony_ci */ 56bf215546Sopenharmony_ciuint16_t 57bf215546Sopenharmony_ci_mesa_float_to_half_slow(float val) 58bf215546Sopenharmony_ci{ 59bf215546Sopenharmony_ci const fi_type fi = {val}; 60bf215546Sopenharmony_ci const int flt_m = fi.i & 0x7fffff; 61bf215546Sopenharmony_ci const int flt_e = (fi.i >> 23) & 0xff; 62bf215546Sopenharmony_ci const int flt_s = (fi.i >> 31) & 0x1; 63bf215546Sopenharmony_ci int s, e, m = 0; 64bf215546Sopenharmony_ci uint16_t result; 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci /* sign bit */ 67bf215546Sopenharmony_ci s = flt_s; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci /* handle special cases */ 70bf215546Sopenharmony_ci if ((flt_e == 0) && (flt_m == 0)) { 71bf215546Sopenharmony_ci /* zero */ 72bf215546Sopenharmony_ci /* m = 0; - already set */ 73bf215546Sopenharmony_ci e = 0; 74bf215546Sopenharmony_ci } 75bf215546Sopenharmony_ci else if ((flt_e == 0) && (flt_m != 0)) { 76bf215546Sopenharmony_ci /* denorm -- denorm float maps to 0 half */ 77bf215546Sopenharmony_ci /* m = 0; - already set */ 78bf215546Sopenharmony_ci e = 0; 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci else if ((flt_e == 0xff) && (flt_m == 0)) { 81bf215546Sopenharmony_ci /* infinity */ 82bf215546Sopenharmony_ci /* m = 0; - already set */ 83bf215546Sopenharmony_ci e = 31; 84bf215546Sopenharmony_ci } 85bf215546Sopenharmony_ci else if ((flt_e == 0xff) && (flt_m != 0)) { 86bf215546Sopenharmony_ci /* Retain the top bits of a NaN to make sure that the quiet/signaling 87bf215546Sopenharmony_ci * status stays the same. 88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_ci m = flt_m >> 13; 90bf215546Sopenharmony_ci if (!m) 91bf215546Sopenharmony_ci m = 1; 92bf215546Sopenharmony_ci e = 31; 93bf215546Sopenharmony_ci } 94bf215546Sopenharmony_ci else { 95bf215546Sopenharmony_ci /* regular number */ 96bf215546Sopenharmony_ci const int new_exp = flt_e - 127; 97bf215546Sopenharmony_ci if (new_exp < -14) { 98bf215546Sopenharmony_ci /* The float32 lies in the range (0.0, min_normal16) and is rounded 99bf215546Sopenharmony_ci * to a nearby float16 value. The result will be either zero, subnormal, 100bf215546Sopenharmony_ci * or normal. 101bf215546Sopenharmony_ci */ 102bf215546Sopenharmony_ci e = 0; 103bf215546Sopenharmony_ci m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f)); 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci else if (new_exp > 15) { 106bf215546Sopenharmony_ci /* map this value to infinity */ 107bf215546Sopenharmony_ci /* m = 0; - already set */ 108bf215546Sopenharmony_ci e = 31; 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci else { 111bf215546Sopenharmony_ci /* The float32 lies in the range 112bf215546Sopenharmony_ci * [min_normal16, max_normal16 + max_step16) 113bf215546Sopenharmony_ci * and is rounded to a nearby float16 value. The result will be 114bf215546Sopenharmony_ci * either normal or infinite. 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_ci e = new_exp + 15; 117bf215546Sopenharmony_ci m = _mesa_lroundevenf(flt_m / (float) (1 << 13)); 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci assert(0 <= m && m <= 1024); 122bf215546Sopenharmony_ci if (m == 1024) { 123bf215546Sopenharmony_ci /* The float32 was rounded upwards into the range of the next exponent, 124bf215546Sopenharmony_ci * so bump the exponent. This correctly handles the case where f32 125bf215546Sopenharmony_ci * should be rounded up to float16 infinity. 126bf215546Sopenharmony_ci */ 127bf215546Sopenharmony_ci ++e; 128bf215546Sopenharmony_ci m = 0; 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci result = (s << 15) | (e << 10) | m; 132bf215546Sopenharmony_ci return result; 133bf215546Sopenharmony_ci} 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ciuint16_t 136bf215546Sopenharmony_ci_mesa_float_to_float16_rtz_slow(float val) 137bf215546Sopenharmony_ci{ 138bf215546Sopenharmony_ci return _mesa_float_to_half_rtz_slow(val); 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci/** 142bf215546Sopenharmony_ci * Convert a 2-byte half float to a 4-byte float. 143bf215546Sopenharmony_ci * Based on code from: 144bf215546Sopenharmony_ci * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html 145bf215546Sopenharmony_ci */ 146bf215546Sopenharmony_cifloat 147bf215546Sopenharmony_ci_mesa_half_to_float_slow(uint16_t val) 148bf215546Sopenharmony_ci{ 149bf215546Sopenharmony_ci union fi infnan; 150bf215546Sopenharmony_ci union fi magic; 151bf215546Sopenharmony_ci union fi f32; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci infnan.ui = 0x8f << 23; 154bf215546Sopenharmony_ci infnan.f = 65536.0f; 155bf215546Sopenharmony_ci magic.ui = 0xef << 23; 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci /* Exponent / Mantissa */ 158bf215546Sopenharmony_ci f32.ui = (val & 0x7fff) << 13; 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci /* Adjust */ 161bf215546Sopenharmony_ci f32.f *= magic.f; 162bf215546Sopenharmony_ci /* XXX: The magic mul relies on denorms being available */ 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci /* Inf / NaN */ 165bf215546Sopenharmony_ci if (f32.f >= infnan.f) 166bf215546Sopenharmony_ci f32.ui |= 0xff << 23; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci /* Sign */ 169bf215546Sopenharmony_ci f32.ui |= (uint32_t)(val & 0x8000) << 16; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci return f32.f; 172bf215546Sopenharmony_ci} 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci/** 175bf215546Sopenharmony_ci * Convert 0.0 to 0x00, 1.0 to 0xff. 176bf215546Sopenharmony_ci * Values outside the range [0.0, 1.0] will give undefined results. 177bf215546Sopenharmony_ci */ 178bf215546Sopenharmony_ciuint8_t _mesa_half_to_unorm8(uint16_t val) 179bf215546Sopenharmony_ci{ 180bf215546Sopenharmony_ci const int m = val & 0x3ff; 181bf215546Sopenharmony_ci const int e = (val >> 10) & 0x1f; 182bf215546Sopenharmony_ci ASSERTED const int s = (val >> 15) & 0x1; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci /* v = round_to_nearest(1.mmmmmmmmmm * 2^(e-15) * 255) 185bf215546Sopenharmony_ci * = round_to_nearest((1.mmmmmmmmmm * 255) * 2^(e-15)) 186bf215546Sopenharmony_ci * = round_to_nearest((1mmmmmmmmmm * 255) * 2^(e-25)) 187bf215546Sopenharmony_ci * = round_to_zero((1mmmmmmmmmm * 255) * 2^(e-25) + 0.5) 188bf215546Sopenharmony_ci * = round_to_zero(((1mmmmmmmmmm * 255) * 2^(e-24) + 1) / 2) 189bf215546Sopenharmony_ci * 190bf215546Sopenharmony_ci * This happens to give the correct answer for zero/subnormals too 191bf215546Sopenharmony_ci */ 192bf215546Sopenharmony_ci assert(s == 0 && val <= FP16_ONE); /* check 0 <= this <= 1 */ 193bf215546Sopenharmony_ci /* (implies e <= 15, which means the bit-shifts below are safe) */ 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci uint32_t v = ((1 << 10) | m) * 255; 196bf215546Sopenharmony_ci v = ((v >> (24 - e)) + 1) >> 1; 197bf215546Sopenharmony_ci return v; 198bf215546Sopenharmony_ci} 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci/** 201bf215546Sopenharmony_ci * Takes a uint16_t, divides by 65536, converts the infinite-precision 202bf215546Sopenharmony_ci * result to fp16 with round-to-zero. Used by the ASTC decoder. 203bf215546Sopenharmony_ci */ 204bf215546Sopenharmony_ciuint16_t _mesa_uint16_div_64k_to_half(uint16_t v) 205bf215546Sopenharmony_ci{ 206bf215546Sopenharmony_ci /* Zero or subnormal. Set the mantissa to (v << 8) and return. */ 207bf215546Sopenharmony_ci if (v < 4) 208bf215546Sopenharmony_ci return v << 8; 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* Count the leading 0s in the uint16_t */ 211bf215546Sopenharmony_ci#ifdef HAVE___BUILTIN_CLZ 212bf215546Sopenharmony_ci int n = __builtin_clz(v) - 16; 213bf215546Sopenharmony_ci#else 214bf215546Sopenharmony_ci int n = 16; 215bf215546Sopenharmony_ci for (int i = 15; i >= 0; i--) { 216bf215546Sopenharmony_ci if (v & (1 << i)) { 217bf215546Sopenharmony_ci n = 15 - i; 218bf215546Sopenharmony_ci break; 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci } 221bf215546Sopenharmony_ci#endif 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci /* Shift the mantissa up so bit 16 is the hidden 1 bit, 224bf215546Sopenharmony_ci * mask it off, then shift back down to 10 bits 225bf215546Sopenharmony_ci */ 226bf215546Sopenharmony_ci int m = ( ((uint32_t)v << (n + 1)) & 0xffff ) >> 6; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci /* (0{n} 1 X{15-n}) * 2^-16 229bf215546Sopenharmony_ci * = 1.X * 2^(15-n-16) 230bf215546Sopenharmony_ci * = 1.X * 2^(14-n - 15) 231bf215546Sopenharmony_ci * which is the FP16 form with e = 14 - n 232bf215546Sopenharmony_ci */ 233bf215546Sopenharmony_ci int e = 14 - n; 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci assert(e >= 1 && e <= 30); 236bf215546Sopenharmony_ci assert(m >= 0 && m < 0x400); 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci return (e << 10) | m; 239bf215546Sopenharmony_ci} 240