1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 3bf215546Sopenharmony_ci * Copyright © 2019 Valve Corporation 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22bf215546Sopenharmony_ci * IN THE SOFTWARE. 23bf215546Sopenharmony_ci * 24bf215546Sopenharmony_ci * Authors: 25bf215546Sopenharmony_ci * Jason Ekstrand (jason@jlekstrand.net) 26bf215546Sopenharmony_ci * Samuel Pitoiset (samuel.pitoiset@gmail.com> 27bf215546Sopenharmony_ci */ 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "nir.h" 30bf215546Sopenharmony_ci#include "nir_builder.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_cistatic nir_ssa_def * 33bf215546Sopenharmony_cilower_frexp_sig(nir_builder *b, nir_ssa_def *x) 34bf215546Sopenharmony_ci{ 35bf215546Sopenharmony_ci nir_ssa_def *abs_x = nir_fabs(b, x); 36bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); 37bf215546Sopenharmony_ci nir_ssa_def *sign_mantissa_mask, *exponent_value; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci switch (x->bit_size) { 40bf215546Sopenharmony_ci case 16: 41bf215546Sopenharmony_ci /* Half-precision floating-point values are stored as 42bf215546Sopenharmony_ci * 1 sign bit; 43bf215546Sopenharmony_ci * 5 exponent bits; 44bf215546Sopenharmony_ci * 10 mantissa bits. 45bf215546Sopenharmony_ci * 46bf215546Sopenharmony_ci * An exponent shift of 10 will shift the mantissa out, leaving only the 47bf215546Sopenharmony_ci * exponent and sign bit (which itself may be zero, if the absolute value 48bf215546Sopenharmony_ci * was taken before the bitcast and shift). 49bf215546Sopenharmony_ci */ 50bf215546Sopenharmony_ci sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16); 51bf215546Sopenharmony_ci /* Exponent of floating-point values in the range [0.5, 1.0). */ 52bf215546Sopenharmony_ci exponent_value = nir_imm_intN_t(b, 0x3800u, 16); 53bf215546Sopenharmony_ci break; 54bf215546Sopenharmony_ci case 32: 55bf215546Sopenharmony_ci /* Single-precision floating-point values are stored as 56bf215546Sopenharmony_ci * 1 sign bit; 57bf215546Sopenharmony_ci * 8 exponent bits; 58bf215546Sopenharmony_ci * 23 mantissa bits. 59bf215546Sopenharmony_ci * 60bf215546Sopenharmony_ci * An exponent shift of 23 will shift the mantissa out, leaving only the 61bf215546Sopenharmony_ci * exponent and sign bit (which itself may be zero, if the absolute value 62bf215546Sopenharmony_ci * was taken before the bitcast and shift. 63bf215546Sopenharmony_ci */ 64bf215546Sopenharmony_ci sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); 65bf215546Sopenharmony_ci /* Exponent of floating-point values in the range [0.5, 1.0). */ 66bf215546Sopenharmony_ci exponent_value = nir_imm_int(b, 0x3f000000u); 67bf215546Sopenharmony_ci break; 68bf215546Sopenharmony_ci case 64: 69bf215546Sopenharmony_ci /* Double-precision floating-point values are stored as 70bf215546Sopenharmony_ci * 1 sign bit; 71bf215546Sopenharmony_ci * 11 exponent bits; 72bf215546Sopenharmony_ci * 52 mantissa bits. 73bf215546Sopenharmony_ci * 74bf215546Sopenharmony_ci * An exponent shift of 20 will shift the remaining mantissa bits out, 75bf215546Sopenharmony_ci * leaving only the exponent and sign bit (which itself may be zero, if 76bf215546Sopenharmony_ci * the absolute value was taken before the bitcast and shift. 77bf215546Sopenharmony_ci */ 78bf215546Sopenharmony_ci sign_mantissa_mask = nir_imm_int(b, 0x800fffffu); 79bf215546Sopenharmony_ci /* Exponent of floating-point values in the range [0.5, 1.0). */ 80bf215546Sopenharmony_ci exponent_value = nir_imm_int(b, 0x3fe00000u); 81bf215546Sopenharmony_ci break; 82bf215546Sopenharmony_ci default: 83bf215546Sopenharmony_ci unreachable("Invalid bitsize"); 84bf215546Sopenharmony_ci } 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci if (x->bit_size == 64) { 87bf215546Sopenharmony_ci /* We only need to deal with the exponent so first we extract the upper 88bf215546Sopenharmony_ci * 32 bits using nir_unpack_64_2x32_split_y. 89bf215546Sopenharmony_ci */ 90bf215546Sopenharmony_ci nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x); 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci /* If x is ±0, ±Inf, or NaN, return x unmodified. */ 93bf215546Sopenharmony_ci nir_ssa_def *new_upper = 94bf215546Sopenharmony_ci nir_bcsel(b, 95bf215546Sopenharmony_ci nir_iand(b, 96bf215546Sopenharmony_ci nir_flt(b, zero, abs_x), 97bf215546Sopenharmony_ci nir_fisfinite(b, x)), 98bf215546Sopenharmony_ci nir_ior(b, 99bf215546Sopenharmony_ci nir_iand(b, upper_x, sign_mantissa_mask), 100bf215546Sopenharmony_ci exponent_value), 101bf215546Sopenharmony_ci upper_x); 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x); 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci return nir_pack_64_2x32_split(b, lower_x, new_upper); 106bf215546Sopenharmony_ci } else { 107bf215546Sopenharmony_ci /* If x is ±0, ±Inf, or NaN, return x unmodified. */ 108bf215546Sopenharmony_ci return nir_bcsel(b, 109bf215546Sopenharmony_ci nir_iand(b, 110bf215546Sopenharmony_ci nir_flt(b, zero, abs_x), 111bf215546Sopenharmony_ci nir_fisfinite(b, x)), 112bf215546Sopenharmony_ci nir_ior(b, 113bf215546Sopenharmony_ci nir_iand(b, x, sign_mantissa_mask), 114bf215546Sopenharmony_ci exponent_value), 115bf215546Sopenharmony_ci x); 116bf215546Sopenharmony_ci } 117bf215546Sopenharmony_ci} 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_cistatic nir_ssa_def * 120bf215546Sopenharmony_cilower_frexp_exp(nir_builder *b, nir_ssa_def *x) 121bf215546Sopenharmony_ci{ 122bf215546Sopenharmony_ci nir_ssa_def *abs_x = nir_fabs(b, x); 123bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); 124bf215546Sopenharmony_ci nir_ssa_def *is_not_zero = nir_fneu(b, abs_x, zero); 125bf215546Sopenharmony_ci nir_ssa_def *exponent; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci switch (x->bit_size) { 128bf215546Sopenharmony_ci case 16: { 129bf215546Sopenharmony_ci nir_ssa_def *exponent_shift = nir_imm_int(b, 10); 130bf215546Sopenharmony_ci nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci /* Significand return must be of the same type as the input, but the 133bf215546Sopenharmony_ci * exponent must be a 32-bit integer. 134bf215546Sopenharmony_ci */ 135bf215546Sopenharmony_ci exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), 136bf215546Sopenharmony_ci nir_bcsel(b, is_not_zero, exponent_bias, zero))); 137bf215546Sopenharmony_ci break; 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci case 32: { 140bf215546Sopenharmony_ci nir_ssa_def *exponent_shift = nir_imm_int(b, 23); 141bf215546Sopenharmony_ci nir_ssa_def *exponent_bias = nir_imm_int(b, -126); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), 144bf215546Sopenharmony_ci nir_bcsel(b, is_not_zero, exponent_bias, zero)); 145bf215546Sopenharmony_ci break; 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci case 64: { 148bf215546Sopenharmony_ci nir_ssa_def *exponent_shift = nir_imm_int(b, 20); 149bf215546Sopenharmony_ci nir_ssa_def *exponent_bias = nir_imm_int(b, -1022); 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci nir_ssa_def *zero32 = nir_imm_int(b, 0); 152bf215546Sopenharmony_ci nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x); 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift), 155bf215546Sopenharmony_ci nir_bcsel(b, is_not_zero, exponent_bias, zero32)); 156bf215546Sopenharmony_ci break; 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci default: 159bf215546Sopenharmony_ci unreachable("Invalid bitsize"); 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci return exponent; 163bf215546Sopenharmony_ci} 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_cistatic bool 166bf215546Sopenharmony_cilower_frexp_impl(nir_function_impl *impl) 167bf215546Sopenharmony_ci{ 168bf215546Sopenharmony_ci bool progress = false; 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci nir_builder b; 171bf215546Sopenharmony_ci nir_builder_init(&b, impl); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 174bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 175bf215546Sopenharmony_ci if (instr->type != nir_instr_type_alu) 176bf215546Sopenharmony_ci continue; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci nir_alu_instr *alu_instr = nir_instr_as_alu(instr); 179bf215546Sopenharmony_ci nir_ssa_def *lower; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci switch (alu_instr->op) { 184bf215546Sopenharmony_ci case nir_op_frexp_sig: 185bf215546Sopenharmony_ci lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); 186bf215546Sopenharmony_ci break; 187bf215546Sopenharmony_ci case nir_op_frexp_exp: 188bf215546Sopenharmony_ci lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); 189bf215546Sopenharmony_ci break; 190bf215546Sopenharmony_ci default: 191bf215546Sopenharmony_ci continue; 192bf215546Sopenharmony_ci } 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, 195bf215546Sopenharmony_ci lower); 196bf215546Sopenharmony_ci nir_instr_remove(instr); 197bf215546Sopenharmony_ci progress = true; 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci if (progress) { 202bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | 203bf215546Sopenharmony_ci nir_metadata_dominance); 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci return progress; 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_cibool 210bf215546Sopenharmony_cinir_lower_frexp(nir_shader *shader) 211bf215546Sopenharmony_ci{ 212bf215546Sopenharmony_ci bool progress = false; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci nir_foreach_function(function, shader) { 215bf215546Sopenharmony_ci if (function->impl) 216bf215546Sopenharmony_ci progress |= lower_frexp_impl(function->impl); 217bf215546Sopenharmony_ci } 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci return progress; 220bf215546Sopenharmony_ci} 221