1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2020 Collabora Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#ifndef NIR_CONVERSION_BUILDER_H 25bf215546Sopenharmony_ci#define NIR_CONVERSION_BUILDER_H 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "util/u_math.h" 28bf215546Sopenharmony_ci#include "nir_builder.h" 29bf215546Sopenharmony_ci#include "nir_builtin_builder.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#ifdef __cplusplus 32bf215546Sopenharmony_ciextern "C" { 33bf215546Sopenharmony_ci#endif 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cistatic inline nir_ssa_def * 36bf215546Sopenharmony_cinir_round_float_to_int(nir_builder *b, nir_ssa_def *src, 37bf215546Sopenharmony_ci nir_rounding_mode round) 38bf215546Sopenharmony_ci{ 39bf215546Sopenharmony_ci switch (round) { 40bf215546Sopenharmony_ci case nir_rounding_mode_ru: 41bf215546Sopenharmony_ci return nir_fceil(b, src); 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci case nir_rounding_mode_rd: 44bf215546Sopenharmony_ci return nir_ffloor(b, src); 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci case nir_rounding_mode_rtne: 47bf215546Sopenharmony_ci return nir_fround_even(b, src); 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci case nir_rounding_mode_undef: 50bf215546Sopenharmony_ci case nir_rounding_mode_rtz: 51bf215546Sopenharmony_ci break; 52bf215546Sopenharmony_ci } 53bf215546Sopenharmony_ci unreachable("unexpected rounding mode"); 54bf215546Sopenharmony_ci} 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_cistatic inline nir_ssa_def * 57bf215546Sopenharmony_cinir_round_float_to_float(nir_builder *b, nir_ssa_def *src, 58bf215546Sopenharmony_ci unsigned dest_bit_size, 59bf215546Sopenharmony_ci nir_rounding_mode round) 60bf215546Sopenharmony_ci{ 61bf215546Sopenharmony_ci unsigned src_bit_size = src->bit_size; 62bf215546Sopenharmony_ci if (dest_bit_size > src_bit_size) 63bf215546Sopenharmony_ci return src; /* No rounding is needed for an up-convert */ 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci nir_op low_conv = nir_type_conversion_op(nir_type_float | src_bit_size, 66bf215546Sopenharmony_ci nir_type_float | dest_bit_size, 67bf215546Sopenharmony_ci nir_rounding_mode_undef); 68bf215546Sopenharmony_ci nir_op high_conv = nir_type_conversion_op(nir_type_float | dest_bit_size, 69bf215546Sopenharmony_ci nir_type_float | src_bit_size, 70bf215546Sopenharmony_ci nir_rounding_mode_undef); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci switch (round) { 73bf215546Sopenharmony_ci case nir_rounding_mode_ru: { 74bf215546Sopenharmony_ci /* If lower-precision conversion results in a lower value, push it 75bf215546Sopenharmony_ci * up one ULP. */ 76bf215546Sopenharmony_ci nir_ssa_def *lower_prec = 77bf215546Sopenharmony_ci nir_build_alu(b, low_conv, src, NULL, NULL, NULL); 78bf215546Sopenharmony_ci nir_ssa_def *roundtrip = 79bf215546Sopenharmony_ci nir_build_alu(b, high_conv, lower_prec, NULL, NULL, NULL); 80bf215546Sopenharmony_ci nir_ssa_def *cmp = nir_flt(b, roundtrip, src); 81bf215546Sopenharmony_ci nir_ssa_def *inf = nir_imm_floatN_t(b, INFINITY, dest_bit_size); 82bf215546Sopenharmony_ci return nir_bcsel(b, cmp, nir_nextafter(b, lower_prec, inf), lower_prec); 83bf215546Sopenharmony_ci } 84bf215546Sopenharmony_ci case nir_rounding_mode_rd: { 85bf215546Sopenharmony_ci /* If lower-precision conversion results in a higher value, push it 86bf215546Sopenharmony_ci * down one ULP. */ 87bf215546Sopenharmony_ci nir_ssa_def *lower_prec = 88bf215546Sopenharmony_ci nir_build_alu(b, low_conv, src, NULL, NULL, NULL); 89bf215546Sopenharmony_ci nir_ssa_def *roundtrip = 90bf215546Sopenharmony_ci nir_build_alu(b, high_conv, lower_prec, NULL, NULL, NULL); 91bf215546Sopenharmony_ci nir_ssa_def *cmp = nir_flt(b, src, roundtrip); 92bf215546Sopenharmony_ci nir_ssa_def *neg_inf = nir_imm_floatN_t(b, -INFINITY, dest_bit_size); 93bf215546Sopenharmony_ci return nir_bcsel(b, cmp, nir_nextafter(b, lower_prec, neg_inf), lower_prec); 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci case nir_rounding_mode_rtz: 96bf215546Sopenharmony_ci return nir_bcsel(b, nir_flt(b, src, nir_imm_zero(b, 1, src->bit_size)), 97bf215546Sopenharmony_ci nir_round_float_to_float(b, src, dest_bit_size, 98bf215546Sopenharmony_ci nir_rounding_mode_ru), 99bf215546Sopenharmony_ci nir_round_float_to_float(b, src, dest_bit_size, 100bf215546Sopenharmony_ci nir_rounding_mode_rd)); 101bf215546Sopenharmony_ci case nir_rounding_mode_rtne: 102bf215546Sopenharmony_ci case nir_rounding_mode_undef: 103bf215546Sopenharmony_ci break; 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci unreachable("unexpected rounding mode"); 106bf215546Sopenharmony_ci} 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_cistatic inline nir_ssa_def * 109bf215546Sopenharmony_cinir_round_int_to_float(nir_builder *b, nir_ssa_def *src, 110bf215546Sopenharmony_ci nir_alu_type src_type, 111bf215546Sopenharmony_ci unsigned dest_bit_size, 112bf215546Sopenharmony_ci nir_rounding_mode round) 113bf215546Sopenharmony_ci{ 114bf215546Sopenharmony_ci /* We only care whether or not its signed */ 115bf215546Sopenharmony_ci src_type = nir_alu_type_get_base_type(src_type); 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci unsigned mantissa_bits; 118bf215546Sopenharmony_ci switch (dest_bit_size) { 119bf215546Sopenharmony_ci case 16: 120bf215546Sopenharmony_ci mantissa_bits = 10; 121bf215546Sopenharmony_ci break; 122bf215546Sopenharmony_ci case 32: 123bf215546Sopenharmony_ci mantissa_bits = 23; 124bf215546Sopenharmony_ci break; 125bf215546Sopenharmony_ci case 64: 126bf215546Sopenharmony_ci mantissa_bits = 52; 127bf215546Sopenharmony_ci break; 128bf215546Sopenharmony_ci default: unreachable("Unsupported bit size"); 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci if (src->bit_size < mantissa_bits) 132bf215546Sopenharmony_ci return src; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci if (src_type == nir_type_int) { 135bf215546Sopenharmony_ci nir_ssa_def *sign = 136bf215546Sopenharmony_ci nir_i2b1(b, nir_ishr(b, src, nir_imm_int(b, src->bit_size - 1))); 137bf215546Sopenharmony_ci nir_ssa_def *abs = nir_iabs(b, src); 138bf215546Sopenharmony_ci nir_ssa_def *positive_rounded = 139bf215546Sopenharmony_ci nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, round); 140bf215546Sopenharmony_ci nir_ssa_def *max_positive = 141bf215546Sopenharmony_ci nir_imm_intN_t(b, (1ull << (src->bit_size - 1)) - 1, src->bit_size); 142bf215546Sopenharmony_ci switch (round) { 143bf215546Sopenharmony_ci case nir_rounding_mode_rtz: 144bf215546Sopenharmony_ci return nir_bcsel(b, sign, nir_ineg(b, positive_rounded), 145bf215546Sopenharmony_ci positive_rounded); 146bf215546Sopenharmony_ci break; 147bf215546Sopenharmony_ci case nir_rounding_mode_ru: 148bf215546Sopenharmony_ci return nir_bcsel(b, sign, 149bf215546Sopenharmony_ci nir_ineg(b, nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, nir_rounding_mode_rd)), 150bf215546Sopenharmony_ci nir_umin(b, positive_rounded, max_positive)); 151bf215546Sopenharmony_ci break; 152bf215546Sopenharmony_ci case nir_rounding_mode_rd: 153bf215546Sopenharmony_ci return nir_bcsel(b, sign, 154bf215546Sopenharmony_ci nir_ineg(b, 155bf215546Sopenharmony_ci nir_umin(b, max_positive, 156bf215546Sopenharmony_ci nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, nir_rounding_mode_ru))), 157bf215546Sopenharmony_ci positive_rounded); 158bf215546Sopenharmony_ci case nir_rounding_mode_rtne: 159bf215546Sopenharmony_ci case nir_rounding_mode_undef: 160bf215546Sopenharmony_ci break; 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci unreachable("unexpected rounding mode"); 163bf215546Sopenharmony_ci } else { 164bf215546Sopenharmony_ci nir_ssa_def *mantissa_bit_size = nir_imm_int(b, mantissa_bits); 165bf215546Sopenharmony_ci nir_ssa_def *msb = nir_imax(b, nir_ufind_msb(b, src), mantissa_bit_size); 166bf215546Sopenharmony_ci nir_ssa_def *bits_to_lose = nir_isub(b, msb, mantissa_bit_size); 167bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_intN_t(b, 1, src->bit_size); 168bf215546Sopenharmony_ci nir_ssa_def *adjust = nir_ishl(b, one, bits_to_lose); 169bf215546Sopenharmony_ci nir_ssa_def *mask = nir_inot(b, nir_isub(b, adjust, one)); 170bf215546Sopenharmony_ci nir_ssa_def *truncated = nir_iand(b, src, mask); 171bf215546Sopenharmony_ci switch (round) { 172bf215546Sopenharmony_ci case nir_rounding_mode_rtz: 173bf215546Sopenharmony_ci case nir_rounding_mode_rd: 174bf215546Sopenharmony_ci return truncated; 175bf215546Sopenharmony_ci break; 176bf215546Sopenharmony_ci case nir_rounding_mode_ru: 177bf215546Sopenharmony_ci return nir_bcsel(b, nir_ieq(b, src, truncated), 178bf215546Sopenharmony_ci src, nir_uadd_sat(b, truncated, adjust)); 179bf215546Sopenharmony_ci case nir_rounding_mode_rtne: 180bf215546Sopenharmony_ci case nir_rounding_mode_undef: 181bf215546Sopenharmony_ci break; 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci unreachable("unexpected rounding mode"); 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci} 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci/** Returns true if the representable range of a contains the representable 188bf215546Sopenharmony_ci * range of b. 189bf215546Sopenharmony_ci */ 190bf215546Sopenharmony_cistatic inline bool 191bf215546Sopenharmony_cinir_alu_type_range_contains_type_range(nir_alu_type a, nir_alu_type b) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci /* Split types from bit sizes */ 194bf215546Sopenharmony_ci nir_alu_type a_base_type = nir_alu_type_get_base_type(a); 195bf215546Sopenharmony_ci nir_alu_type b_base_type = nir_alu_type_get_base_type(b); 196bf215546Sopenharmony_ci unsigned a_bit_size = nir_alu_type_get_type_size(a); 197bf215546Sopenharmony_ci unsigned b_bit_size = nir_alu_type_get_type_size(b); 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci /* This requires sized types */ 200bf215546Sopenharmony_ci assert(a_bit_size > 0 && b_bit_size > 0); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci if (a_base_type == b_base_type && a_bit_size >= b_bit_size) 203bf215546Sopenharmony_ci return true; 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci if (a_base_type == nir_type_int && b_base_type == nir_type_uint && 206bf215546Sopenharmony_ci a_bit_size > b_bit_size) 207bf215546Sopenharmony_ci return true; 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci /* 16-bit floats fit in 32-bit integers */ 210bf215546Sopenharmony_ci if (a_base_type == nir_type_int && a_bit_size >= 32 && 211bf215546Sopenharmony_ci b == nir_type_float16) 212bf215546Sopenharmony_ci return true; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci /* All signed or unsigned ints can fit in float or above. A uint8 can fit 215bf215546Sopenharmony_ci * in a float16. 216bf215546Sopenharmony_ci */ 217bf215546Sopenharmony_ci if (a_base_type == nir_type_float && b_base_type != nir_type_float && 218bf215546Sopenharmony_ci (a_bit_size >= 32 || b_bit_size == 8)) 219bf215546Sopenharmony_ci return true; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci return false; 222bf215546Sopenharmony_ci} 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci/** 225bf215546Sopenharmony_ci * Retrieves limits used for clamping a value of the src type into 226bf215546Sopenharmony_ci * the widest representable range of the dst type via cmp + bcsel 227bf215546Sopenharmony_ci */ 228bf215546Sopenharmony_cistatic inline void 229bf215546Sopenharmony_cinir_get_clamp_limits(nir_builder *b, 230bf215546Sopenharmony_ci nir_alu_type src_type, 231bf215546Sopenharmony_ci nir_alu_type dest_type, 232bf215546Sopenharmony_ci nir_ssa_def **low, nir_ssa_def **high) 233bf215546Sopenharmony_ci{ 234bf215546Sopenharmony_ci /* Split types from bit sizes */ 235bf215546Sopenharmony_ci nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type); 236bf215546Sopenharmony_ci nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type); 237bf215546Sopenharmony_ci unsigned src_bit_size = nir_alu_type_get_type_size(src_type); 238bf215546Sopenharmony_ci unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type); 239bf215546Sopenharmony_ci assert(dest_bit_size != 0 && src_bit_size != 0); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci *low = NULL; 242bf215546Sopenharmony_ci *high = NULL; 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci /* limits of the destination type, expressed in the source type */ 245bf215546Sopenharmony_ci switch (dest_base_type) { 246bf215546Sopenharmony_ci case nir_type_int: { 247bf215546Sopenharmony_ci int64_t ilow, ihigh; 248bf215546Sopenharmony_ci if (dest_bit_size == 64) { 249bf215546Sopenharmony_ci ilow = INT64_MIN; 250bf215546Sopenharmony_ci ihigh = INT64_MAX; 251bf215546Sopenharmony_ci } else { 252bf215546Sopenharmony_ci ilow = -(1ll << (dest_bit_size - 1)); 253bf215546Sopenharmony_ci ihigh = (1ll << (dest_bit_size - 1)) - 1; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci if (src_base_type == nir_type_int) { 257bf215546Sopenharmony_ci *low = nir_imm_intN_t(b, ilow, src_bit_size); 258bf215546Sopenharmony_ci *high = nir_imm_intN_t(b, ihigh, src_bit_size); 259bf215546Sopenharmony_ci } else if (src_base_type == nir_type_uint) { 260bf215546Sopenharmony_ci assert(src_bit_size >= dest_bit_size); 261bf215546Sopenharmony_ci *high = nir_imm_intN_t(b, ihigh, src_bit_size); 262bf215546Sopenharmony_ci } else { 263bf215546Sopenharmony_ci *low = nir_imm_floatN_t(b, ilow, src_bit_size); 264bf215546Sopenharmony_ci *high = nir_imm_floatN_t(b, ihigh, src_bit_size); 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci break; 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci case nir_type_uint: { 269bf215546Sopenharmony_ci uint64_t uhigh = dest_bit_size == 64 ? 270bf215546Sopenharmony_ci ~0ull : (1ull << dest_bit_size) - 1; 271bf215546Sopenharmony_ci if (src_base_type != nir_type_float) { 272bf215546Sopenharmony_ci *low = nir_imm_intN_t(b, 0, src_bit_size); 273bf215546Sopenharmony_ci if (src_base_type == nir_type_uint || src_bit_size > dest_bit_size) 274bf215546Sopenharmony_ci *high = nir_imm_intN_t(b, uhigh, src_bit_size); 275bf215546Sopenharmony_ci } else { 276bf215546Sopenharmony_ci *low = nir_imm_floatN_t(b, 0.0f, src_bit_size); 277bf215546Sopenharmony_ci *high = nir_imm_floatN_t(b, uhigh, src_bit_size); 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci break; 280bf215546Sopenharmony_ci } 281bf215546Sopenharmony_ci case nir_type_float: { 282bf215546Sopenharmony_ci double flow, fhigh; 283bf215546Sopenharmony_ci switch (dest_bit_size) { 284bf215546Sopenharmony_ci case 16: 285bf215546Sopenharmony_ci flow = -65504.0f; 286bf215546Sopenharmony_ci fhigh = 65504.0f; 287bf215546Sopenharmony_ci break; 288bf215546Sopenharmony_ci case 32: 289bf215546Sopenharmony_ci flow = -FLT_MAX; 290bf215546Sopenharmony_ci fhigh = FLT_MAX; 291bf215546Sopenharmony_ci break; 292bf215546Sopenharmony_ci case 64: 293bf215546Sopenharmony_ci flow = -DBL_MAX; 294bf215546Sopenharmony_ci fhigh = DBL_MAX; 295bf215546Sopenharmony_ci break; 296bf215546Sopenharmony_ci default: 297bf215546Sopenharmony_ci unreachable("Unhandled bit size"); 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci switch (src_base_type) { 301bf215546Sopenharmony_ci case nir_type_int: { 302bf215546Sopenharmony_ci int64_t src_ilow, src_ihigh; 303bf215546Sopenharmony_ci if (src_bit_size == 64) { 304bf215546Sopenharmony_ci src_ilow = INT64_MIN; 305bf215546Sopenharmony_ci src_ihigh = INT64_MAX; 306bf215546Sopenharmony_ci } else { 307bf215546Sopenharmony_ci src_ilow = -(1ll << (src_bit_size - 1)); 308bf215546Sopenharmony_ci src_ihigh = (1ll << (src_bit_size - 1)) - 1; 309bf215546Sopenharmony_ci } 310bf215546Sopenharmony_ci if (src_ilow < flow) 311bf215546Sopenharmony_ci *low = nir_imm_intN_t(b, flow, src_bit_size); 312bf215546Sopenharmony_ci if (src_ihigh > fhigh) 313bf215546Sopenharmony_ci *high = nir_imm_intN_t(b, fhigh, src_bit_size); 314bf215546Sopenharmony_ci break; 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci case nir_type_uint: { 317bf215546Sopenharmony_ci uint64_t src_uhigh = src_bit_size == 64 ? 318bf215546Sopenharmony_ci ~0ull : (1ull << src_bit_size) - 1; 319bf215546Sopenharmony_ci if (src_uhigh > fhigh) 320bf215546Sopenharmony_ci *high = nir_imm_intN_t(b, fhigh, src_bit_size); 321bf215546Sopenharmony_ci break; 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci case nir_type_float: 324bf215546Sopenharmony_ci *low = nir_imm_floatN_t(b, flow, src_bit_size); 325bf215546Sopenharmony_ci *high = nir_imm_floatN_t(b, fhigh, src_bit_size); 326bf215546Sopenharmony_ci break; 327bf215546Sopenharmony_ci default: 328bf215546Sopenharmony_ci unreachable("Clamping from unknown type"); 329bf215546Sopenharmony_ci } 330bf215546Sopenharmony_ci break; 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci default: 333bf215546Sopenharmony_ci unreachable("clamping to unknown type"); 334bf215546Sopenharmony_ci break; 335bf215546Sopenharmony_ci } 336bf215546Sopenharmony_ci} 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci/** 339bf215546Sopenharmony_ci * Clamp the value into the widest representatble range of the 340bf215546Sopenharmony_ci * destination type with cmp + bcsel. 341bf215546Sopenharmony_ci * 342bf215546Sopenharmony_ci * val/val_type: The variables used for bcsel 343bf215546Sopenharmony_ci * src/src_type: The variables used for comparison 344bf215546Sopenharmony_ci * dest_type: The type which determines the range used for comparison 345bf215546Sopenharmony_ci */ 346bf215546Sopenharmony_cistatic inline nir_ssa_def * 347bf215546Sopenharmony_cinir_clamp_to_type_range(nir_builder *b, 348bf215546Sopenharmony_ci nir_ssa_def *val, nir_alu_type val_type, 349bf215546Sopenharmony_ci nir_ssa_def *src, nir_alu_type src_type, 350bf215546Sopenharmony_ci nir_alu_type dest_type) 351bf215546Sopenharmony_ci{ 352bf215546Sopenharmony_ci assert(nir_alu_type_get_type_size(src_type) == 0 || 353bf215546Sopenharmony_ci nir_alu_type_get_type_size(src_type) == src->bit_size); 354bf215546Sopenharmony_ci src_type |= src->bit_size; 355bf215546Sopenharmony_ci if (nir_alu_type_range_contains_type_range(dest_type, src_type)) 356bf215546Sopenharmony_ci return val; 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci /* limits of the destination type, expressed in the source type */ 359bf215546Sopenharmony_ci nir_ssa_def *low = NULL, *high = NULL; 360bf215546Sopenharmony_ci nir_get_clamp_limits(b, src_type, dest_type, &low, &high); 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci nir_ssa_def *low_cond = NULL, *high_cond = NULL; 363bf215546Sopenharmony_ci switch (nir_alu_type_get_base_type(src_type)) { 364bf215546Sopenharmony_ci case nir_type_int: 365bf215546Sopenharmony_ci low_cond = low ? nir_ilt(b, src, low) : NULL; 366bf215546Sopenharmony_ci high_cond = high ? nir_ilt(b, high, src) : NULL; 367bf215546Sopenharmony_ci break; 368bf215546Sopenharmony_ci case nir_type_uint: 369bf215546Sopenharmony_ci low_cond = low ? nir_ult(b, src, low) : NULL; 370bf215546Sopenharmony_ci high_cond = high ? nir_ult(b, high, src) : NULL; 371bf215546Sopenharmony_ci break; 372bf215546Sopenharmony_ci case nir_type_float: 373bf215546Sopenharmony_ci low_cond = low ? nir_fge(b, low, src) : NULL; 374bf215546Sopenharmony_ci high_cond = high ? nir_fge(b, src, high) : NULL; 375bf215546Sopenharmony_ci break; 376bf215546Sopenharmony_ci default: 377bf215546Sopenharmony_ci unreachable("clamping from unknown type"); 378bf215546Sopenharmony_ci } 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci nir_ssa_def *val_low = low, *val_high = high; 381bf215546Sopenharmony_ci if (val_type != src_type) { 382bf215546Sopenharmony_ci nir_get_clamp_limits(b, val_type, dest_type, &val_low, &val_high); 383bf215546Sopenharmony_ci } 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci nir_ssa_def *res = val; 386bf215546Sopenharmony_ci if (low_cond && val_low) 387bf215546Sopenharmony_ci res = nir_bcsel(b, low_cond, val_low, res); 388bf215546Sopenharmony_ci if (high_cond && val_high) 389bf215546Sopenharmony_ci res = nir_bcsel(b, high_cond, val_high, res); 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_ci return res; 392bf215546Sopenharmony_ci} 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_cistatic inline nir_rounding_mode 395bf215546Sopenharmony_cinir_simplify_conversion_rounding(nir_alu_type src_type, 396bf215546Sopenharmony_ci nir_alu_type dest_type, 397bf215546Sopenharmony_ci nir_rounding_mode rounding) 398bf215546Sopenharmony_ci{ 399bf215546Sopenharmony_ci nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type); 400bf215546Sopenharmony_ci nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type); 401bf215546Sopenharmony_ci unsigned src_bit_size = nir_alu_type_get_type_size(src_type); 402bf215546Sopenharmony_ci unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type); 403bf215546Sopenharmony_ci assert(src_bit_size > 0 && dest_bit_size > 0); 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci if (rounding == nir_rounding_mode_undef) 406bf215546Sopenharmony_ci return rounding; 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci /* Pure integer conversion doesn't have any rounding */ 409bf215546Sopenharmony_ci if (src_base_type != nir_type_float && 410bf215546Sopenharmony_ci dest_base_type != nir_type_float) 411bf215546Sopenharmony_ci return nir_rounding_mode_undef; 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci /* Float down-casts don't round */ 414bf215546Sopenharmony_ci if (src_base_type == nir_type_float && 415bf215546Sopenharmony_ci dest_base_type == nir_type_float && 416bf215546Sopenharmony_ci dest_bit_size >= src_bit_size) 417bf215546Sopenharmony_ci return nir_rounding_mode_undef; 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci /* Regular float to int conversions are RTZ */ 420bf215546Sopenharmony_ci if (src_base_type == nir_type_float && 421bf215546Sopenharmony_ci dest_base_type != nir_type_float && 422bf215546Sopenharmony_ci rounding == nir_rounding_mode_rtz) 423bf215546Sopenharmony_ci return nir_rounding_mode_undef; 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci /* The CL spec requires regular conversions to float to be RTNE */ 426bf215546Sopenharmony_ci if (dest_base_type == nir_type_float && 427bf215546Sopenharmony_ci rounding == nir_rounding_mode_rtne) 428bf215546Sopenharmony_ci return nir_rounding_mode_undef; 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci /* Couldn't simplify */ 431bf215546Sopenharmony_ci return rounding; 432bf215546Sopenharmony_ci} 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_cistatic inline nir_ssa_def * 435bf215546Sopenharmony_cinir_convert_with_rounding(nir_builder *b, 436bf215546Sopenharmony_ci nir_ssa_def *src, nir_alu_type src_type, 437bf215546Sopenharmony_ci nir_alu_type dest_type, 438bf215546Sopenharmony_ci nir_rounding_mode round, 439bf215546Sopenharmony_ci bool clamp) 440bf215546Sopenharmony_ci{ 441bf215546Sopenharmony_ci /* Some stuff wants sized types */ 442bf215546Sopenharmony_ci assert(nir_alu_type_get_type_size(src_type) == 0 || 443bf215546Sopenharmony_ci nir_alu_type_get_type_size(src_type) == src->bit_size); 444bf215546Sopenharmony_ci src_type |= src->bit_size; 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci /* Split types from bit sizes */ 447bf215546Sopenharmony_ci nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type); 448bf215546Sopenharmony_ci nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type); 449bf215546Sopenharmony_ci unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci /* Try to simplify the conversion if we can */ 452bf215546Sopenharmony_ci clamp = clamp && 453bf215546Sopenharmony_ci !nir_alu_type_range_contains_type_range(dest_type, src_type); 454bf215546Sopenharmony_ci round = nir_simplify_conversion_rounding(src_type, dest_type, round); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci /* For float -> int/uint conversions, we might not be able to represent 457bf215546Sopenharmony_ci * the destination range in the source float accurately. For these cases, 458bf215546Sopenharmony_ci * do the comparison in float range, but the bcsel in the destination range. 459bf215546Sopenharmony_ci */ 460bf215546Sopenharmony_ci bool clamp_after_conversion = clamp && 461bf215546Sopenharmony_ci src_base_type == nir_type_float && 462bf215546Sopenharmony_ci dest_base_type != nir_type_float; 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci /* 465bf215546Sopenharmony_ci * If we don't care about rounding and clamping, we can just use NIR's 466bf215546Sopenharmony_ci * built-in ops. There is also a special case for SPIR-V in shaders, where 467bf215546Sopenharmony_ci * f32/f64 -> f16 conversions can have one of two rounding modes applied, 468bf215546Sopenharmony_ci * which NIR has built-in opcodes for. 469bf215546Sopenharmony_ci * 470bf215546Sopenharmony_ci * For the rest, we have our own implementation of rounding and clamping. 471bf215546Sopenharmony_ci */ 472bf215546Sopenharmony_ci bool trivial_convert; 473bf215546Sopenharmony_ci if (!clamp && round == nir_rounding_mode_undef) { 474bf215546Sopenharmony_ci trivial_convert = true; 475bf215546Sopenharmony_ci } else if (!clamp && src_type == nir_type_float32 && 476bf215546Sopenharmony_ci dest_type == nir_type_float16 && 477bf215546Sopenharmony_ci (round == nir_rounding_mode_rtne || 478bf215546Sopenharmony_ci round == nir_rounding_mode_rtz)) { 479bf215546Sopenharmony_ci trivial_convert = true; 480bf215546Sopenharmony_ci } else { 481bf215546Sopenharmony_ci trivial_convert = false; 482bf215546Sopenharmony_ci } 483bf215546Sopenharmony_ci if (trivial_convert) { 484bf215546Sopenharmony_ci nir_op op = nir_type_conversion_op(src_type, dest_type, round); 485bf215546Sopenharmony_ci return nir_build_alu(b, op, src, NULL, NULL, NULL); 486bf215546Sopenharmony_ci } 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci nir_ssa_def *dest = src; 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci /* clamp the result into range */ 491bf215546Sopenharmony_ci if (clamp && !clamp_after_conversion) 492bf215546Sopenharmony_ci dest = nir_clamp_to_type_range(b, src, src_type, src, src_type, dest_type); 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci /* round with selected rounding mode */ 495bf215546Sopenharmony_ci if (!trivial_convert && round != nir_rounding_mode_undef) { 496bf215546Sopenharmony_ci if (src_base_type == nir_type_float) { 497bf215546Sopenharmony_ci if (dest_base_type == nir_type_float) { 498bf215546Sopenharmony_ci dest = nir_round_float_to_float(b, dest, dest_bit_size, round); 499bf215546Sopenharmony_ci } else { 500bf215546Sopenharmony_ci dest = nir_round_float_to_int(b, dest, round); 501bf215546Sopenharmony_ci } 502bf215546Sopenharmony_ci } else { 503bf215546Sopenharmony_ci dest = nir_round_int_to_float(b, dest, src_type, dest_bit_size, round); 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci round = nir_rounding_mode_undef; 507bf215546Sopenharmony_ci } 508bf215546Sopenharmony_ci 509bf215546Sopenharmony_ci /* now we can convert the value */ 510bf215546Sopenharmony_ci nir_op op = nir_type_conversion_op(src_type, dest_type, round); 511bf215546Sopenharmony_ci dest = nir_build_alu(b, op, dest, NULL, NULL, NULL); 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci if (clamp_after_conversion) 514bf215546Sopenharmony_ci dest = nir_clamp_to_type_range(b, dest, dest_type, src, src_type, dest_type); 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci return dest; 517bf215546Sopenharmony_ci} 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci#ifdef __cplusplus 520bf215546Sopenharmony_ci} 521bf215546Sopenharmony_ci#endif 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci#endif /* NIR_CONVERSION_BUILDER_H */ 524