1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci#include <math.h> 24bf215546Sopenharmony_ci#include <float.h> 25bf215546Sopenharmony_ci#include "nir.h" 26bf215546Sopenharmony_ci#include "nir_range_analysis.h" 27bf215546Sopenharmony_ci#include "util/hash_table.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci/** 30bf215546Sopenharmony_ci * Analyzes a sequence of operations to determine some aspects of the range of 31bf215546Sopenharmony_ci * the result. 32bf215546Sopenharmony_ci */ 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_cistatic bool 35bf215546Sopenharmony_ciis_not_negative(enum ssa_ranges r) 36bf215546Sopenharmony_ci{ 37bf215546Sopenharmony_ci return r == gt_zero || r == ge_zero || r == eq_zero; 38bf215546Sopenharmony_ci} 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistatic bool 41bf215546Sopenharmony_ciis_not_zero(enum ssa_ranges r) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci return r == gt_zero || r == lt_zero || r == ne_zero; 44bf215546Sopenharmony_ci} 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_cistatic void * 47bf215546Sopenharmony_cipack_data(const struct ssa_result_range r) 48bf215546Sopenharmony_ci{ 49bf215546Sopenharmony_ci return (void *)(uintptr_t)(r.range | r.is_integral << 8 | r.is_finite << 9 | 50bf215546Sopenharmony_ci r.is_a_number << 10); 51bf215546Sopenharmony_ci} 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cistatic struct ssa_result_range 54bf215546Sopenharmony_ciunpack_data(const void *p) 55bf215546Sopenharmony_ci{ 56bf215546Sopenharmony_ci const uintptr_t v = (uintptr_t) p; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci return (struct ssa_result_range){ 59bf215546Sopenharmony_ci .range = v & 0xff, 60bf215546Sopenharmony_ci .is_integral = (v & 0x00100) != 0, 61bf215546Sopenharmony_ci .is_finite = (v & 0x00200) != 0, 62bf215546Sopenharmony_ci .is_a_number = (v & 0x00400) != 0 63bf215546Sopenharmony_ci }; 64bf215546Sopenharmony_ci} 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_cistatic void * 67bf215546Sopenharmony_cipack_key(const struct nir_alu_instr *instr, nir_alu_type type) 68bf215546Sopenharmony_ci{ 69bf215546Sopenharmony_ci uintptr_t type_encoding; 70bf215546Sopenharmony_ci uintptr_t ptr = (uintptr_t) instr; 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci /* The low 2 bits have to be zero or this whole scheme falls apart. */ 73bf215546Sopenharmony_ci assert((ptr & 0x3) == 0); 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* NIR is typeless in the sense that sequences of bits have whatever 76bf215546Sopenharmony_ci * meaning is attached to them by the instruction that consumes them. 77bf215546Sopenharmony_ci * However, the number of bits must match between producer and consumer. 78bf215546Sopenharmony_ci * As a result, the number of bits does not need to be encoded here. 79bf215546Sopenharmony_ci */ 80bf215546Sopenharmony_ci switch (nir_alu_type_get_base_type(type)) { 81bf215546Sopenharmony_ci case nir_type_int: type_encoding = 0; break; 82bf215546Sopenharmony_ci case nir_type_uint: type_encoding = 1; break; 83bf215546Sopenharmony_ci case nir_type_bool: type_encoding = 2; break; 84bf215546Sopenharmony_ci case nir_type_float: type_encoding = 3; break; 85bf215546Sopenharmony_ci default: unreachable("Invalid base type."); 86bf215546Sopenharmony_ci } 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci return (void *)(ptr | type_encoding); 89bf215546Sopenharmony_ci} 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_cistatic nir_alu_type 92bf215546Sopenharmony_cinir_alu_src_type(const nir_alu_instr *instr, unsigned src) 93bf215546Sopenharmony_ci{ 94bf215546Sopenharmony_ci return nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]) | 95bf215546Sopenharmony_ci nir_src_bit_size(instr->src[src].src); 96bf215546Sopenharmony_ci} 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_cistatic struct ssa_result_range 99bf215546Sopenharmony_cianalyze_constant(const struct nir_alu_instr *instr, unsigned src, 100bf215546Sopenharmony_ci nir_alu_type use_type) 101bf215546Sopenharmony_ci{ 102bf215546Sopenharmony_ci uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = { 0, 1, 2, 3, 103bf215546Sopenharmony_ci 4, 5, 6, 7, 104bf215546Sopenharmony_ci 8, 9, 10, 11, 105bf215546Sopenharmony_ci 12, 13, 14, 15 }; 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_ci /* If the source is an explicitly sized source, then we need to reset 108bf215546Sopenharmony_ci * both the number of components and the swizzle. 109bf215546Sopenharmony_ci */ 110bf215546Sopenharmony_ci const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; ++i) 113bf215546Sopenharmony_ci swizzle[i] = instr->src[src].swizzle[i]; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci const nir_load_const_instr *const load = 116bf215546Sopenharmony_ci nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci struct ssa_result_range r = { unknown, false, false, false }; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci switch (nir_alu_type_get_base_type(use_type)) { 121bf215546Sopenharmony_ci case nir_type_float: { 122bf215546Sopenharmony_ci double min_value = DBL_MAX; 123bf215546Sopenharmony_ci double max_value = -DBL_MAX; 124bf215546Sopenharmony_ci bool any_zero = false; 125bf215546Sopenharmony_ci bool all_zero = true; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci r.is_integral = true; 128bf215546Sopenharmony_ci r.is_a_number = true; 129bf215546Sopenharmony_ci r.is_finite = true; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; ++i) { 132bf215546Sopenharmony_ci const double v = nir_const_value_as_float(load->value[swizzle[i]], 133bf215546Sopenharmony_ci load->def.bit_size); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci if (floor(v) != v) 136bf215546Sopenharmony_ci r.is_integral = false; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci if (isnan(v)) 139bf215546Sopenharmony_ci r.is_a_number = false; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci if (!isfinite(v)) 142bf215546Sopenharmony_ci r.is_finite = false; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci any_zero = any_zero || (v == 0.0); 145bf215546Sopenharmony_ci all_zero = all_zero && (v == 0.0); 146bf215546Sopenharmony_ci min_value = MIN2(min_value, v); 147bf215546Sopenharmony_ci max_value = MAX2(max_value, v); 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci assert(any_zero >= all_zero); 151bf215546Sopenharmony_ci assert(isnan(max_value) || max_value >= min_value); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci if (all_zero) 154bf215546Sopenharmony_ci r.range = eq_zero; 155bf215546Sopenharmony_ci else if (min_value > 0.0) 156bf215546Sopenharmony_ci r.range = gt_zero; 157bf215546Sopenharmony_ci else if (min_value == 0.0) 158bf215546Sopenharmony_ci r.range = ge_zero; 159bf215546Sopenharmony_ci else if (max_value < 0.0) 160bf215546Sopenharmony_ci r.range = lt_zero; 161bf215546Sopenharmony_ci else if (max_value == 0.0) 162bf215546Sopenharmony_ci r.range = le_zero; 163bf215546Sopenharmony_ci else if (!any_zero) 164bf215546Sopenharmony_ci r.range = ne_zero; 165bf215546Sopenharmony_ci else 166bf215546Sopenharmony_ci r.range = unknown; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci return r; 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci case nir_type_int: 172bf215546Sopenharmony_ci case nir_type_bool: { 173bf215546Sopenharmony_ci int64_t min_value = INT_MAX; 174bf215546Sopenharmony_ci int64_t max_value = INT_MIN; 175bf215546Sopenharmony_ci bool any_zero = false; 176bf215546Sopenharmony_ci bool all_zero = true; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; ++i) { 179bf215546Sopenharmony_ci const int64_t v = nir_const_value_as_int(load->value[swizzle[i]], 180bf215546Sopenharmony_ci load->def.bit_size); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci any_zero = any_zero || (v == 0); 183bf215546Sopenharmony_ci all_zero = all_zero && (v == 0); 184bf215546Sopenharmony_ci min_value = MIN2(min_value, v); 185bf215546Sopenharmony_ci max_value = MAX2(max_value, v); 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci assert(any_zero >= all_zero); 189bf215546Sopenharmony_ci assert(max_value >= min_value); 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci if (all_zero) 192bf215546Sopenharmony_ci r.range = eq_zero; 193bf215546Sopenharmony_ci else if (min_value > 0) 194bf215546Sopenharmony_ci r.range = gt_zero; 195bf215546Sopenharmony_ci else if (min_value == 0) 196bf215546Sopenharmony_ci r.range = ge_zero; 197bf215546Sopenharmony_ci else if (max_value < 0) 198bf215546Sopenharmony_ci r.range = lt_zero; 199bf215546Sopenharmony_ci else if (max_value == 0) 200bf215546Sopenharmony_ci r.range = le_zero; 201bf215546Sopenharmony_ci else if (!any_zero) 202bf215546Sopenharmony_ci r.range = ne_zero; 203bf215546Sopenharmony_ci else 204bf215546Sopenharmony_ci r.range = unknown; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci return r; 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci case nir_type_uint: { 210bf215546Sopenharmony_ci bool any_zero = false; 211bf215546Sopenharmony_ci bool all_zero = true; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; ++i) { 214bf215546Sopenharmony_ci const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]], 215bf215546Sopenharmony_ci load->def.bit_size); 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci any_zero = any_zero || (v == 0); 218bf215546Sopenharmony_ci all_zero = all_zero && (v == 0); 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci assert(any_zero >= all_zero); 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci if (all_zero) 224bf215546Sopenharmony_ci r.range = eq_zero; 225bf215546Sopenharmony_ci else if (any_zero) 226bf215546Sopenharmony_ci r.range = ge_zero; 227bf215546Sopenharmony_ci else 228bf215546Sopenharmony_ci r.range = gt_zero; 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci return r; 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci default: 234bf215546Sopenharmony_ci unreachable("Invalid alu source type"); 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci} 237bf215546Sopenharmony_ci 238bf215546Sopenharmony_ci/** 239bf215546Sopenharmony_ci * Short-hand name for use in the tables in analyze_expression. If this name 240bf215546Sopenharmony_ci * becomes a problem on some compiler, we can change it to _. 241bf215546Sopenharmony_ci */ 242bf215546Sopenharmony_ci#define _______ unknown 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci#if defined(__clang__) 246bf215546Sopenharmony_ci /* clang wants _Pragma("unroll X") */ 247bf215546Sopenharmony_ci #define pragma_unroll_5 _Pragma("unroll 5") 248bf215546Sopenharmony_ci #define pragma_unroll_7 _Pragma("unroll 7") 249bf215546Sopenharmony_ci/* gcc wants _Pragma("GCC unroll X") */ 250bf215546Sopenharmony_ci#elif defined(__GNUC__) 251bf215546Sopenharmony_ci #if __GNUC__ >= 8 252bf215546Sopenharmony_ci #define pragma_unroll_5 _Pragma("GCC unroll 5") 253bf215546Sopenharmony_ci #define pragma_unroll_7 _Pragma("GCC unroll 7") 254bf215546Sopenharmony_ci #else 255bf215546Sopenharmony_ci #pragma GCC optimize ("unroll-loops") 256bf215546Sopenharmony_ci #define pragma_unroll_5 257bf215546Sopenharmony_ci #define pragma_unroll_7 258bf215546Sopenharmony_ci #endif 259bf215546Sopenharmony_ci#else 260bf215546Sopenharmony_ci /* MSVC doesn't have C99's _Pragma() */ 261bf215546Sopenharmony_ci #define pragma_unroll_5 262bf215546Sopenharmony_ci #define pragma_unroll_7 263bf215546Sopenharmony_ci#endif 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci#ifndef NDEBUG 267bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_COMMUTATIVE(t) \ 268bf215546Sopenharmony_ci do { \ 269bf215546Sopenharmony_ci static bool first = true; \ 270bf215546Sopenharmony_ci if (first) { \ 271bf215546Sopenharmony_ci first = false; \ 272bf215546Sopenharmony_ci pragma_unroll_7 \ 273bf215546Sopenharmony_ci for (unsigned r = 0; r < ARRAY_SIZE(t); r++) { \ 274bf215546Sopenharmony_ci pragma_unroll_7 \ 275bf215546Sopenharmony_ci for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++) \ 276bf215546Sopenharmony_ci assert(t[r][c] == t[c][r]); \ 277bf215546Sopenharmony_ci } \ 278bf215546Sopenharmony_ci } \ 279bf215546Sopenharmony_ci } while (false) 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_DIAGONAL(t) \ 282bf215546Sopenharmony_ci do { \ 283bf215546Sopenharmony_ci static bool first = true; \ 284bf215546Sopenharmony_ci if (first) { \ 285bf215546Sopenharmony_ci first = false; \ 286bf215546Sopenharmony_ci pragma_unroll_7 \ 287bf215546Sopenharmony_ci for (unsigned r = 0; r < ARRAY_SIZE(t); r++) \ 288bf215546Sopenharmony_ci assert(t[r][r] == r); \ 289bf215546Sopenharmony_ci } \ 290bf215546Sopenharmony_ci } while (false) 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci#else 293bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_COMMUTATIVE(t) 294bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_DIAGONAL(t) 295bf215546Sopenharmony_ci#endif /* !defined(NDEBUG) */ 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_cistatic enum ssa_ranges 298bf215546Sopenharmony_ciunion_ranges(enum ssa_ranges a, enum ssa_ranges b) 299bf215546Sopenharmony_ci{ 300bf215546Sopenharmony_ci static const enum ssa_ranges union_table[last_range + 1][last_range + 1] = { 301bf215546Sopenharmony_ci /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 302bf215546Sopenharmony_ci /* unknown */ { _______, _______, _______, _______, _______, _______, _______ }, 303bf215546Sopenharmony_ci /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero }, 304bf215546Sopenharmony_ci /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero }, 305bf215546Sopenharmony_ci /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero }, 306bf215546Sopenharmony_ci /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero }, 307bf215546Sopenharmony_ci /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ }, 308bf215546Sopenharmony_ci /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, 309bf215546Sopenharmony_ci }; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci ASSERT_TABLE_IS_COMMUTATIVE(union_table); 312bf215546Sopenharmony_ci ASSERT_TABLE_IS_DIAGONAL(union_table); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci return union_table[a][b]; 315bf215546Sopenharmony_ci} 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci#ifndef NDEBUG 318bf215546Sopenharmony_ci/* Verify that the 'unknown' entry in each row (or column) of the table is the 319bf215546Sopenharmony_ci * union of all the other values in the row (or column). 320bf215546Sopenharmony_ci */ 321bf215546Sopenharmony_ci#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) \ 322bf215546Sopenharmony_ci do { \ 323bf215546Sopenharmony_ci static bool first = true; \ 324bf215546Sopenharmony_ci if (first) { \ 325bf215546Sopenharmony_ci first = false; \ 326bf215546Sopenharmony_ci pragma_unroll_7 \ 327bf215546Sopenharmony_ci for (unsigned i = 0; i < last_range; i++) { \ 328bf215546Sopenharmony_ci enum ssa_ranges col_range = t[i][unknown + 1]; \ 329bf215546Sopenharmony_ci enum ssa_ranges row_range = t[unknown + 1][i]; \ 330bf215546Sopenharmony_ci \ 331bf215546Sopenharmony_ci pragma_unroll_5 \ 332bf215546Sopenharmony_ci for (unsigned j = unknown + 2; j < last_range; j++) { \ 333bf215546Sopenharmony_ci col_range = union_ranges(col_range, t[i][j]); \ 334bf215546Sopenharmony_ci row_range = union_ranges(row_range, t[j][i]); \ 335bf215546Sopenharmony_ci } \ 336bf215546Sopenharmony_ci \ 337bf215546Sopenharmony_ci assert(col_range == t[i][unknown]); \ 338bf215546Sopenharmony_ci assert(row_range == t[unknown][i]); \ 339bf215546Sopenharmony_ci } \ 340bf215546Sopenharmony_ci } \ 341bf215546Sopenharmony_ci } while (false) 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci/* For most operations, the union of ranges for a strict inequality and 344bf215546Sopenharmony_ci * equality should be the range of the non-strict inequality (e.g., 345bf215546Sopenharmony_ci * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)). 346bf215546Sopenharmony_ci * 347bf215546Sopenharmony_ci * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.). 348bf215546Sopenharmony_ci */ 349bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \ 350bf215546Sopenharmony_ci do { \ 351bf215546Sopenharmony_ci assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]); \ 352bf215546Sopenharmony_ci assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]); \ 353bf215546Sopenharmony_ci } while (false) 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \ 356bf215546Sopenharmony_ci do { \ 357bf215546Sopenharmony_ci static bool first = true; \ 358bf215546Sopenharmony_ci if (first) { \ 359bf215546Sopenharmony_ci first = false; \ 360bf215546Sopenharmony_ci pragma_unroll_7 \ 361bf215546Sopenharmony_ci for (unsigned i = 0; i < last_range; i++) { \ 362bf215546Sopenharmony_ci assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \ 363bf215546Sopenharmony_ci assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \ 364bf215546Sopenharmony_ci assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \ 365bf215546Sopenharmony_ci assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \ 366bf215546Sopenharmony_ci } \ 367bf215546Sopenharmony_ci } \ 368bf215546Sopenharmony_ci } while (false) 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci/* Several other unordered tuples span the range of "everything." Each should 371bf215546Sopenharmony_ci * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and 372bf215546Sopenharmony_ci * (eq_zero, ne_zero). union_ranges is already commutative, so only one 373bf215546Sopenharmony_ci * ordering needs to be checked. 374bf215546Sopenharmony_ci * 375bf215546Sopenharmony_ci * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.). 376bf215546Sopenharmony_ci * 377bf215546Sopenharmony_ci * In cases where this can be used, it is unnecessary to also use 378bf215546Sopenharmony_ci * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE. For any range X, 379bf215546Sopenharmony_ci * union_ranges(X, X) == X. The disjoint ranges cover all of the non-unknown 380bf215546Sopenharmony_ci * possibilities, so the union of all the unions of disjoint ranges is 381bf215546Sopenharmony_ci * equivalent to the union of "others." 382bf215546Sopenharmony_ci */ 383bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) \ 384bf215546Sopenharmony_ci do { \ 385bf215546Sopenharmony_ci assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]); \ 386bf215546Sopenharmony_ci assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]); \ 387bf215546Sopenharmony_ci assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]); \ 388bf215546Sopenharmony_ci } while (false) 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) \ 391bf215546Sopenharmony_ci do { \ 392bf215546Sopenharmony_ci static bool first = true; \ 393bf215546Sopenharmony_ci if (first) { \ 394bf215546Sopenharmony_ci first = false; \ 395bf215546Sopenharmony_ci pragma_unroll_7 \ 396bf215546Sopenharmony_ci for (unsigned i = 0; i < last_range; i++) { \ 397bf215546Sopenharmony_ci assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) == \ 398bf215546Sopenharmony_ci t[i][unknown]); \ 399bf215546Sopenharmony_ci assert(union_ranges(t[i][le_zero], t[i][gt_zero]) == \ 400bf215546Sopenharmony_ci t[i][unknown]); \ 401bf215546Sopenharmony_ci assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) == \ 402bf215546Sopenharmony_ci t[i][unknown]); \ 403bf215546Sopenharmony_ci \ 404bf215546Sopenharmony_ci assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) == \ 405bf215546Sopenharmony_ci t[unknown][i]); \ 406bf215546Sopenharmony_ci assert(union_ranges(t[le_zero][i], t[gt_zero][i]) == \ 407bf215546Sopenharmony_ci t[unknown][i]); \ 408bf215546Sopenharmony_ci assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) == \ 409bf215546Sopenharmony_ci t[unknown][i]); \ 410bf215546Sopenharmony_ci } \ 411bf215546Sopenharmony_ci } \ 412bf215546Sopenharmony_ci } while (false) 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci#else 415bf215546Sopenharmony_ci#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t) 416bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) 417bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) 418bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t) 419bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t) 420bf215546Sopenharmony_ci#endif /* !defined(NDEBUG) */ 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci/** 423bf215546Sopenharmony_ci * Analyze an expression to determine the range of its result 424bf215546Sopenharmony_ci * 425bf215546Sopenharmony_ci * The end result of this analysis is a token that communicates something 426bf215546Sopenharmony_ci * about the range of values. There's an implicit grammar that produces 427bf215546Sopenharmony_ci * tokens from sequences of literal values, other tokens, and operations. 428bf215546Sopenharmony_ci * This function implements this grammar as a recursive-descent parser. Some 429bf215546Sopenharmony_ci * (but not all) of the grammar is listed in-line in the function. 430bf215546Sopenharmony_ci */ 431bf215546Sopenharmony_cistatic struct ssa_result_range 432bf215546Sopenharmony_cianalyze_expression(const nir_alu_instr *instr, unsigned src, 433bf215546Sopenharmony_ci struct hash_table *ht, nir_alu_type use_type) 434bf215546Sopenharmony_ci{ 435bf215546Sopenharmony_ci /* Ensure that the _Pragma("GCC unroll 7") above are correct. */ 436bf215546Sopenharmony_ci STATIC_ASSERT(last_range + 1 == 7); 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci if (!instr->src[src].src.is_ssa) 439bf215546Sopenharmony_ci return (struct ssa_result_range){unknown, false, false, false}; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci if (nir_src_is_const(instr->src[src].src)) 442bf215546Sopenharmony_ci return analyze_constant(instr, src, use_type); 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu) 445bf215546Sopenharmony_ci return (struct ssa_result_range){unknown, false, false, false}; 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci const struct nir_alu_instr *const alu = 448bf215546Sopenharmony_ci nir_instr_as_alu(instr->src[src].src.ssa->parent_instr); 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* Bail if the type of the instruction generating the value does not match 451bf215546Sopenharmony_ci * the type the value will be interpreted as. int/uint/bool can be 452bf215546Sopenharmony_ci * reinterpreted trivially. The most important cases are between float and 453bf215546Sopenharmony_ci * non-float. 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_ci if (alu->op != nir_op_mov && alu->op != nir_op_bcsel) { 456bf215546Sopenharmony_ci const nir_alu_type use_base_type = 457bf215546Sopenharmony_ci nir_alu_type_get_base_type(use_type); 458bf215546Sopenharmony_ci const nir_alu_type src_base_type = 459bf215546Sopenharmony_ci nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci if (use_base_type != src_base_type && 462bf215546Sopenharmony_ci (use_base_type == nir_type_float || 463bf215546Sopenharmony_ci src_base_type == nir_type_float)) { 464bf215546Sopenharmony_ci return (struct ssa_result_range){unknown, false, false, false}; 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci } 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci struct hash_entry *he = _mesa_hash_table_search(ht, pack_key(alu, use_type)); 469bf215546Sopenharmony_ci if (he != NULL) 470bf215546Sopenharmony_ci return unpack_data(he->data); 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci struct ssa_result_range r = {unknown, false, false, false}; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci /* ge_zero: ge_zero + ge_zero 475bf215546Sopenharmony_ci * 476bf215546Sopenharmony_ci * gt_zero: gt_zero + eq_zero 477bf215546Sopenharmony_ci * | gt_zero + ge_zero 478bf215546Sopenharmony_ci * | eq_zero + gt_zero # Addition is commutative 479bf215546Sopenharmony_ci * | ge_zero + gt_zero # Addition is commutative 480bf215546Sopenharmony_ci * | gt_zero + gt_zero 481bf215546Sopenharmony_ci * ; 482bf215546Sopenharmony_ci * 483bf215546Sopenharmony_ci * le_zero: le_zero + le_zero 484bf215546Sopenharmony_ci * 485bf215546Sopenharmony_ci * lt_zero: lt_zero + eq_zero 486bf215546Sopenharmony_ci * | lt_zero + le_zero 487bf215546Sopenharmony_ci * | eq_zero + lt_zero # Addition is commutative 488bf215546Sopenharmony_ci * | le_zero + lt_zero # Addition is commutative 489bf215546Sopenharmony_ci * | lt_zero + lt_zero 490bf215546Sopenharmony_ci * ; 491bf215546Sopenharmony_ci * 492bf215546Sopenharmony_ci * ne_zero: eq_zero + ne_zero 493bf215546Sopenharmony_ci * | ne_zero + eq_zero # Addition is commutative 494bf215546Sopenharmony_ci * ; 495bf215546Sopenharmony_ci * 496bf215546Sopenharmony_ci * eq_zero: eq_zero + eq_zero 497bf215546Sopenharmony_ci * ; 498bf215546Sopenharmony_ci * 499bf215546Sopenharmony_ci * All other cases are 'unknown'. The seeming odd entry is (ne_zero, 500bf215546Sopenharmony_ci * ne_zero), but that could be (-5, +5) which is not ne_zero. 501bf215546Sopenharmony_ci */ 502bf215546Sopenharmony_ci static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = { 503bf215546Sopenharmony_ci /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 504bf215546Sopenharmony_ci /* unknown */ { _______, _______, _______, _______, _______, _______, _______ }, 505bf215546Sopenharmony_ci /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero }, 506bf215546Sopenharmony_ci /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero }, 507bf215546Sopenharmony_ci /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero }, 508bf215546Sopenharmony_ci /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero }, 509bf215546Sopenharmony_ci /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero }, 510bf215546Sopenharmony_ci /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero }, 511bf215546Sopenharmony_ci }; 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci ASSERT_TABLE_IS_COMMUTATIVE(fadd_table); 514bf215546Sopenharmony_ci ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table); 515bf215546Sopenharmony_ci ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table); 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci /* Due to flush-to-zero semanatics of floating-point numbers with very 518bf215546Sopenharmony_ci * small mangnitudes, we can never really be sure a result will be 519bf215546Sopenharmony_ci * non-zero. 520bf215546Sopenharmony_ci * 521bf215546Sopenharmony_ci * ge_zero: ge_zero * ge_zero 522bf215546Sopenharmony_ci * | ge_zero * gt_zero 523bf215546Sopenharmony_ci * | ge_zero * eq_zero 524bf215546Sopenharmony_ci * | le_zero * lt_zero 525bf215546Sopenharmony_ci * | lt_zero * le_zero # Multiplication is commutative 526bf215546Sopenharmony_ci * | le_zero * le_zero 527bf215546Sopenharmony_ci * | gt_zero * ge_zero # Multiplication is commutative 528bf215546Sopenharmony_ci * | eq_zero * ge_zero # Multiplication is commutative 529bf215546Sopenharmony_ci * | a * a # Left source == right source 530bf215546Sopenharmony_ci * | gt_zero * gt_zero 531bf215546Sopenharmony_ci * | lt_zero * lt_zero 532bf215546Sopenharmony_ci * ; 533bf215546Sopenharmony_ci * 534bf215546Sopenharmony_ci * le_zero: ge_zero * le_zero 535bf215546Sopenharmony_ci * | ge_zero * lt_zero 536bf215546Sopenharmony_ci * | lt_zero * ge_zero # Multiplication is commutative 537bf215546Sopenharmony_ci * | le_zero * ge_zero # Multiplication is commutative 538bf215546Sopenharmony_ci * | le_zero * gt_zero 539bf215546Sopenharmony_ci * | lt_zero * gt_zero 540bf215546Sopenharmony_ci * | gt_zero * lt_zero # Multiplication is commutative 541bf215546Sopenharmony_ci * ; 542bf215546Sopenharmony_ci * 543bf215546Sopenharmony_ci * eq_zero: eq_zero * <any> 544bf215546Sopenharmony_ci * <any> * eq_zero # Multiplication is commutative 545bf215546Sopenharmony_ci * 546bf215546Sopenharmony_ci * All other cases are 'unknown'. 547bf215546Sopenharmony_ci */ 548bf215546Sopenharmony_ci static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = { 549bf215546Sopenharmony_ci /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 550bf215546Sopenharmony_ci /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero }, 551bf215546Sopenharmony_ci /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero }, 552bf215546Sopenharmony_ci /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero }, 553bf215546Sopenharmony_ci /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, 554bf215546Sopenharmony_ci /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, 555bf215546Sopenharmony_ci /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero }, 556bf215546Sopenharmony_ci /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero } 557bf215546Sopenharmony_ci }; 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci ASSERT_TABLE_IS_COMMUTATIVE(fmul_table); 560bf215546Sopenharmony_ci ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table); 561bf215546Sopenharmony_ci ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci static const enum ssa_ranges fneg_table[last_range + 1] = { 564bf215546Sopenharmony_ci /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 565bf215546Sopenharmony_ci _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero 566bf215546Sopenharmony_ci }; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table); 569bf215546Sopenharmony_ci ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table); 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci switch (alu->op) { 573bf215546Sopenharmony_ci case nir_op_b2f32: 574bf215546Sopenharmony_ci case nir_op_b2i32: 575bf215546Sopenharmony_ci /* b2f32 will generate either 0.0 or 1.0. This case is trivial. 576bf215546Sopenharmony_ci * 577bf215546Sopenharmony_ci * b2i32 will generate either 0x00000000 or 0x00000001. When those bit 578bf215546Sopenharmony_ci * patterns are interpreted as floating point, they are 0.0 and 579bf215546Sopenharmony_ci * 1.401298464324817e-45. The latter is subnormal, but it is finite and 580bf215546Sopenharmony_ci * a number. 581bf215546Sopenharmony_ci */ 582bf215546Sopenharmony_ci r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32, true, true}; 583bf215546Sopenharmony_ci break; 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci case nir_op_bcsel: { 586bf215546Sopenharmony_ci const struct ssa_result_range left = 587bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, use_type); 588bf215546Sopenharmony_ci const struct ssa_result_range right = 589bf215546Sopenharmony_ci analyze_expression(alu, 2, ht, use_type); 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci r.is_integral = left.is_integral && right.is_integral; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci /* This could be better, but it would require a lot of work. For 594bf215546Sopenharmony_ci * example, the result of the following is a number: 595bf215546Sopenharmony_ci * 596bf215546Sopenharmony_ci * bcsel(a > 0.0, a, 38.6) 597bf215546Sopenharmony_ci * 598bf215546Sopenharmony_ci * If the result of 'a > 0.0' is true, then the use of 'a' in the true 599bf215546Sopenharmony_ci * part of the bcsel must be a number. 600bf215546Sopenharmony_ci * 601bf215546Sopenharmony_ci * Other cases are even more challenging. 602bf215546Sopenharmony_ci * 603bf215546Sopenharmony_ci * bcsel(a > 0.5, a - 0.5, 0.0) 604bf215546Sopenharmony_ci */ 605bf215546Sopenharmony_ci r.is_a_number = left.is_a_number && right.is_a_number; 606bf215546Sopenharmony_ci r.is_finite = left.is_finite && right.is_finite; 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci r.range = union_ranges(left.range, right.range); 609bf215546Sopenharmony_ci break; 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci case nir_op_i2f32: 613bf215546Sopenharmony_ci case nir_op_u2f32: 614bf215546Sopenharmony_ci r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci r.is_integral = true; 617bf215546Sopenharmony_ci r.is_a_number = true; 618bf215546Sopenharmony_ci r.is_finite = true; 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci if (r.range == unknown && alu->op == nir_op_u2f32) 621bf215546Sopenharmony_ci r.range = ge_zero; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci break; 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci case nir_op_fabs: 626bf215546Sopenharmony_ci r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci switch (r.range) { 629bf215546Sopenharmony_ci case unknown: 630bf215546Sopenharmony_ci case le_zero: 631bf215546Sopenharmony_ci case ge_zero: 632bf215546Sopenharmony_ci r.range = ge_zero; 633bf215546Sopenharmony_ci break; 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci case lt_zero: 636bf215546Sopenharmony_ci case gt_zero: 637bf215546Sopenharmony_ci case ne_zero: 638bf215546Sopenharmony_ci r.range = gt_zero; 639bf215546Sopenharmony_ci break; 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci case eq_zero: 642bf215546Sopenharmony_ci break; 643bf215546Sopenharmony_ci } 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci break; 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci case nir_op_fadd: { 648bf215546Sopenharmony_ci const struct ssa_result_range left = 649bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 650bf215546Sopenharmony_ci const struct ssa_result_range right = 651bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci r.is_integral = left.is_integral && right.is_integral; 654bf215546Sopenharmony_ci r.range = fadd_table[left.range][right.range]; 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci /* X + Y is NaN if either operand is NaN or if one operand is +Inf and 657bf215546Sopenharmony_ci * the other is -Inf. If neither operand is NaN and at least one of the 658bf215546Sopenharmony_ci * operands is finite, then the result cannot be NaN. 659bf215546Sopenharmony_ci */ 660bf215546Sopenharmony_ci r.is_a_number = left.is_a_number && right.is_a_number && 661bf215546Sopenharmony_ci (left.is_finite || right.is_finite); 662bf215546Sopenharmony_ci break; 663bf215546Sopenharmony_ci } 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci case nir_op_fexp2: { 666bf215546Sopenharmony_ci /* If the parameter might be less than zero, the mathematically result 667bf215546Sopenharmony_ci * will be on (0, 1). For sufficiently large magnitude negative 668bf215546Sopenharmony_ci * parameters, the result will flush to zero. 669bf215546Sopenharmony_ci */ 670bf215546Sopenharmony_ci static const enum ssa_ranges table[last_range + 1] = { 671bf215546Sopenharmony_ci /* unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 672bf215546Sopenharmony_ci ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero 673bf215546Sopenharmony_ci }; 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table); 678bf215546Sopenharmony_ci ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table); 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci r.is_integral = r.is_integral && is_not_negative(r.range); 681bf215546Sopenharmony_ci r.range = table[r.range]; 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci /* Various cases can result in NaN, so assume the worst. */ 684bf215546Sopenharmony_ci r.is_finite = false; 685bf215546Sopenharmony_ci r.is_a_number = false; 686bf215546Sopenharmony_ci break; 687bf215546Sopenharmony_ci } 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci case nir_op_fmax: { 690bf215546Sopenharmony_ci const struct ssa_result_range left = 691bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 692bf215546Sopenharmony_ci const struct ssa_result_range right = 693bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci r.is_integral = left.is_integral && right.is_integral; 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_ci /* This is conservative. It may be possible to determine that the 698bf215546Sopenharmony_ci * result must be finite in more cases, but it would take some effort to 699bf215546Sopenharmony_ci * work out all the corners. For example, fmax({lt_zero, finite}, 700bf215546Sopenharmony_ci * {lt_zero}) should result in {lt_zero, finite}. 701bf215546Sopenharmony_ci */ 702bf215546Sopenharmony_ci r.is_finite = left.is_finite && right.is_finite; 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci /* If one source is NaN, fmax always picks the other source. */ 705bf215546Sopenharmony_ci r.is_a_number = left.is_a_number || right.is_a_number; 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci /* gt_zero: fmax(gt_zero, *) 708bf215546Sopenharmony_ci * | fmax(*, gt_zero) # Treat fmax as commutative 709bf215546Sopenharmony_ci * ; 710bf215546Sopenharmony_ci * 711bf215546Sopenharmony_ci * ge_zero: fmax(ge_zero, ne_zero) 712bf215546Sopenharmony_ci * | fmax(ge_zero, lt_zero) 713bf215546Sopenharmony_ci * | fmax(ge_zero, le_zero) 714bf215546Sopenharmony_ci * | fmax(ge_zero, eq_zero) 715bf215546Sopenharmony_ci * | fmax(ne_zero, ge_zero) # Treat fmax as commutative 716bf215546Sopenharmony_ci * | fmax(lt_zero, ge_zero) # Treat fmax as commutative 717bf215546Sopenharmony_ci * | fmax(le_zero, ge_zero) # Treat fmax as commutative 718bf215546Sopenharmony_ci * | fmax(eq_zero, ge_zero) # Treat fmax as commutative 719bf215546Sopenharmony_ci * | fmax(ge_zero, ge_zero) 720bf215546Sopenharmony_ci * ; 721bf215546Sopenharmony_ci * 722bf215546Sopenharmony_ci * le_zero: fmax(le_zero, lt_zero) 723bf215546Sopenharmony_ci * | fmax(lt_zero, le_zero) # Treat fmax as commutative 724bf215546Sopenharmony_ci * | fmax(le_zero, le_zero) 725bf215546Sopenharmony_ci * ; 726bf215546Sopenharmony_ci * 727bf215546Sopenharmony_ci * lt_zero: fmax(lt_zero, lt_zero) 728bf215546Sopenharmony_ci * ; 729bf215546Sopenharmony_ci * 730bf215546Sopenharmony_ci * ne_zero: fmax(ne_zero, lt_zero) 731bf215546Sopenharmony_ci * | fmax(lt_zero, ne_zero) # Treat fmax as commutative 732bf215546Sopenharmony_ci * | fmax(ne_zero, ne_zero) 733bf215546Sopenharmony_ci * ; 734bf215546Sopenharmony_ci * 735bf215546Sopenharmony_ci * eq_zero: fmax(eq_zero, le_zero) 736bf215546Sopenharmony_ci * | fmax(eq_zero, lt_zero) 737bf215546Sopenharmony_ci * | fmax(le_zero, eq_zero) # Treat fmax as commutative 738bf215546Sopenharmony_ci * | fmax(lt_zero, eq_zero) # Treat fmax as commutative 739bf215546Sopenharmony_ci * | fmax(eq_zero, eq_zero) 740bf215546Sopenharmony_ci * ; 741bf215546Sopenharmony_ci * 742bf215546Sopenharmony_ci * All other cases are 'unknown'. 743bf215546Sopenharmony_ci */ 744bf215546Sopenharmony_ci static const enum ssa_ranges table[last_range + 1][last_range + 1] = { 745bf215546Sopenharmony_ci /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 746bf215546Sopenharmony_ci /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ }, 747bf215546Sopenharmony_ci /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero }, 748bf215546Sopenharmony_ci /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero }, 749bf215546Sopenharmony_ci /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero }, 750bf215546Sopenharmony_ci /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero }, 751bf215546Sopenharmony_ci /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ }, 752bf215546Sopenharmony_ci /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero } 753bf215546Sopenharmony_ci }; 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci /* Treat fmax as commutative. */ 756bf215546Sopenharmony_ci ASSERT_TABLE_IS_COMMUTATIVE(table); 757bf215546Sopenharmony_ci ASSERT_TABLE_IS_DIAGONAL(table); 758bf215546Sopenharmony_ci ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table); 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci r.range = table[left.range][right.range]; 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_ci /* Recall that when either value is NaN, fmax will pick the other value. 763bf215546Sopenharmony_ci * This means the result range of the fmax will either be the "ideal" 764bf215546Sopenharmony_ci * result range (calculated above) or the range of the non-NaN value. 765bf215546Sopenharmony_ci */ 766bf215546Sopenharmony_ci if (!left.is_a_number) 767bf215546Sopenharmony_ci r.range = union_ranges(r.range, right.range); 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci if (!right.is_a_number) 770bf215546Sopenharmony_ci r.range = union_ranges(r.range, left.range); 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci break; 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci 775bf215546Sopenharmony_ci case nir_op_fmin: { 776bf215546Sopenharmony_ci const struct ssa_result_range left = 777bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 778bf215546Sopenharmony_ci const struct ssa_result_range right = 779bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); 780bf215546Sopenharmony_ci 781bf215546Sopenharmony_ci r.is_integral = left.is_integral && right.is_integral; 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci /* This is conservative. It may be possible to determine that the 784bf215546Sopenharmony_ci * result must be finite in more cases, but it would take some effort to 785bf215546Sopenharmony_ci * work out all the corners. For example, fmin({gt_zero, finite}, 786bf215546Sopenharmony_ci * {gt_zero}) should result in {gt_zero, finite}. 787bf215546Sopenharmony_ci */ 788bf215546Sopenharmony_ci r.is_finite = left.is_finite && right.is_finite; 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_ci /* If one source is NaN, fmin always picks the other source. */ 791bf215546Sopenharmony_ci r.is_a_number = left.is_a_number || right.is_a_number; 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci /* lt_zero: fmin(lt_zero, *) 794bf215546Sopenharmony_ci * | fmin(*, lt_zero) # Treat fmin as commutative 795bf215546Sopenharmony_ci * ; 796bf215546Sopenharmony_ci * 797bf215546Sopenharmony_ci * le_zero: fmin(le_zero, ne_zero) 798bf215546Sopenharmony_ci * | fmin(le_zero, gt_zero) 799bf215546Sopenharmony_ci * | fmin(le_zero, ge_zero) 800bf215546Sopenharmony_ci * | fmin(le_zero, eq_zero) 801bf215546Sopenharmony_ci * | fmin(ne_zero, le_zero) # Treat fmin as commutative 802bf215546Sopenharmony_ci * | fmin(gt_zero, le_zero) # Treat fmin as commutative 803bf215546Sopenharmony_ci * | fmin(ge_zero, le_zero) # Treat fmin as commutative 804bf215546Sopenharmony_ci * | fmin(eq_zero, le_zero) # Treat fmin as commutative 805bf215546Sopenharmony_ci * | fmin(le_zero, le_zero) 806bf215546Sopenharmony_ci * ; 807bf215546Sopenharmony_ci * 808bf215546Sopenharmony_ci * ge_zero: fmin(ge_zero, gt_zero) 809bf215546Sopenharmony_ci * | fmin(gt_zero, ge_zero) # Treat fmin as commutative 810bf215546Sopenharmony_ci * | fmin(ge_zero, ge_zero) 811bf215546Sopenharmony_ci * ; 812bf215546Sopenharmony_ci * 813bf215546Sopenharmony_ci * gt_zero: fmin(gt_zero, gt_zero) 814bf215546Sopenharmony_ci * ; 815bf215546Sopenharmony_ci * 816bf215546Sopenharmony_ci * ne_zero: fmin(ne_zero, gt_zero) 817bf215546Sopenharmony_ci * | fmin(gt_zero, ne_zero) # Treat fmin as commutative 818bf215546Sopenharmony_ci * | fmin(ne_zero, ne_zero) 819bf215546Sopenharmony_ci * ; 820bf215546Sopenharmony_ci * 821bf215546Sopenharmony_ci * eq_zero: fmin(eq_zero, ge_zero) 822bf215546Sopenharmony_ci * | fmin(eq_zero, gt_zero) 823bf215546Sopenharmony_ci * | fmin(ge_zero, eq_zero) # Treat fmin as commutative 824bf215546Sopenharmony_ci * | fmin(gt_zero, eq_zero) # Treat fmin as commutative 825bf215546Sopenharmony_ci * | fmin(eq_zero, eq_zero) 826bf215546Sopenharmony_ci * ; 827bf215546Sopenharmony_ci * 828bf215546Sopenharmony_ci * All other cases are 'unknown'. 829bf215546Sopenharmony_ci */ 830bf215546Sopenharmony_ci static const enum ssa_ranges table[last_range + 1][last_range + 1] = { 831bf215546Sopenharmony_ci /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 832bf215546Sopenharmony_ci /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ }, 833bf215546Sopenharmony_ci /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero }, 834bf215546Sopenharmony_ci /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero }, 835bf215546Sopenharmony_ci /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero }, 836bf215546Sopenharmony_ci /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero }, 837bf215546Sopenharmony_ci /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ }, 838bf215546Sopenharmony_ci /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero } 839bf215546Sopenharmony_ci }; 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci /* Treat fmin as commutative. */ 842bf215546Sopenharmony_ci ASSERT_TABLE_IS_COMMUTATIVE(table); 843bf215546Sopenharmony_ci ASSERT_TABLE_IS_DIAGONAL(table); 844bf215546Sopenharmony_ci ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table); 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci r.range = table[left.range][right.range]; 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci /* Recall that when either value is NaN, fmin will pick the other value. 849bf215546Sopenharmony_ci * This means the result range of the fmin will either be the "ideal" 850bf215546Sopenharmony_ci * result range (calculated above) or the range of the non-NaN value. 851bf215546Sopenharmony_ci */ 852bf215546Sopenharmony_ci if (!left.is_a_number) 853bf215546Sopenharmony_ci r.range = union_ranges(r.range, right.range); 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci if (!right.is_a_number) 856bf215546Sopenharmony_ci r.range = union_ranges(r.range, left.range); 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci break; 859bf215546Sopenharmony_ci } 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_ci case nir_op_fmul: 862bf215546Sopenharmony_ci case nir_op_fmulz: { 863bf215546Sopenharmony_ci const struct ssa_result_range left = 864bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 865bf215546Sopenharmony_ci const struct ssa_result_range right = 866bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci r.is_integral = left.is_integral && right.is_integral; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_ci /* x * x => ge_zero */ 871bf215546Sopenharmony_ci if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) { 872bf215546Sopenharmony_ci /* Even if x > 0, the result of x*x can be zero when x is, for 873bf215546Sopenharmony_ci * example, a subnormal number. 874bf215546Sopenharmony_ci */ 875bf215546Sopenharmony_ci r.range = ge_zero; 876bf215546Sopenharmony_ci } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) { 877bf215546Sopenharmony_ci /* -x * x => le_zero. */ 878bf215546Sopenharmony_ci r.range = le_zero; 879bf215546Sopenharmony_ci } else 880bf215546Sopenharmony_ci r.range = fmul_table[left.range][right.range]; 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci if (alu->op == nir_op_fmul) { 883bf215546Sopenharmony_ci /* Mulitpliation produces NaN for X * NaN and for 0 * ±Inf. If both 884bf215546Sopenharmony_ci * operands are numbers and either both are finite or one is finite and 885bf215546Sopenharmony_ci * the other cannot be zero, then the result must be a number. 886bf215546Sopenharmony_ci */ 887bf215546Sopenharmony_ci r.is_a_number = (left.is_a_number && right.is_a_number) && 888bf215546Sopenharmony_ci ((left.is_finite && right.is_finite) || 889bf215546Sopenharmony_ci (!is_not_zero(left.range) && right.is_finite) || 890bf215546Sopenharmony_ci (left.is_finite && !is_not_zero(right.range))); 891bf215546Sopenharmony_ci } else { 892bf215546Sopenharmony_ci /* nir_op_fmulz: unlike nir_op_fmul, 0 * ±Inf is a number. */ 893bf215546Sopenharmony_ci r.is_a_number = left.is_a_number && right.is_a_number; 894bf215546Sopenharmony_ci } 895bf215546Sopenharmony_ci 896bf215546Sopenharmony_ci break; 897bf215546Sopenharmony_ci } 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci case nir_op_frcp: 900bf215546Sopenharmony_ci r = (struct ssa_result_range){ 901bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range, 902bf215546Sopenharmony_ci false, 903bf215546Sopenharmony_ci false, /* Various cases can result in NaN, so assume the worst. */ 904bf215546Sopenharmony_ci false /* " " " " " " " " " " */ 905bf215546Sopenharmony_ci }; 906bf215546Sopenharmony_ci break; 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci case nir_op_mov: 909bf215546Sopenharmony_ci r = analyze_expression(alu, 0, ht, use_type); 910bf215546Sopenharmony_ci break; 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci case nir_op_fneg: 913bf215546Sopenharmony_ci r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_ci r.range = fneg_table[r.range]; 916bf215546Sopenharmony_ci break; 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci case nir_op_fsat: { 919bf215546Sopenharmony_ci const struct ssa_result_range left = 920bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci /* fsat(NaN) = 0. */ 923bf215546Sopenharmony_ci r.is_a_number = true; 924bf215546Sopenharmony_ci r.is_finite = true; 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci switch (left.range) { 927bf215546Sopenharmony_ci case le_zero: 928bf215546Sopenharmony_ci case lt_zero: 929bf215546Sopenharmony_ci case eq_zero: 930bf215546Sopenharmony_ci r.range = eq_zero; 931bf215546Sopenharmony_ci r.is_integral = true; 932bf215546Sopenharmony_ci break; 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci case gt_zero: 935bf215546Sopenharmony_ci /* fsat is equivalent to fmin(fmax(X, 0.0), 1.0), so if X is not a 936bf215546Sopenharmony_ci * number, the result will be 0. 937bf215546Sopenharmony_ci */ 938bf215546Sopenharmony_ci r.range = left.is_a_number ? gt_zero : ge_zero; 939bf215546Sopenharmony_ci r.is_integral = left.is_integral; 940bf215546Sopenharmony_ci break; 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci case ge_zero: 943bf215546Sopenharmony_ci case ne_zero: 944bf215546Sopenharmony_ci case unknown: 945bf215546Sopenharmony_ci /* Since the result must be in [0, 1], the value must be >= 0. */ 946bf215546Sopenharmony_ci r.range = ge_zero; 947bf215546Sopenharmony_ci r.is_integral = left.is_integral; 948bf215546Sopenharmony_ci break; 949bf215546Sopenharmony_ci } 950bf215546Sopenharmony_ci break; 951bf215546Sopenharmony_ci } 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci case nir_op_fsign: 954bf215546Sopenharmony_ci r = (struct ssa_result_range){ 955bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range, 956bf215546Sopenharmony_ci true, 957bf215546Sopenharmony_ci true, /* fsign is -1, 0, or 1, even for NaN, so it must be a number. */ 958bf215546Sopenharmony_ci true /* fsign is -1, 0, or 1, even for NaN, so it must be finite. */ 959bf215546Sopenharmony_ci }; 960bf215546Sopenharmony_ci break; 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_ci case nir_op_fsqrt: 963bf215546Sopenharmony_ci case nir_op_frsq: 964bf215546Sopenharmony_ci r = (struct ssa_result_range){ge_zero, false, false, false}; 965bf215546Sopenharmony_ci break; 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci case nir_op_ffloor: { 968bf215546Sopenharmony_ci const struct ssa_result_range left = 969bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_ci r.is_integral = true; 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci /* In IEEE 754, floor(NaN) is NaN, and floor(±Inf) is ±Inf. See 974bf215546Sopenharmony_ci * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/floor.html 975bf215546Sopenharmony_ci */ 976bf215546Sopenharmony_ci r.is_a_number = left.is_a_number; 977bf215546Sopenharmony_ci r.is_finite = left.is_finite; 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci if (left.is_integral || left.range == le_zero || left.range == lt_zero) 980bf215546Sopenharmony_ci r.range = left.range; 981bf215546Sopenharmony_ci else if (left.range == ge_zero || left.range == gt_zero) 982bf215546Sopenharmony_ci r.range = ge_zero; 983bf215546Sopenharmony_ci else if (left.range == ne_zero) 984bf215546Sopenharmony_ci r.range = unknown; 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci break; 987bf215546Sopenharmony_ci } 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci case nir_op_fceil: { 990bf215546Sopenharmony_ci const struct ssa_result_range left = 991bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_ci r.is_integral = true; 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci /* In IEEE 754, ceil(NaN) is NaN, and ceil(±Inf) is ±Inf. See 996bf215546Sopenharmony_ci * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/ceil.html 997bf215546Sopenharmony_ci */ 998bf215546Sopenharmony_ci r.is_a_number = left.is_a_number; 999bf215546Sopenharmony_ci r.is_finite = left.is_finite; 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci if (left.is_integral || left.range == ge_zero || left.range == gt_zero) 1002bf215546Sopenharmony_ci r.range = left.range; 1003bf215546Sopenharmony_ci else if (left.range == le_zero || left.range == lt_zero) 1004bf215546Sopenharmony_ci r.range = le_zero; 1005bf215546Sopenharmony_ci else if (left.range == ne_zero) 1006bf215546Sopenharmony_ci r.range = unknown; 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci break; 1009bf215546Sopenharmony_ci } 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_ci case nir_op_ftrunc: { 1012bf215546Sopenharmony_ci const struct ssa_result_range left = 1013bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci r.is_integral = true; 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci /* In IEEE 754, trunc(NaN) is NaN, and trunc(±Inf) is ±Inf. See 1018bf215546Sopenharmony_ci * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/trunc.html 1019bf215546Sopenharmony_ci */ 1020bf215546Sopenharmony_ci r.is_a_number = left.is_a_number; 1021bf215546Sopenharmony_ci r.is_finite = left.is_finite; 1022bf215546Sopenharmony_ci 1023bf215546Sopenharmony_ci if (left.is_integral) 1024bf215546Sopenharmony_ci r.range = left.range; 1025bf215546Sopenharmony_ci else if (left.range == ge_zero || left.range == gt_zero) 1026bf215546Sopenharmony_ci r.range = ge_zero; 1027bf215546Sopenharmony_ci else if (left.range == le_zero || left.range == lt_zero) 1028bf215546Sopenharmony_ci r.range = le_zero; 1029bf215546Sopenharmony_ci else if (left.range == ne_zero) 1030bf215546Sopenharmony_ci r.range = unknown; 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci break; 1033bf215546Sopenharmony_ci } 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_ci case nir_op_flt: 1036bf215546Sopenharmony_ci case nir_op_fge: 1037bf215546Sopenharmony_ci case nir_op_feq: 1038bf215546Sopenharmony_ci case nir_op_fneu: 1039bf215546Sopenharmony_ci case nir_op_ilt: 1040bf215546Sopenharmony_ci case nir_op_ige: 1041bf215546Sopenharmony_ci case nir_op_ieq: 1042bf215546Sopenharmony_ci case nir_op_ine: 1043bf215546Sopenharmony_ci case nir_op_ult: 1044bf215546Sopenharmony_ci case nir_op_uge: 1045bf215546Sopenharmony_ci /* Boolean results are 0 or -1. */ 1046bf215546Sopenharmony_ci r = (struct ssa_result_range){le_zero, false, true, false}; 1047bf215546Sopenharmony_ci break; 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci case nir_op_fdot2: 1050bf215546Sopenharmony_ci case nir_op_fdot3: 1051bf215546Sopenharmony_ci case nir_op_fdot4: 1052bf215546Sopenharmony_ci case nir_op_fdot8: 1053bf215546Sopenharmony_ci case nir_op_fdot16: 1054bf215546Sopenharmony_ci case nir_op_fdot2_replicated: 1055bf215546Sopenharmony_ci case nir_op_fdot3_replicated: 1056bf215546Sopenharmony_ci case nir_op_fdot4_replicated: 1057bf215546Sopenharmony_ci case nir_op_fdot8_replicated: 1058bf215546Sopenharmony_ci case nir_op_fdot16_replicated: { 1059bf215546Sopenharmony_ci const struct ssa_result_range left = 1060bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci /* If the two sources are the same SSA value, then the result is either 1063bf215546Sopenharmony_ci * NaN or some number >= 0. If one source is the negation of the other, 1064bf215546Sopenharmony_ci * the result is either NaN or some number <= 0. 1065bf215546Sopenharmony_ci * 1066bf215546Sopenharmony_ci * In either of these two cases, if one source is a number, then the 1067bf215546Sopenharmony_ci * other must also be a number. Since it should not be possible to get 1068bf215546Sopenharmony_ci * Inf-Inf in the dot-product, the result must also be a number. 1069bf215546Sopenharmony_ci */ 1070bf215546Sopenharmony_ci if (nir_alu_srcs_equal(alu, alu, 0, 1)) { 1071bf215546Sopenharmony_ci r = (struct ssa_result_range){ge_zero, false, left.is_a_number, false }; 1072bf215546Sopenharmony_ci } else if (nir_alu_srcs_negative_equal(alu, alu, 0, 1)) { 1073bf215546Sopenharmony_ci r = (struct ssa_result_range){le_zero, false, left.is_a_number, false }; 1074bf215546Sopenharmony_ci } else { 1075bf215546Sopenharmony_ci r = (struct ssa_result_range){unknown, false, false, false}; 1076bf215546Sopenharmony_ci } 1077bf215546Sopenharmony_ci break; 1078bf215546Sopenharmony_ci } 1079bf215546Sopenharmony_ci 1080bf215546Sopenharmony_ci case nir_op_fpow: { 1081bf215546Sopenharmony_ci /* Due to flush-to-zero semanatics of floating-point numbers with very 1082bf215546Sopenharmony_ci * small mangnitudes, we can never really be sure a result will be 1083bf215546Sopenharmony_ci * non-zero. 1084bf215546Sopenharmony_ci * 1085bf215546Sopenharmony_ci * NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man 1086bf215546Sopenharmony_ci * page for that function says: 1087bf215546Sopenharmony_ci * 1088bf215546Sopenharmony_ci * If y is 0, the result is 1.0 (even if x is a NaN). 1089bf215546Sopenharmony_ci * 1090bf215546Sopenharmony_ci * gt_zero: pow(*, eq_zero) 1091bf215546Sopenharmony_ci * | pow(eq_zero, lt_zero) # 0^-y = +inf 1092bf215546Sopenharmony_ci * | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0 1093bf215546Sopenharmony_ci * ; 1094bf215546Sopenharmony_ci * 1095bf215546Sopenharmony_ci * eq_zero: pow(eq_zero, gt_zero) 1096bf215546Sopenharmony_ci * ; 1097bf215546Sopenharmony_ci * 1098bf215546Sopenharmony_ci * ge_zero: pow(gt_zero, gt_zero) 1099bf215546Sopenharmony_ci * | pow(gt_zero, ge_zero) 1100bf215546Sopenharmony_ci * | pow(gt_zero, lt_zero) 1101bf215546Sopenharmony_ci * | pow(gt_zero, le_zero) 1102bf215546Sopenharmony_ci * | pow(gt_zero, ne_zero) 1103bf215546Sopenharmony_ci * | pow(gt_zero, unknown) 1104bf215546Sopenharmony_ci * | pow(ge_zero, gt_zero) 1105bf215546Sopenharmony_ci * | pow(ge_zero, ge_zero) 1106bf215546Sopenharmony_ci * | pow(ge_zero, lt_zero) 1107bf215546Sopenharmony_ci * | pow(ge_zero, le_zero) 1108bf215546Sopenharmony_ci * | pow(ge_zero, ne_zero) 1109bf215546Sopenharmony_ci * | pow(ge_zero, unknown) 1110bf215546Sopenharmony_ci * | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0 1111bf215546Sopenharmony_ci * | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0 1112bf215546Sopenharmony_ci * | pow(eq_zero, unknown) # union of all other y cases 1113bf215546Sopenharmony_ci * ; 1114bf215546Sopenharmony_ci * 1115bf215546Sopenharmony_ci * All other cases are unknown. 1116bf215546Sopenharmony_ci * 1117bf215546Sopenharmony_ci * We could do better if the right operand is a constant, integral 1118bf215546Sopenharmony_ci * value. 1119bf215546Sopenharmony_ci */ 1120bf215546Sopenharmony_ci static const enum ssa_ranges table[last_range + 1][last_range + 1] = { 1121bf215546Sopenharmony_ci /* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */ 1122bf215546Sopenharmony_ci /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero }, 1123bf215546Sopenharmony_ci /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero }, 1124bf215546Sopenharmony_ci /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero }, 1125bf215546Sopenharmony_ci /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero }, 1126bf215546Sopenharmony_ci /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero }, 1127bf215546Sopenharmony_ci /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero }, 1128bf215546Sopenharmony_ci /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero }, 1129bf215546Sopenharmony_ci }; 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci const struct ssa_result_range left = 1132bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 1133bf215546Sopenharmony_ci const struct ssa_result_range right = 1134bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table); 1137bf215546Sopenharmony_ci ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table); 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci r.is_integral = left.is_integral && right.is_integral && 1140bf215546Sopenharmony_ci is_not_negative(right.range); 1141bf215546Sopenharmony_ci r.range = table[left.range][right.range]; 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci /* Various cases can result in NaN, so assume the worst. */ 1144bf215546Sopenharmony_ci r.is_a_number = false; 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci break; 1147bf215546Sopenharmony_ci } 1148bf215546Sopenharmony_ci 1149bf215546Sopenharmony_ci case nir_op_ffma: { 1150bf215546Sopenharmony_ci const struct ssa_result_range first = 1151bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 1152bf215546Sopenharmony_ci const struct ssa_result_range second = 1153bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); 1154bf215546Sopenharmony_ci const struct ssa_result_range third = 1155bf215546Sopenharmony_ci analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2)); 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci r.is_integral = first.is_integral && second.is_integral && 1158bf215546Sopenharmony_ci third.is_integral; 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_ci /* Various cases can result in NaN, so assume the worst. */ 1161bf215546Sopenharmony_ci r.is_a_number = false; 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci enum ssa_ranges fmul_range; 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_ci if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) { 1166bf215546Sopenharmony_ci /* See handling of nir_op_fmul for explanation of why ge_zero is the 1167bf215546Sopenharmony_ci * range. 1168bf215546Sopenharmony_ci */ 1169bf215546Sopenharmony_ci fmul_range = ge_zero; 1170bf215546Sopenharmony_ci } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) { 1171bf215546Sopenharmony_ci /* -x * x => le_zero */ 1172bf215546Sopenharmony_ci fmul_range = le_zero; 1173bf215546Sopenharmony_ci } else 1174bf215546Sopenharmony_ci fmul_range = fmul_table[first.range][second.range]; 1175bf215546Sopenharmony_ci 1176bf215546Sopenharmony_ci r.range = fadd_table[fmul_range][third.range]; 1177bf215546Sopenharmony_ci break; 1178bf215546Sopenharmony_ci } 1179bf215546Sopenharmony_ci 1180bf215546Sopenharmony_ci case nir_op_flrp: { 1181bf215546Sopenharmony_ci const struct ssa_result_range first = 1182bf215546Sopenharmony_ci analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)); 1183bf215546Sopenharmony_ci const struct ssa_result_range second = 1184bf215546Sopenharmony_ci analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1)); 1185bf215546Sopenharmony_ci const struct ssa_result_range third = 1186bf215546Sopenharmony_ci analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2)); 1187bf215546Sopenharmony_ci 1188bf215546Sopenharmony_ci r.is_integral = first.is_integral && second.is_integral && 1189bf215546Sopenharmony_ci third.is_integral; 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_ci /* Various cases can result in NaN, so assume the worst. */ 1192bf215546Sopenharmony_ci r.is_a_number = false; 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci /* Decompose the flrp to first + third * (second + -first) */ 1195bf215546Sopenharmony_ci const enum ssa_ranges inner_fadd_range = 1196bf215546Sopenharmony_ci fadd_table[second.range][fneg_table[first.range]]; 1197bf215546Sopenharmony_ci 1198bf215546Sopenharmony_ci const enum ssa_ranges fmul_range = 1199bf215546Sopenharmony_ci fmul_table[third.range][inner_fadd_range]; 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci r.range = fadd_table[first.range][fmul_range]; 1202bf215546Sopenharmony_ci break; 1203bf215546Sopenharmony_ci } 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci default: 1206bf215546Sopenharmony_ci r = (struct ssa_result_range){unknown, false, false, false}; 1207bf215546Sopenharmony_ci break; 1208bf215546Sopenharmony_ci } 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci if (r.range == eq_zero) 1211bf215546Sopenharmony_ci r.is_integral = true; 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_ci /* Just like isfinite(), the is_finite flag implies the value is a number. */ 1214bf215546Sopenharmony_ci assert((int) r.is_finite <= (int) r.is_a_number); 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci _mesa_hash_table_insert(ht, pack_key(alu, use_type), pack_data(r)); 1217bf215546Sopenharmony_ci return r; 1218bf215546Sopenharmony_ci} 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci#undef _______ 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_cistruct ssa_result_range 1223bf215546Sopenharmony_cinir_analyze_range(struct hash_table *range_ht, 1224bf215546Sopenharmony_ci const nir_alu_instr *instr, unsigned src) 1225bf215546Sopenharmony_ci{ 1226bf215546Sopenharmony_ci return analyze_expression(instr, src, range_ht, 1227bf215546Sopenharmony_ci nir_alu_src_type(instr, src)); 1228bf215546Sopenharmony_ci} 1229bf215546Sopenharmony_ci 1230bf215546Sopenharmony_cistatic uint32_t bitmask(uint32_t size) { 1231bf215546Sopenharmony_ci return size >= 32 ? 0xffffffffu : ((uint32_t)1 << size) - 1u; 1232bf215546Sopenharmony_ci} 1233bf215546Sopenharmony_ci 1234bf215546Sopenharmony_cistatic uint64_t mul_clamp(uint32_t a, uint32_t b) 1235bf215546Sopenharmony_ci{ 1236bf215546Sopenharmony_ci if (a != 0 && (a * b) / a != b) 1237bf215546Sopenharmony_ci return (uint64_t)UINT32_MAX + 1; 1238bf215546Sopenharmony_ci else 1239bf215546Sopenharmony_ci return a * b; 1240bf215546Sopenharmony_ci} 1241bf215546Sopenharmony_ci 1242bf215546Sopenharmony_ci/* recursively gather at most "buf_size" phi/bcsel sources */ 1243bf215546Sopenharmony_cistatic unsigned 1244bf215546Sopenharmony_cisearch_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size, struct set *visited) 1245bf215546Sopenharmony_ci{ 1246bf215546Sopenharmony_ci if (_mesa_set_search(visited, scalar.def)) 1247bf215546Sopenharmony_ci return 0; 1248bf215546Sopenharmony_ci _mesa_set_add(visited, scalar.def); 1249bf215546Sopenharmony_ci 1250bf215546Sopenharmony_ci if (scalar.def->parent_instr->type == nir_instr_type_phi) { 1251bf215546Sopenharmony_ci nir_phi_instr *phi = nir_instr_as_phi(scalar.def->parent_instr); 1252bf215546Sopenharmony_ci unsigned num_sources_left = exec_list_length(&phi->srcs); 1253bf215546Sopenharmony_ci if (buf_size >= num_sources_left) { 1254bf215546Sopenharmony_ci unsigned total_added = 0; 1255bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 1256bf215546Sopenharmony_ci num_sources_left--; 1257bf215546Sopenharmony_ci unsigned added = search_phi_bcsel(nir_get_ssa_scalar(src->src.ssa, 0), 1258bf215546Sopenharmony_ci buf + total_added, buf_size - num_sources_left, visited); 1259bf215546Sopenharmony_ci assert(added <= buf_size); 1260bf215546Sopenharmony_ci buf_size -= added; 1261bf215546Sopenharmony_ci total_added += added; 1262bf215546Sopenharmony_ci } 1263bf215546Sopenharmony_ci return total_added; 1264bf215546Sopenharmony_ci } 1265bf215546Sopenharmony_ci } 1266bf215546Sopenharmony_ci 1267bf215546Sopenharmony_ci if (nir_ssa_scalar_is_alu(scalar)) { 1268bf215546Sopenharmony_ci nir_op op = nir_ssa_scalar_alu_op(scalar); 1269bf215546Sopenharmony_ci 1270bf215546Sopenharmony_ci if ((op == nir_op_bcsel || op == nir_op_b32csel) && buf_size >= 2) { 1271bf215546Sopenharmony_ci nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0); 1272bf215546Sopenharmony_ci nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1); 1273bf215546Sopenharmony_ci 1274bf215546Sopenharmony_ci unsigned added = search_phi_bcsel(src0, buf, buf_size - 1, visited); 1275bf215546Sopenharmony_ci buf_size -= added; 1276bf215546Sopenharmony_ci added += search_phi_bcsel(src1, buf + added, buf_size, visited); 1277bf215546Sopenharmony_ci return added; 1278bf215546Sopenharmony_ci } 1279bf215546Sopenharmony_ci } 1280bf215546Sopenharmony_ci 1281bf215546Sopenharmony_ci buf[0] = scalar; 1282bf215546Sopenharmony_ci return 1; 1283bf215546Sopenharmony_ci} 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_cistatic nir_variable * 1286bf215546Sopenharmony_cilookup_input(nir_shader *shader, unsigned driver_location) 1287bf215546Sopenharmony_ci{ 1288bf215546Sopenharmony_ci return nir_find_variable_with_driver_location(shader, nir_var_shader_in, 1289bf215546Sopenharmony_ci driver_location); 1290bf215546Sopenharmony_ci} 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_ci/* The config here should be generic enough to be correct on any HW. */ 1293bf215546Sopenharmony_cistatic const nir_unsigned_upper_bound_config default_ub_config = { 1294bf215546Sopenharmony_ci .min_subgroup_size = 1u, 1295bf215546Sopenharmony_ci .max_subgroup_size = UINT16_MAX, 1296bf215546Sopenharmony_ci .max_workgroup_invocations = UINT16_MAX, 1297bf215546Sopenharmony_ci .max_workgroup_count = {UINT16_MAX, UINT16_MAX, UINT16_MAX}, 1298bf215546Sopenharmony_ci .max_workgroup_size = {UINT16_MAX, UINT16_MAX, UINT16_MAX}, 1299bf215546Sopenharmony_ci .vertex_attrib_max = { 1300bf215546Sopenharmony_ci UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, 1301bf215546Sopenharmony_ci UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, 1302bf215546Sopenharmony_ci UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, 1303bf215546Sopenharmony_ci UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, 1304bf215546Sopenharmony_ci }, 1305bf215546Sopenharmony_ci}; 1306bf215546Sopenharmony_ci 1307bf215546Sopenharmony_ciuint32_t 1308bf215546Sopenharmony_cinir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, 1309bf215546Sopenharmony_ci nir_ssa_scalar scalar, 1310bf215546Sopenharmony_ci const nir_unsigned_upper_bound_config *config) 1311bf215546Sopenharmony_ci{ 1312bf215546Sopenharmony_ci assert(scalar.def->bit_size <= 32); 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_ci if (!config) 1315bf215546Sopenharmony_ci config = &default_ub_config; 1316bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(scalar)) 1317bf215546Sopenharmony_ci return nir_ssa_scalar_as_uint(scalar); 1318bf215546Sopenharmony_ci 1319bf215546Sopenharmony_ci /* keys can't be 0, so we have to add 1 to the index */ 1320bf215546Sopenharmony_ci void *key = (void*)(((uintptr_t)(scalar.def->index + 1) << 4) | scalar.comp); 1321bf215546Sopenharmony_ci struct hash_entry *he = _mesa_hash_table_search(range_ht, key); 1322bf215546Sopenharmony_ci if (he != NULL) 1323bf215546Sopenharmony_ci return (uintptr_t)he->data; 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_ci uint32_t max = bitmask(scalar.def->bit_size); 1326bf215546Sopenharmony_ci 1327bf215546Sopenharmony_ci if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) { 1328bf215546Sopenharmony_ci uint32_t res = max; 1329bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr); 1330bf215546Sopenharmony_ci switch (intrin->intrinsic) { 1331bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_index: 1332bf215546Sopenharmony_ci /* The local invocation index is used under the hood by RADV for 1333bf215546Sopenharmony_ci * some non-compute-like shaders (eg. LS and NGG). These technically 1334bf215546Sopenharmony_ci * run in workgroups on the HW, even though this fact is not exposed 1335bf215546Sopenharmony_ci * by the API. 1336bf215546Sopenharmony_ci * They can safely use the same code path here as variable sized 1337bf215546Sopenharmony_ci * compute-like shader stages. 1338bf215546Sopenharmony_ci */ 1339bf215546Sopenharmony_ci if (!gl_shader_stage_uses_workgroup(shader->info.stage) || 1340bf215546Sopenharmony_ci shader->info.workgroup_size_variable) { 1341bf215546Sopenharmony_ci res = config->max_workgroup_invocations - 1; 1342bf215546Sopenharmony_ci } else { 1343bf215546Sopenharmony_ci res = (shader->info.workgroup_size[0] * 1344bf215546Sopenharmony_ci shader->info.workgroup_size[1] * 1345bf215546Sopenharmony_ci shader->info.workgroup_size[2]) - 1u; 1346bf215546Sopenharmony_ci } 1347bf215546Sopenharmony_ci break; 1348bf215546Sopenharmony_ci case nir_intrinsic_load_local_invocation_id: 1349bf215546Sopenharmony_ci if (shader->info.workgroup_size_variable) 1350bf215546Sopenharmony_ci res = config->max_workgroup_size[scalar.comp] - 1u; 1351bf215546Sopenharmony_ci else 1352bf215546Sopenharmony_ci res = shader->info.workgroup_size[scalar.comp] - 1u; 1353bf215546Sopenharmony_ci break; 1354bf215546Sopenharmony_ci case nir_intrinsic_load_workgroup_id: 1355bf215546Sopenharmony_ci res = config->max_workgroup_count[scalar.comp] - 1u; 1356bf215546Sopenharmony_ci break; 1357bf215546Sopenharmony_ci case nir_intrinsic_load_num_workgroups: 1358bf215546Sopenharmony_ci res = config->max_workgroup_count[scalar.comp]; 1359bf215546Sopenharmony_ci break; 1360bf215546Sopenharmony_ci case nir_intrinsic_load_global_invocation_id: 1361bf215546Sopenharmony_ci if (shader->info.workgroup_size_variable) { 1362bf215546Sopenharmony_ci res = mul_clamp(config->max_workgroup_size[scalar.comp], 1363bf215546Sopenharmony_ci config->max_workgroup_count[scalar.comp]) - 1u; 1364bf215546Sopenharmony_ci } else { 1365bf215546Sopenharmony_ci res = (shader->info.workgroup_size[scalar.comp] * 1366bf215546Sopenharmony_ci config->max_workgroup_count[scalar.comp]) - 1u; 1367bf215546Sopenharmony_ci } 1368bf215546Sopenharmony_ci break; 1369bf215546Sopenharmony_ci case nir_intrinsic_load_invocation_id: 1370bf215546Sopenharmony_ci if (shader->info.stage == MESA_SHADER_TESS_CTRL) 1371bf215546Sopenharmony_ci res = shader->info.tess.tcs_vertices_out 1372bf215546Sopenharmony_ci ? (shader->info.tess.tcs_vertices_out - 1) 1373bf215546Sopenharmony_ci : 511; /* Generous maximum output patch size of 512 */ 1374bf215546Sopenharmony_ci break; 1375bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_invocation: 1376bf215546Sopenharmony_ci case nir_intrinsic_first_invocation: 1377bf215546Sopenharmony_ci res = config->max_subgroup_size - 1; 1378bf215546Sopenharmony_ci break; 1379bf215546Sopenharmony_ci case nir_intrinsic_mbcnt_amd: { 1380bf215546Sopenharmony_ci uint32_t src0 = config->max_subgroup_size - 1; 1381bf215546Sopenharmony_ci uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config); 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci if (src0 + src1 < src0) 1384bf215546Sopenharmony_ci res = max; /* overflow */ 1385bf215546Sopenharmony_ci else 1386bf215546Sopenharmony_ci res = src0 + src1; 1387bf215546Sopenharmony_ci break; 1388bf215546Sopenharmony_ci } 1389bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_size: 1390bf215546Sopenharmony_ci res = config->max_subgroup_size; 1391bf215546Sopenharmony_ci break; 1392bf215546Sopenharmony_ci case nir_intrinsic_load_subgroup_id: 1393bf215546Sopenharmony_ci case nir_intrinsic_load_num_subgroups: { 1394bf215546Sopenharmony_ci uint32_t workgroup_size = config->max_workgroup_invocations; 1395bf215546Sopenharmony_ci if (gl_shader_stage_uses_workgroup(shader->info.stage) && 1396bf215546Sopenharmony_ci !shader->info.workgroup_size_variable) { 1397bf215546Sopenharmony_ci workgroup_size = shader->info.workgroup_size[0] * 1398bf215546Sopenharmony_ci shader->info.workgroup_size[1] * 1399bf215546Sopenharmony_ci shader->info.workgroup_size[2]; 1400bf215546Sopenharmony_ci } 1401bf215546Sopenharmony_ci res = DIV_ROUND_UP(workgroup_size, config->min_subgroup_size); 1402bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_load_subgroup_id) 1403bf215546Sopenharmony_ci res--; 1404bf215546Sopenharmony_ci break; 1405bf215546Sopenharmony_ci } 1406bf215546Sopenharmony_ci case nir_intrinsic_load_input: { 1407bf215546Sopenharmony_ci if (shader->info.stage == MESA_SHADER_VERTEX && nir_src_is_const(intrin->src[0])) { 1408bf215546Sopenharmony_ci nir_variable *var = lookup_input(shader, nir_intrinsic_base(intrin)); 1409bf215546Sopenharmony_ci if (var) { 1410bf215546Sopenharmony_ci int loc = var->data.location - VERT_ATTRIB_GENERIC0; 1411bf215546Sopenharmony_ci if (loc >= 0) 1412bf215546Sopenharmony_ci res = config->vertex_attrib_max[loc]; 1413bf215546Sopenharmony_ci } 1414bf215546Sopenharmony_ci } 1415bf215546Sopenharmony_ci break; 1416bf215546Sopenharmony_ci } 1417bf215546Sopenharmony_ci case nir_intrinsic_reduce: 1418bf215546Sopenharmony_ci case nir_intrinsic_inclusive_scan: 1419bf215546Sopenharmony_ci case nir_intrinsic_exclusive_scan: { 1420bf215546Sopenharmony_ci nir_op op = nir_intrinsic_reduction_op(intrin); 1421bf215546Sopenharmony_ci if (op == nir_op_umin || op == nir_op_umax || op == nir_op_imin || op == nir_op_imax) 1422bf215546Sopenharmony_ci res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config); 1423bf215546Sopenharmony_ci break; 1424bf215546Sopenharmony_ci } 1425bf215546Sopenharmony_ci case nir_intrinsic_read_first_invocation: 1426bf215546Sopenharmony_ci case nir_intrinsic_read_invocation: 1427bf215546Sopenharmony_ci case nir_intrinsic_shuffle: 1428bf215546Sopenharmony_ci case nir_intrinsic_shuffle_xor: 1429bf215546Sopenharmony_ci case nir_intrinsic_shuffle_up: 1430bf215546Sopenharmony_ci case nir_intrinsic_shuffle_down: 1431bf215546Sopenharmony_ci case nir_intrinsic_quad_broadcast: 1432bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_horizontal: 1433bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_vertical: 1434bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_diagonal: 1435bf215546Sopenharmony_ci case nir_intrinsic_quad_swizzle_amd: 1436bf215546Sopenharmony_ci case nir_intrinsic_masked_swizzle_amd: 1437bf215546Sopenharmony_ci res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config); 1438bf215546Sopenharmony_ci break; 1439bf215546Sopenharmony_ci case nir_intrinsic_write_invocation_amd: { 1440bf215546Sopenharmony_ci uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config); 1441bf215546Sopenharmony_ci uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config); 1442bf215546Sopenharmony_ci res = MAX2(src0, src1); 1443bf215546Sopenharmony_ci break; 1444bf215546Sopenharmony_ci } 1445bf215546Sopenharmony_ci case nir_intrinsic_load_tess_rel_patch_id_amd: 1446bf215546Sopenharmony_ci case nir_intrinsic_load_tcs_num_patches_amd: 1447bf215546Sopenharmony_ci /* Very generous maximum: TCS/TES executed by largest possible workgroup */ 1448bf215546Sopenharmony_ci res = config->max_workgroup_invocations / MAX2(shader->info.tess.tcs_vertices_out, 1u); 1449bf215546Sopenharmony_ci break; 1450bf215546Sopenharmony_ci case nir_intrinsic_load_scalar_arg_amd: 1451bf215546Sopenharmony_ci case nir_intrinsic_load_vector_arg_amd: { 1452bf215546Sopenharmony_ci uint32_t upper_bound = nir_intrinsic_arg_upper_bound_u32_amd(intrin); 1453bf215546Sopenharmony_ci if (upper_bound) 1454bf215546Sopenharmony_ci res = upper_bound; 1455bf215546Sopenharmony_ci break; 1456bf215546Sopenharmony_ci } 1457bf215546Sopenharmony_ci default: 1458bf215546Sopenharmony_ci break; 1459bf215546Sopenharmony_ci } 1460bf215546Sopenharmony_ci if (res != max) 1461bf215546Sopenharmony_ci _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res); 1462bf215546Sopenharmony_ci return res; 1463bf215546Sopenharmony_ci } 1464bf215546Sopenharmony_ci 1465bf215546Sopenharmony_ci if (scalar.def->parent_instr->type == nir_instr_type_phi) { 1466bf215546Sopenharmony_ci nir_cf_node *prev = nir_cf_node_prev(&scalar.def->parent_instr->block->cf_node); 1467bf215546Sopenharmony_ci 1468bf215546Sopenharmony_ci uint32_t res = 0; 1469bf215546Sopenharmony_ci if (!prev || prev->type == nir_cf_node_block) { 1470bf215546Sopenharmony_ci _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)max); 1471bf215546Sopenharmony_ci 1472bf215546Sopenharmony_ci struct set *visited = _mesa_pointer_set_create(NULL); 1473bf215546Sopenharmony_ci nir_ssa_scalar defs[64]; 1474bf215546Sopenharmony_ci unsigned def_count = search_phi_bcsel(scalar, defs, 64, visited); 1475bf215546Sopenharmony_ci _mesa_set_destroy(visited, NULL); 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci for (unsigned i = 0; i < def_count; i++) 1478bf215546Sopenharmony_ci res = MAX2(res, nir_unsigned_upper_bound(shader, range_ht, defs[i], config)); 1479bf215546Sopenharmony_ci } else { 1480bf215546Sopenharmony_ci nir_foreach_phi_src(src, nir_instr_as_phi(scalar.def->parent_instr)) { 1481bf215546Sopenharmony_ci res = MAX2(res, nir_unsigned_upper_bound( 1482bf215546Sopenharmony_ci shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config)); 1483bf215546Sopenharmony_ci } 1484bf215546Sopenharmony_ci } 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res); 1487bf215546Sopenharmony_ci return res; 1488bf215546Sopenharmony_ci } 1489bf215546Sopenharmony_ci 1490bf215546Sopenharmony_ci if (nir_ssa_scalar_is_alu(scalar)) { 1491bf215546Sopenharmony_ci nir_op op = nir_ssa_scalar_alu_op(scalar); 1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_ci switch (op) { 1494bf215546Sopenharmony_ci case nir_op_umin: 1495bf215546Sopenharmony_ci case nir_op_imin: 1496bf215546Sopenharmony_ci case nir_op_imax: 1497bf215546Sopenharmony_ci case nir_op_umax: 1498bf215546Sopenharmony_ci case nir_op_iand: 1499bf215546Sopenharmony_ci case nir_op_ior: 1500bf215546Sopenharmony_ci case nir_op_ixor: 1501bf215546Sopenharmony_ci case nir_op_ishl: 1502bf215546Sopenharmony_ci case nir_op_imul: 1503bf215546Sopenharmony_ci case nir_op_ushr: 1504bf215546Sopenharmony_ci case nir_op_ishr: 1505bf215546Sopenharmony_ci case nir_op_iadd: 1506bf215546Sopenharmony_ci case nir_op_umod: 1507bf215546Sopenharmony_ci case nir_op_udiv: 1508bf215546Sopenharmony_ci case nir_op_bcsel: 1509bf215546Sopenharmony_ci case nir_op_b32csel: 1510bf215546Sopenharmony_ci case nir_op_ubfe: 1511bf215546Sopenharmony_ci case nir_op_bfm: 1512bf215546Sopenharmony_ci case nir_op_fmul: 1513bf215546Sopenharmony_ci case nir_op_fmulz: 1514bf215546Sopenharmony_ci case nir_op_extract_u8: 1515bf215546Sopenharmony_ci case nir_op_extract_i8: 1516bf215546Sopenharmony_ci case nir_op_extract_u16: 1517bf215546Sopenharmony_ci case nir_op_extract_i16: 1518bf215546Sopenharmony_ci break; 1519bf215546Sopenharmony_ci case nir_op_u2u1: 1520bf215546Sopenharmony_ci case nir_op_u2u8: 1521bf215546Sopenharmony_ci case nir_op_u2u16: 1522bf215546Sopenharmony_ci case nir_op_u2u32: 1523bf215546Sopenharmony_ci case nir_op_f2u32: 1524bf215546Sopenharmony_ci if (nir_ssa_scalar_chase_alu_src(scalar, 0).def->bit_size > 32) { 1525bf215546Sopenharmony_ci /* If src is >32 bits, return max */ 1526bf215546Sopenharmony_ci return max; 1527bf215546Sopenharmony_ci } 1528bf215546Sopenharmony_ci break; 1529bf215546Sopenharmony_ci default: 1530bf215546Sopenharmony_ci return max; 1531bf215546Sopenharmony_ci } 1532bf215546Sopenharmony_ci 1533bf215546Sopenharmony_ci uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 0), config); 1534bf215546Sopenharmony_ci uint32_t src1 = max, src2 = max; 1535bf215546Sopenharmony_ci if (nir_op_infos[op].num_inputs > 1) 1536bf215546Sopenharmony_ci src1 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 1), config); 1537bf215546Sopenharmony_ci if (nir_op_infos[op].num_inputs > 2) 1538bf215546Sopenharmony_ci src2 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 2), config); 1539bf215546Sopenharmony_ci 1540bf215546Sopenharmony_ci uint32_t res = max; 1541bf215546Sopenharmony_ci switch (op) { 1542bf215546Sopenharmony_ci case nir_op_umin: 1543bf215546Sopenharmony_ci res = src0 < src1 ? src0 : src1; 1544bf215546Sopenharmony_ci break; 1545bf215546Sopenharmony_ci case nir_op_imin: 1546bf215546Sopenharmony_ci case nir_op_imax: 1547bf215546Sopenharmony_ci case nir_op_umax: 1548bf215546Sopenharmony_ci res = src0 > src1 ? src0 : src1; 1549bf215546Sopenharmony_ci break; 1550bf215546Sopenharmony_ci case nir_op_iand: 1551bf215546Sopenharmony_ci res = bitmask(util_last_bit64(src0)) & bitmask(util_last_bit64(src1)); 1552bf215546Sopenharmony_ci break; 1553bf215546Sopenharmony_ci case nir_op_ior: 1554bf215546Sopenharmony_ci case nir_op_ixor: 1555bf215546Sopenharmony_ci res = bitmask(util_last_bit64(src0)) | bitmask(util_last_bit64(src1)); 1556bf215546Sopenharmony_ci break; 1557bf215546Sopenharmony_ci case nir_op_ishl: 1558bf215546Sopenharmony_ci if (util_last_bit64(src0) + src1 > scalar.def->bit_size) 1559bf215546Sopenharmony_ci res = max; /* overflow */ 1560bf215546Sopenharmony_ci else 1561bf215546Sopenharmony_ci res = src0 << MIN2(src1, scalar.def->bit_size - 1u); 1562bf215546Sopenharmony_ci break; 1563bf215546Sopenharmony_ci case nir_op_imul: 1564bf215546Sopenharmony_ci if (src0 != 0 && (src0 * src1) / src0 != src1) 1565bf215546Sopenharmony_ci res = max; 1566bf215546Sopenharmony_ci else 1567bf215546Sopenharmony_ci res = src0 * src1; 1568bf215546Sopenharmony_ci break; 1569bf215546Sopenharmony_ci case nir_op_ushr: { 1570bf215546Sopenharmony_ci nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1); 1571bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(src1_scalar)) 1572bf215546Sopenharmony_ci res = src0 >> nir_ssa_scalar_as_uint(src1_scalar); 1573bf215546Sopenharmony_ci else 1574bf215546Sopenharmony_ci res = src0; 1575bf215546Sopenharmony_ci break; 1576bf215546Sopenharmony_ci } 1577bf215546Sopenharmony_ci case nir_op_ishr: { 1578bf215546Sopenharmony_ci nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1); 1579bf215546Sopenharmony_ci if (src0 <= 2147483647 && nir_ssa_scalar_is_const(src1_scalar)) 1580bf215546Sopenharmony_ci res = src0 >> nir_ssa_scalar_as_uint(src1_scalar); 1581bf215546Sopenharmony_ci else 1582bf215546Sopenharmony_ci res = src0; 1583bf215546Sopenharmony_ci break; 1584bf215546Sopenharmony_ci } 1585bf215546Sopenharmony_ci case nir_op_iadd: 1586bf215546Sopenharmony_ci if (src0 + src1 < src0) 1587bf215546Sopenharmony_ci res = max; /* overflow */ 1588bf215546Sopenharmony_ci else 1589bf215546Sopenharmony_ci res = src0 + src1; 1590bf215546Sopenharmony_ci break; 1591bf215546Sopenharmony_ci case nir_op_umod: 1592bf215546Sopenharmony_ci res = src1 ? src1 - 1 : 0; 1593bf215546Sopenharmony_ci break; 1594bf215546Sopenharmony_ci case nir_op_udiv: { 1595bf215546Sopenharmony_ci nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1); 1596bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(src1_scalar)) 1597bf215546Sopenharmony_ci res = nir_ssa_scalar_as_uint(src1_scalar) ? src0 / nir_ssa_scalar_as_uint(src1_scalar) : 0; 1598bf215546Sopenharmony_ci else 1599bf215546Sopenharmony_ci res = src0; 1600bf215546Sopenharmony_ci break; 1601bf215546Sopenharmony_ci } 1602bf215546Sopenharmony_ci case nir_op_bcsel: 1603bf215546Sopenharmony_ci case nir_op_b32csel: 1604bf215546Sopenharmony_ci res = src1 > src2 ? src1 : src2; 1605bf215546Sopenharmony_ci break; 1606bf215546Sopenharmony_ci case nir_op_ubfe: 1607bf215546Sopenharmony_ci res = bitmask(MIN2(src2, scalar.def->bit_size)); 1608bf215546Sopenharmony_ci break; 1609bf215546Sopenharmony_ci case nir_op_bfm: { 1610bf215546Sopenharmony_ci nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1); 1611bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(src1_scalar)) { 1612bf215546Sopenharmony_ci src0 = MIN2(src0, 31); 1613bf215546Sopenharmony_ci src1 = nir_ssa_scalar_as_uint(src1_scalar) & 0x1fu; 1614bf215546Sopenharmony_ci res = bitmask(src0) << src1; 1615bf215546Sopenharmony_ci } else { 1616bf215546Sopenharmony_ci src0 = MIN2(src0, 31); 1617bf215546Sopenharmony_ci src1 = MIN2(src1, 31); 1618bf215546Sopenharmony_ci res = bitmask(MIN2(src0 + src1, 32)); 1619bf215546Sopenharmony_ci } 1620bf215546Sopenharmony_ci break; 1621bf215546Sopenharmony_ci } 1622bf215546Sopenharmony_ci /* limited floating-point support for f2u32(fmul(load_input(), <constant>)) */ 1623bf215546Sopenharmony_ci case nir_op_f2u32: 1624bf215546Sopenharmony_ci /* infinity/NaN starts at 0x7f800000u, negative numbers at 0x80000000 */ 1625bf215546Sopenharmony_ci if (src0 < 0x7f800000u) { 1626bf215546Sopenharmony_ci float val; 1627bf215546Sopenharmony_ci memcpy(&val, &src0, 4); 1628bf215546Sopenharmony_ci res = (uint32_t)val; 1629bf215546Sopenharmony_ci } 1630bf215546Sopenharmony_ci break; 1631bf215546Sopenharmony_ci case nir_op_fmul: 1632bf215546Sopenharmony_ci case nir_op_fmulz: 1633bf215546Sopenharmony_ci /* infinity/NaN starts at 0x7f800000u, negative numbers at 0x80000000 */ 1634bf215546Sopenharmony_ci if (src0 < 0x7f800000u && src1 < 0x7f800000u) { 1635bf215546Sopenharmony_ci float src0_f, src1_f; 1636bf215546Sopenharmony_ci memcpy(&src0_f, &src0, 4); 1637bf215546Sopenharmony_ci memcpy(&src1_f, &src1, 4); 1638bf215546Sopenharmony_ci /* not a proper rounding-up multiplication, but should be good enough */ 1639bf215546Sopenharmony_ci float max_f = ceilf(src0_f) * ceilf(src1_f); 1640bf215546Sopenharmony_ci memcpy(&res, &max_f, 4); 1641bf215546Sopenharmony_ci } 1642bf215546Sopenharmony_ci break; 1643bf215546Sopenharmony_ci case nir_op_u2u1: 1644bf215546Sopenharmony_ci case nir_op_u2u8: 1645bf215546Sopenharmony_ci case nir_op_u2u16: 1646bf215546Sopenharmony_ci case nir_op_u2u32: 1647bf215546Sopenharmony_ci res = MIN2(src0, max); 1648bf215546Sopenharmony_ci break; 1649bf215546Sopenharmony_ci case nir_op_sad_u8x4: 1650bf215546Sopenharmony_ci res = src2 + 4 * 255; 1651bf215546Sopenharmony_ci break; 1652bf215546Sopenharmony_ci case nir_op_extract_u8: 1653bf215546Sopenharmony_ci res = MIN2(src0, UINT8_MAX); 1654bf215546Sopenharmony_ci break; 1655bf215546Sopenharmony_ci case nir_op_extract_i8: 1656bf215546Sopenharmony_ci res = (src0 >= 0x80) ? max : MIN2(src0, INT8_MAX); 1657bf215546Sopenharmony_ci break; 1658bf215546Sopenharmony_ci case nir_op_extract_u16: 1659bf215546Sopenharmony_ci res = MIN2(src0, UINT16_MAX); 1660bf215546Sopenharmony_ci break; 1661bf215546Sopenharmony_ci case nir_op_extract_i16: 1662bf215546Sopenharmony_ci res = (src0 >= 0x8000) ? max : MIN2(src0, INT16_MAX); 1663bf215546Sopenharmony_ci break; 1664bf215546Sopenharmony_ci default: 1665bf215546Sopenharmony_ci res = max; 1666bf215546Sopenharmony_ci break; 1667bf215546Sopenharmony_ci } 1668bf215546Sopenharmony_ci _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res); 1669bf215546Sopenharmony_ci return res; 1670bf215546Sopenharmony_ci } 1671bf215546Sopenharmony_ci 1672bf215546Sopenharmony_ci return max; 1673bf215546Sopenharmony_ci} 1674bf215546Sopenharmony_ci 1675bf215546Sopenharmony_cibool 1676bf215546Sopenharmony_cinir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, 1677bf215546Sopenharmony_ci nir_ssa_scalar ssa, unsigned const_val, 1678bf215546Sopenharmony_ci const nir_unsigned_upper_bound_config *config) 1679bf215546Sopenharmony_ci{ 1680bf215546Sopenharmony_ci if (nir_ssa_scalar_is_alu(ssa)) { 1681bf215546Sopenharmony_ci nir_op alu_op = nir_ssa_scalar_alu_op(ssa); 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_ci /* iadd(imul(a, #b), #c) */ 1684bf215546Sopenharmony_ci if (alu_op == nir_op_imul || alu_op == nir_op_ishl) { 1685bf215546Sopenharmony_ci nir_ssa_scalar mul_src0 = nir_ssa_scalar_chase_alu_src(ssa, 0); 1686bf215546Sopenharmony_ci nir_ssa_scalar mul_src1 = nir_ssa_scalar_chase_alu_src(ssa, 1); 1687bf215546Sopenharmony_ci uint32_t stride = 1; 1688bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(mul_src0)) 1689bf215546Sopenharmony_ci stride = nir_ssa_scalar_as_uint(mul_src0); 1690bf215546Sopenharmony_ci else if (nir_ssa_scalar_is_const(mul_src1)) 1691bf215546Sopenharmony_ci stride = nir_ssa_scalar_as_uint(mul_src1); 1692bf215546Sopenharmony_ci 1693bf215546Sopenharmony_ci if (alu_op == nir_op_ishl) 1694bf215546Sopenharmony_ci stride = 1u << (stride % 32u); 1695bf215546Sopenharmony_ci 1696bf215546Sopenharmony_ci if (!stride || const_val <= UINT32_MAX - (UINT32_MAX / stride * stride)) 1697bf215546Sopenharmony_ci return false; 1698bf215546Sopenharmony_ci } 1699bf215546Sopenharmony_ci 1700bf215546Sopenharmony_ci /* iadd(iand(a, #b), #c) */ 1701bf215546Sopenharmony_ci if (alu_op == nir_op_iand) { 1702bf215546Sopenharmony_ci nir_ssa_scalar and_src0 = nir_ssa_scalar_chase_alu_src(ssa, 0); 1703bf215546Sopenharmony_ci nir_ssa_scalar and_src1 = nir_ssa_scalar_chase_alu_src(ssa, 1); 1704bf215546Sopenharmony_ci uint32_t mask = 0xffffffff; 1705bf215546Sopenharmony_ci if (nir_ssa_scalar_is_const(and_src0)) 1706bf215546Sopenharmony_ci mask = nir_ssa_scalar_as_uint(and_src0); 1707bf215546Sopenharmony_ci else if (nir_ssa_scalar_is_const(and_src1)) 1708bf215546Sopenharmony_ci mask = nir_ssa_scalar_as_uint(and_src1); 1709bf215546Sopenharmony_ci if (mask == 0 || const_val < (1u << (ffs(mask) - 1))) 1710bf215546Sopenharmony_ci return false; 1711bf215546Sopenharmony_ci } 1712bf215546Sopenharmony_ci } 1713bf215546Sopenharmony_ci 1714bf215546Sopenharmony_ci uint32_t ub = nir_unsigned_upper_bound(shader, range_ht, ssa, config); 1715bf215546Sopenharmony_ci return const_val + ub < const_val; 1716bf215546Sopenharmony_ci} 1717bf215546Sopenharmony_ci 1718bf215546Sopenharmony_cistatic uint64_t 1719bf215546Sopenharmony_cissa_def_bits_used(const nir_ssa_def *def, int recur) 1720bf215546Sopenharmony_ci{ 1721bf215546Sopenharmony_ci uint64_t bits_used = 0; 1722bf215546Sopenharmony_ci uint64_t all_bits = BITFIELD64_MASK(def->bit_size); 1723bf215546Sopenharmony_ci 1724bf215546Sopenharmony_ci /* Querying the bits used from a vector is too hard of a question to 1725bf215546Sopenharmony_ci * answer. Return the conservative answer that all bits are used. To 1726bf215546Sopenharmony_ci * handle this, the function would need to be extended to be a query of a 1727bf215546Sopenharmony_ci * single component of the vector. That would also necessary to fully 1728bf215546Sopenharmony_ci * handle the 'num_components > 1' inside the loop below. 1729bf215546Sopenharmony_ci * 1730bf215546Sopenharmony_ci * FINISHME: This restriction will eventually need to be restricted to be 1731bf215546Sopenharmony_ci * useful for hardware that uses u16vec2 as the native 16-bit integer type. 1732bf215546Sopenharmony_ci */ 1733bf215546Sopenharmony_ci if (def->num_components > 1) 1734bf215546Sopenharmony_ci return all_bits; 1735bf215546Sopenharmony_ci 1736bf215546Sopenharmony_ci /* Limit recursion */ 1737bf215546Sopenharmony_ci if (recur-- <= 0) 1738bf215546Sopenharmony_ci return all_bits; 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_ci nir_foreach_use(src, def) { 1741bf215546Sopenharmony_ci switch (src->parent_instr->type) { 1742bf215546Sopenharmony_ci case nir_instr_type_alu: { 1743bf215546Sopenharmony_ci nir_alu_instr *use_alu = nir_instr_as_alu(src->parent_instr); 1744bf215546Sopenharmony_ci unsigned src_idx = container_of(src, nir_alu_src, src) - use_alu->src; 1745bf215546Sopenharmony_ci 1746bf215546Sopenharmony_ci /* If a user of the value produces a vector result, return the 1747bf215546Sopenharmony_ci * conservative answer that all bits are used. It is possible to 1748bf215546Sopenharmony_ci * answer this query by looping over the components used. For example, 1749bf215546Sopenharmony_ci * 1750bf215546Sopenharmony_ci * vec4 32 ssa_5 = load_const(0x0000f000, 0x00000f00, 0x000000f0, 0x0000000f) 1751bf215546Sopenharmony_ci * ... 1752bf215546Sopenharmony_ci * vec4 32 ssa_8 = iand ssa_7.xxxx, ssa_5 1753bf215546Sopenharmony_ci * 1754bf215546Sopenharmony_ci * could conceivably return 0x0000ffff when queyring the bits used of 1755bf215546Sopenharmony_ci * ssa_7. This is unlikely to be worth the effort because the 1756bf215546Sopenharmony_ci * question can eventually answered after the shader has been 1757bf215546Sopenharmony_ci * scalarized. 1758bf215546Sopenharmony_ci */ 1759bf215546Sopenharmony_ci if (use_alu->dest.dest.ssa.num_components > 1) 1760bf215546Sopenharmony_ci return all_bits; 1761bf215546Sopenharmony_ci 1762bf215546Sopenharmony_ci switch (use_alu->op) { 1763bf215546Sopenharmony_ci case nir_op_u2u8: 1764bf215546Sopenharmony_ci case nir_op_i2i8: 1765bf215546Sopenharmony_ci bits_used |= 0xff; 1766bf215546Sopenharmony_ci break; 1767bf215546Sopenharmony_ci 1768bf215546Sopenharmony_ci case nir_op_u2u16: 1769bf215546Sopenharmony_ci case nir_op_i2i16: 1770bf215546Sopenharmony_ci bits_used |= all_bits & 0xffff; 1771bf215546Sopenharmony_ci break; 1772bf215546Sopenharmony_ci 1773bf215546Sopenharmony_ci case nir_op_u2u32: 1774bf215546Sopenharmony_ci case nir_op_i2i32: 1775bf215546Sopenharmony_ci bits_used |= all_bits & 0xffffffff; 1776bf215546Sopenharmony_ci break; 1777bf215546Sopenharmony_ci 1778bf215546Sopenharmony_ci case nir_op_extract_u8: 1779bf215546Sopenharmony_ci case nir_op_extract_i8: 1780bf215546Sopenharmony_ci if (src_idx == 0 && nir_src_is_const(use_alu->src[1].src)) { 1781bf215546Sopenharmony_ci unsigned chunk = nir_src_comp_as_uint(use_alu->src[1].src, 1782bf215546Sopenharmony_ci use_alu->src[1].swizzle[0]); 1783bf215546Sopenharmony_ci bits_used |= 0xffull << (chunk * 8); 1784bf215546Sopenharmony_ci break; 1785bf215546Sopenharmony_ci } else { 1786bf215546Sopenharmony_ci return all_bits; 1787bf215546Sopenharmony_ci } 1788bf215546Sopenharmony_ci 1789bf215546Sopenharmony_ci case nir_op_extract_u16: 1790bf215546Sopenharmony_ci case nir_op_extract_i16: 1791bf215546Sopenharmony_ci if (src_idx == 0 && nir_src_is_const(use_alu->src[1].src)) { 1792bf215546Sopenharmony_ci unsigned chunk = nir_src_comp_as_uint(use_alu->src[1].src, 1793bf215546Sopenharmony_ci use_alu->src[1].swizzle[0]); 1794bf215546Sopenharmony_ci bits_used |= 0xffffull << (chunk * 16); 1795bf215546Sopenharmony_ci break; 1796bf215546Sopenharmony_ci } else { 1797bf215546Sopenharmony_ci return all_bits; 1798bf215546Sopenharmony_ci } 1799bf215546Sopenharmony_ci 1800bf215546Sopenharmony_ci case nir_op_ishl: 1801bf215546Sopenharmony_ci case nir_op_ishr: 1802bf215546Sopenharmony_ci case nir_op_ushr: 1803bf215546Sopenharmony_ci if (src_idx == 1) { 1804bf215546Sopenharmony_ci bits_used |= (nir_src_bit_size(use_alu->src[0].src) - 1); 1805bf215546Sopenharmony_ci break; 1806bf215546Sopenharmony_ci } else { 1807bf215546Sopenharmony_ci return all_bits; 1808bf215546Sopenharmony_ci } 1809bf215546Sopenharmony_ci 1810bf215546Sopenharmony_ci case nir_op_iand: 1811bf215546Sopenharmony_ci assert(src_idx < 2); 1812bf215546Sopenharmony_ci if (nir_src_is_const(use_alu->src[1 - src_idx].src)) { 1813bf215546Sopenharmony_ci uint64_t u64 = nir_src_comp_as_uint(use_alu->src[1 - src_idx].src, 1814bf215546Sopenharmony_ci use_alu->src[1 - src_idx].swizzle[0]); 1815bf215546Sopenharmony_ci bits_used |= u64; 1816bf215546Sopenharmony_ci break; 1817bf215546Sopenharmony_ci } else { 1818bf215546Sopenharmony_ci return all_bits; 1819bf215546Sopenharmony_ci } 1820bf215546Sopenharmony_ci 1821bf215546Sopenharmony_ci case nir_op_ior: 1822bf215546Sopenharmony_ci assert(src_idx < 2); 1823bf215546Sopenharmony_ci if (nir_src_is_const(use_alu->src[1 - src_idx].src)) { 1824bf215546Sopenharmony_ci uint64_t u64 = nir_src_comp_as_uint(use_alu->src[1 - src_idx].src, 1825bf215546Sopenharmony_ci use_alu->src[1 - src_idx].swizzle[0]); 1826bf215546Sopenharmony_ci bits_used |= all_bits & ~u64; 1827bf215546Sopenharmony_ci break; 1828bf215546Sopenharmony_ci } else { 1829bf215546Sopenharmony_ci return all_bits; 1830bf215546Sopenharmony_ci } 1831bf215546Sopenharmony_ci 1832bf215546Sopenharmony_ci default: 1833bf215546Sopenharmony_ci /* We don't know what this op does */ 1834bf215546Sopenharmony_ci return all_bits; 1835bf215546Sopenharmony_ci } 1836bf215546Sopenharmony_ci break; 1837bf215546Sopenharmony_ci } 1838bf215546Sopenharmony_ci 1839bf215546Sopenharmony_ci case nir_instr_type_intrinsic: { 1840bf215546Sopenharmony_ci nir_intrinsic_instr *use_intrin = 1841bf215546Sopenharmony_ci nir_instr_as_intrinsic(src->parent_instr); 1842bf215546Sopenharmony_ci unsigned src_idx = src - use_intrin->src; 1843bf215546Sopenharmony_ci 1844bf215546Sopenharmony_ci switch (use_intrin->intrinsic) { 1845bf215546Sopenharmony_ci case nir_intrinsic_read_invocation: 1846bf215546Sopenharmony_ci case nir_intrinsic_shuffle: 1847bf215546Sopenharmony_ci case nir_intrinsic_shuffle_up: 1848bf215546Sopenharmony_ci case nir_intrinsic_shuffle_down: 1849bf215546Sopenharmony_ci case nir_intrinsic_shuffle_xor: 1850bf215546Sopenharmony_ci case nir_intrinsic_quad_broadcast: 1851bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_horizontal: 1852bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_vertical: 1853bf215546Sopenharmony_ci case nir_intrinsic_quad_swap_diagonal: 1854bf215546Sopenharmony_ci if (src_idx == 0) { 1855bf215546Sopenharmony_ci assert(use_intrin->dest.is_ssa); 1856bf215546Sopenharmony_ci bits_used |= ssa_def_bits_used(&use_intrin->dest.ssa, recur); 1857bf215546Sopenharmony_ci } else { 1858bf215546Sopenharmony_ci if (use_intrin->intrinsic == nir_intrinsic_quad_broadcast) { 1859bf215546Sopenharmony_ci bits_used |= 3; 1860bf215546Sopenharmony_ci } else { 1861bf215546Sopenharmony_ci /* Subgroups larger than 128 are not a thing */ 1862bf215546Sopenharmony_ci bits_used |= 127; 1863bf215546Sopenharmony_ci } 1864bf215546Sopenharmony_ci } 1865bf215546Sopenharmony_ci break; 1866bf215546Sopenharmony_ci 1867bf215546Sopenharmony_ci case nir_intrinsic_reduce: 1868bf215546Sopenharmony_ci case nir_intrinsic_inclusive_scan: 1869bf215546Sopenharmony_ci case nir_intrinsic_exclusive_scan: 1870bf215546Sopenharmony_ci assert(src_idx == 0); 1871bf215546Sopenharmony_ci switch (nir_intrinsic_reduction_op(use_intrin)) { 1872bf215546Sopenharmony_ci case nir_op_iadd: 1873bf215546Sopenharmony_ci case nir_op_imul: 1874bf215546Sopenharmony_ci case nir_op_ior: 1875bf215546Sopenharmony_ci case nir_op_iand: 1876bf215546Sopenharmony_ci case nir_op_ixor: 1877bf215546Sopenharmony_ci bits_used |= ssa_def_bits_used(&use_intrin->dest.ssa, recur); 1878bf215546Sopenharmony_ci break; 1879bf215546Sopenharmony_ci 1880bf215546Sopenharmony_ci default: 1881bf215546Sopenharmony_ci return all_bits; 1882bf215546Sopenharmony_ci } 1883bf215546Sopenharmony_ci break; 1884bf215546Sopenharmony_ci 1885bf215546Sopenharmony_ci default: 1886bf215546Sopenharmony_ci /* We don't know what this op does */ 1887bf215546Sopenharmony_ci return all_bits; 1888bf215546Sopenharmony_ci } 1889bf215546Sopenharmony_ci break; 1890bf215546Sopenharmony_ci } 1891bf215546Sopenharmony_ci 1892bf215546Sopenharmony_ci case nir_instr_type_phi: { 1893bf215546Sopenharmony_ci nir_phi_instr *use_phi = nir_instr_as_phi(src->parent_instr); 1894bf215546Sopenharmony_ci bits_used |= ssa_def_bits_used(&use_phi->dest.ssa, recur); 1895bf215546Sopenharmony_ci break; 1896bf215546Sopenharmony_ci } 1897bf215546Sopenharmony_ci 1898bf215546Sopenharmony_ci default: 1899bf215546Sopenharmony_ci return all_bits; 1900bf215546Sopenharmony_ci } 1901bf215546Sopenharmony_ci 1902bf215546Sopenharmony_ci /* If we've somehow shown that all our bits are used, we're done */ 1903bf215546Sopenharmony_ci assert((bits_used & ~all_bits) == 0); 1904bf215546Sopenharmony_ci if (bits_used == all_bits) 1905bf215546Sopenharmony_ci return all_bits; 1906bf215546Sopenharmony_ci } 1907bf215546Sopenharmony_ci 1908bf215546Sopenharmony_ci return bits_used; 1909bf215546Sopenharmony_ci} 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_ciuint64_t 1912bf215546Sopenharmony_cinir_ssa_def_bits_used(const nir_ssa_def *def) 1913bf215546Sopenharmony_ci{ 1914bf215546Sopenharmony_ci return ssa_def_bits_used(def, 2); 1915bf215546Sopenharmony_ci} 1916