1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci#include <math.h>
24bf215546Sopenharmony_ci#include <float.h>
25bf215546Sopenharmony_ci#include "nir.h"
26bf215546Sopenharmony_ci#include "nir_range_analysis.h"
27bf215546Sopenharmony_ci#include "util/hash_table.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci/**
30bf215546Sopenharmony_ci * Analyzes a sequence of operations to determine some aspects of the range of
31bf215546Sopenharmony_ci * the result.
32bf215546Sopenharmony_ci */
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_cistatic bool
35bf215546Sopenharmony_ciis_not_negative(enum ssa_ranges r)
36bf215546Sopenharmony_ci{
37bf215546Sopenharmony_ci   return r == gt_zero || r == ge_zero || r == eq_zero;
38bf215546Sopenharmony_ci}
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cistatic bool
41bf215546Sopenharmony_ciis_not_zero(enum ssa_ranges r)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   return r == gt_zero || r == lt_zero || r == ne_zero;
44bf215546Sopenharmony_ci}
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_cistatic void *
47bf215546Sopenharmony_cipack_data(const struct ssa_result_range r)
48bf215546Sopenharmony_ci{
49bf215546Sopenharmony_ci   return (void *)(uintptr_t)(r.range | r.is_integral << 8 | r.is_finite << 9 |
50bf215546Sopenharmony_ci                              r.is_a_number << 10);
51bf215546Sopenharmony_ci}
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_cistatic struct ssa_result_range
54bf215546Sopenharmony_ciunpack_data(const void *p)
55bf215546Sopenharmony_ci{
56bf215546Sopenharmony_ci   const uintptr_t v = (uintptr_t) p;
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   return (struct ssa_result_range){
59bf215546Sopenharmony_ci      .range       = v & 0xff,
60bf215546Sopenharmony_ci      .is_integral = (v & 0x00100) != 0,
61bf215546Sopenharmony_ci      .is_finite   = (v & 0x00200) != 0,
62bf215546Sopenharmony_ci      .is_a_number = (v & 0x00400) != 0
63bf215546Sopenharmony_ci   };
64bf215546Sopenharmony_ci}
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_cistatic void *
67bf215546Sopenharmony_cipack_key(const struct nir_alu_instr *instr, nir_alu_type type)
68bf215546Sopenharmony_ci{
69bf215546Sopenharmony_ci   uintptr_t type_encoding;
70bf215546Sopenharmony_ci   uintptr_t ptr = (uintptr_t) instr;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   /* The low 2 bits have to be zero or this whole scheme falls apart. */
73bf215546Sopenharmony_ci   assert((ptr & 0x3) == 0);
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   /* NIR is typeless in the sense that sequences of bits have whatever
76bf215546Sopenharmony_ci    * meaning is attached to them by the instruction that consumes them.
77bf215546Sopenharmony_ci    * However, the number of bits must match between producer and consumer.
78bf215546Sopenharmony_ci    * As a result, the number of bits does not need to be encoded here.
79bf215546Sopenharmony_ci    */
80bf215546Sopenharmony_ci   switch (nir_alu_type_get_base_type(type)) {
81bf215546Sopenharmony_ci   case nir_type_int:   type_encoding = 0; break;
82bf215546Sopenharmony_ci   case nir_type_uint:  type_encoding = 1; break;
83bf215546Sopenharmony_ci   case nir_type_bool:  type_encoding = 2; break;
84bf215546Sopenharmony_ci   case nir_type_float: type_encoding = 3; break;
85bf215546Sopenharmony_ci   default: unreachable("Invalid base type.");
86bf215546Sopenharmony_ci   }
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   return (void *)(ptr | type_encoding);
89bf215546Sopenharmony_ci}
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_cistatic nir_alu_type
92bf215546Sopenharmony_cinir_alu_src_type(const nir_alu_instr *instr, unsigned src)
93bf215546Sopenharmony_ci{
94bf215546Sopenharmony_ci   return nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]) |
95bf215546Sopenharmony_ci          nir_src_bit_size(instr->src[src].src);
96bf215546Sopenharmony_ci}
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_cistatic struct ssa_result_range
99bf215546Sopenharmony_cianalyze_constant(const struct nir_alu_instr *instr, unsigned src,
100bf215546Sopenharmony_ci                 nir_alu_type use_type)
101bf215546Sopenharmony_ci{
102bf215546Sopenharmony_ci   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = { 0, 1, 2, 3,
103bf215546Sopenharmony_ci                                               4, 5, 6, 7,
104bf215546Sopenharmony_ci                                               8, 9, 10, 11,
105bf215546Sopenharmony_ci                                               12, 13, 14, 15 };
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci   /* If the source is an explicitly sized source, then we need to reset
108bf215546Sopenharmony_ci    * both the number of components and the swizzle.
109bf215546Sopenharmony_ci    */
110bf215546Sopenharmony_ci   const unsigned num_components = nir_ssa_alu_instr_src_components(instr, src);
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_components; ++i)
113bf215546Sopenharmony_ci      swizzle[i] = instr->src[src].swizzle[i];
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   const nir_load_const_instr *const load =
116bf215546Sopenharmony_ci      nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   struct ssa_result_range r = { unknown, false, false, false };
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   switch (nir_alu_type_get_base_type(use_type)) {
121bf215546Sopenharmony_ci   case nir_type_float: {
122bf215546Sopenharmony_ci      double min_value = DBL_MAX;
123bf215546Sopenharmony_ci      double max_value = -DBL_MAX;
124bf215546Sopenharmony_ci      bool any_zero = false;
125bf215546Sopenharmony_ci      bool all_zero = true;
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci      r.is_integral = true;
128bf215546Sopenharmony_ci      r.is_a_number = true;
129bf215546Sopenharmony_ci      r.is_finite = true;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_components; ++i) {
132bf215546Sopenharmony_ci         const double v = nir_const_value_as_float(load->value[swizzle[i]],
133bf215546Sopenharmony_ci                                                   load->def.bit_size);
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci         if (floor(v) != v)
136bf215546Sopenharmony_ci            r.is_integral = false;
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci         if (isnan(v))
139bf215546Sopenharmony_ci            r.is_a_number = false;
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci         if (!isfinite(v))
142bf215546Sopenharmony_ci            r.is_finite = false;
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci         any_zero = any_zero || (v == 0.0);
145bf215546Sopenharmony_ci         all_zero = all_zero && (v == 0.0);
146bf215546Sopenharmony_ci         min_value = MIN2(min_value, v);
147bf215546Sopenharmony_ci         max_value = MAX2(max_value, v);
148bf215546Sopenharmony_ci      }
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci      assert(any_zero >= all_zero);
151bf215546Sopenharmony_ci      assert(isnan(max_value) || max_value >= min_value);
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci      if (all_zero)
154bf215546Sopenharmony_ci         r.range = eq_zero;
155bf215546Sopenharmony_ci      else if (min_value > 0.0)
156bf215546Sopenharmony_ci         r.range = gt_zero;
157bf215546Sopenharmony_ci      else if (min_value == 0.0)
158bf215546Sopenharmony_ci         r.range = ge_zero;
159bf215546Sopenharmony_ci      else if (max_value < 0.0)
160bf215546Sopenharmony_ci         r.range = lt_zero;
161bf215546Sopenharmony_ci      else if (max_value == 0.0)
162bf215546Sopenharmony_ci         r.range = le_zero;
163bf215546Sopenharmony_ci      else if (!any_zero)
164bf215546Sopenharmony_ci         r.range = ne_zero;
165bf215546Sopenharmony_ci      else
166bf215546Sopenharmony_ci         r.range = unknown;
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci      return r;
169bf215546Sopenharmony_ci   }
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   case nir_type_int:
172bf215546Sopenharmony_ci   case nir_type_bool: {
173bf215546Sopenharmony_ci      int64_t min_value = INT_MAX;
174bf215546Sopenharmony_ci      int64_t max_value = INT_MIN;
175bf215546Sopenharmony_ci      bool any_zero = false;
176bf215546Sopenharmony_ci      bool all_zero = true;
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_components; ++i) {
179bf215546Sopenharmony_ci         const int64_t v = nir_const_value_as_int(load->value[swizzle[i]],
180bf215546Sopenharmony_ci                                                  load->def.bit_size);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci         any_zero = any_zero || (v == 0);
183bf215546Sopenharmony_ci         all_zero = all_zero && (v == 0);
184bf215546Sopenharmony_ci         min_value = MIN2(min_value, v);
185bf215546Sopenharmony_ci         max_value = MAX2(max_value, v);
186bf215546Sopenharmony_ci      }
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci      assert(any_zero >= all_zero);
189bf215546Sopenharmony_ci      assert(max_value >= min_value);
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci      if (all_zero)
192bf215546Sopenharmony_ci         r.range = eq_zero;
193bf215546Sopenharmony_ci      else if (min_value > 0)
194bf215546Sopenharmony_ci         r.range = gt_zero;
195bf215546Sopenharmony_ci      else if (min_value == 0)
196bf215546Sopenharmony_ci         r.range = ge_zero;
197bf215546Sopenharmony_ci      else if (max_value < 0)
198bf215546Sopenharmony_ci         r.range = lt_zero;
199bf215546Sopenharmony_ci      else if (max_value == 0)
200bf215546Sopenharmony_ci         r.range = le_zero;
201bf215546Sopenharmony_ci      else if (!any_zero)
202bf215546Sopenharmony_ci         r.range = ne_zero;
203bf215546Sopenharmony_ci      else
204bf215546Sopenharmony_ci         r.range = unknown;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci      return r;
207bf215546Sopenharmony_ci   }
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   case nir_type_uint: {
210bf215546Sopenharmony_ci      bool any_zero = false;
211bf215546Sopenharmony_ci      bool all_zero = true;
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_components; ++i) {
214bf215546Sopenharmony_ci         const uint64_t v = nir_const_value_as_uint(load->value[swizzle[i]],
215bf215546Sopenharmony_ci                                                    load->def.bit_size);
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci         any_zero = any_zero || (v == 0);
218bf215546Sopenharmony_ci         all_zero = all_zero && (v == 0);
219bf215546Sopenharmony_ci      }
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci      assert(any_zero >= all_zero);
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci      if (all_zero)
224bf215546Sopenharmony_ci         r.range = eq_zero;
225bf215546Sopenharmony_ci      else if (any_zero)
226bf215546Sopenharmony_ci         r.range = ge_zero;
227bf215546Sopenharmony_ci      else
228bf215546Sopenharmony_ci         r.range = gt_zero;
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci      return r;
231bf215546Sopenharmony_ci   }
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci   default:
234bf215546Sopenharmony_ci      unreachable("Invalid alu source type");
235bf215546Sopenharmony_ci   }
236bf215546Sopenharmony_ci}
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci/**
239bf215546Sopenharmony_ci * Short-hand name for use in the tables in analyze_expression.  If this name
240bf215546Sopenharmony_ci * becomes a problem on some compiler, we can change it to _.
241bf215546Sopenharmony_ci */
242bf215546Sopenharmony_ci#define _______ unknown
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci#if defined(__clang__)
246bf215546Sopenharmony_ci   /* clang wants _Pragma("unroll X") */
247bf215546Sopenharmony_ci   #define pragma_unroll_5 _Pragma("unroll 5")
248bf215546Sopenharmony_ci   #define pragma_unroll_7 _Pragma("unroll 7")
249bf215546Sopenharmony_ci/* gcc wants _Pragma("GCC unroll X") */
250bf215546Sopenharmony_ci#elif defined(__GNUC__)
251bf215546Sopenharmony_ci   #if __GNUC__ >= 8
252bf215546Sopenharmony_ci      #define pragma_unroll_5 _Pragma("GCC unroll 5")
253bf215546Sopenharmony_ci      #define pragma_unroll_7 _Pragma("GCC unroll 7")
254bf215546Sopenharmony_ci   #else
255bf215546Sopenharmony_ci      #pragma GCC optimize ("unroll-loops")
256bf215546Sopenharmony_ci      #define pragma_unroll_5
257bf215546Sopenharmony_ci      #define pragma_unroll_7
258bf215546Sopenharmony_ci   #endif
259bf215546Sopenharmony_ci#else
260bf215546Sopenharmony_ci   /* MSVC doesn't have C99's _Pragma() */
261bf215546Sopenharmony_ci   #define pragma_unroll_5
262bf215546Sopenharmony_ci   #define pragma_unroll_7
263bf215546Sopenharmony_ci#endif
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci#ifndef NDEBUG
267bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_COMMUTATIVE(t)                        \
268bf215546Sopenharmony_ci   do {                                                       \
269bf215546Sopenharmony_ci      static bool first = true;                               \
270bf215546Sopenharmony_ci      if (first) {                                            \
271bf215546Sopenharmony_ci         first = false;                                       \
272bf215546Sopenharmony_ci         pragma_unroll_7                                      \
273bf215546Sopenharmony_ci         for (unsigned r = 0; r < ARRAY_SIZE(t); r++) {       \
274bf215546Sopenharmony_ci            pragma_unroll_7                                   \
275bf215546Sopenharmony_ci            for (unsigned c = 0; c < ARRAY_SIZE(t[0]); c++)   \
276bf215546Sopenharmony_ci               assert(t[r][c] == t[c][r]);                    \
277bf215546Sopenharmony_ci         }                                                    \
278bf215546Sopenharmony_ci      }                                                       \
279bf215546Sopenharmony_ci   } while (false)
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_DIAGONAL(t)                           \
282bf215546Sopenharmony_ci   do {                                                       \
283bf215546Sopenharmony_ci      static bool first = true;                               \
284bf215546Sopenharmony_ci      if (first) {                                            \
285bf215546Sopenharmony_ci         first = false;                                       \
286bf215546Sopenharmony_ci         pragma_unroll_7                                      \
287bf215546Sopenharmony_ci         for (unsigned r = 0; r < ARRAY_SIZE(t); r++)         \
288bf215546Sopenharmony_ci            assert(t[r][r] == r);                             \
289bf215546Sopenharmony_ci      }                                                       \
290bf215546Sopenharmony_ci   } while (false)
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci#else
293bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_COMMUTATIVE(t)
294bf215546Sopenharmony_ci#define ASSERT_TABLE_IS_DIAGONAL(t)
295bf215546Sopenharmony_ci#endif /* !defined(NDEBUG) */
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_cistatic enum ssa_ranges
298bf215546Sopenharmony_ciunion_ranges(enum ssa_ranges a, enum ssa_ranges b)
299bf215546Sopenharmony_ci{
300bf215546Sopenharmony_ci   static const enum ssa_ranges union_table[last_range + 1][last_range + 1] = {
301bf215546Sopenharmony_ci      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
302bf215546Sopenharmony_ci      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
303bf215546Sopenharmony_ci      /* lt_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, le_zero },
304bf215546Sopenharmony_ci      /* le_zero */ { _______, le_zero, le_zero, _______, _______, _______, le_zero },
305bf215546Sopenharmony_ci      /* gt_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, ge_zero },
306bf215546Sopenharmony_ci      /* ge_zero */ { _______, _______, _______, ge_zero, ge_zero, _______, ge_zero },
307bf215546Sopenharmony_ci      /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, _______ },
308bf215546Sopenharmony_ci      /* eq_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
309bf215546Sopenharmony_ci   };
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   ASSERT_TABLE_IS_COMMUTATIVE(union_table);
312bf215546Sopenharmony_ci   ASSERT_TABLE_IS_DIAGONAL(union_table);
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   return union_table[a][b];
315bf215546Sopenharmony_ci}
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci#ifndef NDEBUG
318bf215546Sopenharmony_ci/* Verify that the 'unknown' entry in each row (or column) of the table is the
319bf215546Sopenharmony_ci * union of all the other values in the row (or column).
320bf215546Sopenharmony_ci */
321bf215546Sopenharmony_ci#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)              \
322bf215546Sopenharmony_ci   do {                                                                 \
323bf215546Sopenharmony_ci      static bool first = true;                                         \
324bf215546Sopenharmony_ci      if (first) {                                                      \
325bf215546Sopenharmony_ci         first = false;                                                 \
326bf215546Sopenharmony_ci         pragma_unroll_7                                                \
327bf215546Sopenharmony_ci         for (unsigned i = 0; i < last_range; i++) {                    \
328bf215546Sopenharmony_ci            enum ssa_ranges col_range = t[i][unknown + 1];              \
329bf215546Sopenharmony_ci            enum ssa_ranges row_range = t[unknown + 1][i];              \
330bf215546Sopenharmony_ci                                                                        \
331bf215546Sopenharmony_ci            pragma_unroll_5                                             \
332bf215546Sopenharmony_ci            for (unsigned j = unknown + 2; j < last_range; j++) {       \
333bf215546Sopenharmony_ci               col_range = union_ranges(col_range, t[i][j]);            \
334bf215546Sopenharmony_ci               row_range = union_ranges(row_range, t[j][i]);            \
335bf215546Sopenharmony_ci            }                                                           \
336bf215546Sopenharmony_ci                                                                        \
337bf215546Sopenharmony_ci            assert(col_range == t[i][unknown]);                         \
338bf215546Sopenharmony_ci            assert(row_range == t[unknown][i]);                         \
339bf215546Sopenharmony_ci         }                                                              \
340bf215546Sopenharmony_ci      }                                                                 \
341bf215546Sopenharmony_ci   } while (false)
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci/* For most operations, the union of ranges for a strict inequality and
344bf215546Sopenharmony_ci * equality should be the range of the non-strict inequality (e.g.,
345bf215546Sopenharmony_ci * union_ranges(range(op(lt_zero), range(op(eq_zero))) == range(op(le_zero)).
346bf215546Sopenharmony_ci *
347bf215546Sopenharmony_ci * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
348bf215546Sopenharmony_ci */
349bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t) \
350bf215546Sopenharmony_ci   do {                                                                 \
351bf215546Sopenharmony_ci      assert(union_ranges(t[lt_zero], t[eq_zero]) == t[le_zero]);       \
352bf215546Sopenharmony_ci      assert(union_ranges(t[gt_zero], t[eq_zero]) == t[ge_zero]);       \
353bf215546Sopenharmony_ci   } while (false)
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t) \
356bf215546Sopenharmony_ci   do {                                                                 \
357bf215546Sopenharmony_ci      static bool first = true;                                         \
358bf215546Sopenharmony_ci      if (first) {                                                      \
359bf215546Sopenharmony_ci         first = false;                                                 \
360bf215546Sopenharmony_ci         pragma_unroll_7                                                \
361bf215546Sopenharmony_ci         for (unsigned i = 0; i < last_range; i++) {                    \
362bf215546Sopenharmony_ci            assert(union_ranges(t[i][lt_zero], t[i][eq_zero]) == t[i][le_zero]); \
363bf215546Sopenharmony_ci            assert(union_ranges(t[i][gt_zero], t[i][eq_zero]) == t[i][ge_zero]); \
364bf215546Sopenharmony_ci            assert(union_ranges(t[lt_zero][i], t[eq_zero][i]) == t[le_zero][i]); \
365bf215546Sopenharmony_ci            assert(union_ranges(t[gt_zero][i], t[eq_zero][i]) == t[ge_zero][i]); \
366bf215546Sopenharmony_ci         }                                                              \
367bf215546Sopenharmony_ci      }                                                                 \
368bf215546Sopenharmony_ci   } while (false)
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci/* Several other unordered tuples span the range of "everything."  Each should
371bf215546Sopenharmony_ci * have the same value as unknown: (lt_zero, ge_zero), (le_zero, gt_zero), and
372bf215546Sopenharmony_ci * (eq_zero, ne_zero).  union_ranges is already commutative, so only one
373bf215546Sopenharmony_ci * ordering needs to be checked.
374bf215546Sopenharmony_ci *
375bf215546Sopenharmony_ci * Does not apply to selection-like opcodes (bcsel, fmin, fmax, etc.).
376bf215546Sopenharmony_ci *
377bf215546Sopenharmony_ci * In cases where this can be used, it is unnecessary to also use
378bf215546Sopenharmony_ci * ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_*_SOURCE.  For any range X,
379bf215546Sopenharmony_ci * union_ranges(X, X) == X.  The disjoint ranges cover all of the non-unknown
380bf215546Sopenharmony_ci * possibilities, so the union of all the unions of disjoint ranges is
381bf215546Sopenharmony_ci * equivalent to the union of "others."
382bf215546Sopenharmony_ci */
383bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)            \
384bf215546Sopenharmony_ci   do {                                                                 \
385bf215546Sopenharmony_ci      assert(union_ranges(t[lt_zero], t[ge_zero]) == t[unknown]);       \
386bf215546Sopenharmony_ci      assert(union_ranges(t[le_zero], t[gt_zero]) == t[unknown]);       \
387bf215546Sopenharmony_ci      assert(union_ranges(t[eq_zero], t[ne_zero]) == t[unknown]);       \
388bf215546Sopenharmony_ci   } while (false)
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)            \
391bf215546Sopenharmony_ci   do {                                                                 \
392bf215546Sopenharmony_ci      static bool first = true;                                         \
393bf215546Sopenharmony_ci      if (first) {                                                      \
394bf215546Sopenharmony_ci         first = false;                                                 \
395bf215546Sopenharmony_ci         pragma_unroll_7                                                \
396bf215546Sopenharmony_ci         for (unsigned i = 0; i < last_range; i++) {                    \
397bf215546Sopenharmony_ci            assert(union_ranges(t[i][lt_zero], t[i][ge_zero]) ==        \
398bf215546Sopenharmony_ci                   t[i][unknown]);                                      \
399bf215546Sopenharmony_ci            assert(union_ranges(t[i][le_zero], t[i][gt_zero]) ==        \
400bf215546Sopenharmony_ci                   t[i][unknown]);                                      \
401bf215546Sopenharmony_ci            assert(union_ranges(t[i][eq_zero], t[i][ne_zero]) ==        \
402bf215546Sopenharmony_ci                   t[i][unknown]);                                      \
403bf215546Sopenharmony_ci                                                                        \
404bf215546Sopenharmony_ci            assert(union_ranges(t[lt_zero][i], t[ge_zero][i]) ==        \
405bf215546Sopenharmony_ci                   t[unknown][i]);                                      \
406bf215546Sopenharmony_ci            assert(union_ranges(t[le_zero][i], t[gt_zero][i]) ==        \
407bf215546Sopenharmony_ci                   t[unknown][i]);                                      \
408bf215546Sopenharmony_ci            assert(union_ranges(t[eq_zero][i], t[ne_zero][i]) ==        \
409bf215546Sopenharmony_ci                   t[unknown][i]);                                      \
410bf215546Sopenharmony_ci         }                                                              \
411bf215546Sopenharmony_ci      }                                                                 \
412bf215546Sopenharmony_ci   } while (false)
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci#else
415bf215546Sopenharmony_ci#define ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(t)
416bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(t)
417bf215546Sopenharmony_ci#define ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(t)
418bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(t)
419bf215546Sopenharmony_ci#define ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(t)
420bf215546Sopenharmony_ci#endif /* !defined(NDEBUG) */
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci/**
423bf215546Sopenharmony_ci * Analyze an expression to determine the range of its result
424bf215546Sopenharmony_ci *
425bf215546Sopenharmony_ci * The end result of this analysis is a token that communicates something
426bf215546Sopenharmony_ci * about the range of values.  There's an implicit grammar that produces
427bf215546Sopenharmony_ci * tokens from sequences of literal values, other tokens, and operations.
428bf215546Sopenharmony_ci * This function implements this grammar as a recursive-descent parser.  Some
429bf215546Sopenharmony_ci * (but not all) of the grammar is listed in-line in the function.
430bf215546Sopenharmony_ci */
431bf215546Sopenharmony_cistatic struct ssa_result_range
432bf215546Sopenharmony_cianalyze_expression(const nir_alu_instr *instr, unsigned src,
433bf215546Sopenharmony_ci                   struct hash_table *ht, nir_alu_type use_type)
434bf215546Sopenharmony_ci{
435bf215546Sopenharmony_ci   /* Ensure that the _Pragma("GCC unroll 7") above are correct. */
436bf215546Sopenharmony_ci   STATIC_ASSERT(last_range + 1 == 7);
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci   if (!instr->src[src].src.is_ssa)
439bf215546Sopenharmony_ci      return (struct ssa_result_range){unknown, false, false, false};
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   if (nir_src_is_const(instr->src[src].src))
442bf215546Sopenharmony_ci      return analyze_constant(instr, src, use_type);
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
445bf215546Sopenharmony_ci      return (struct ssa_result_range){unknown, false, false, false};
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci   const struct nir_alu_instr *const alu =
448bf215546Sopenharmony_ci       nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   /* Bail if the type of the instruction generating the value does not match
451bf215546Sopenharmony_ci    * the type the value will be interpreted as.  int/uint/bool can be
452bf215546Sopenharmony_ci    * reinterpreted trivially.  The most important cases are between float and
453bf215546Sopenharmony_ci    * non-float.
454bf215546Sopenharmony_ci    */
455bf215546Sopenharmony_ci   if (alu->op != nir_op_mov && alu->op != nir_op_bcsel) {
456bf215546Sopenharmony_ci      const nir_alu_type use_base_type =
457bf215546Sopenharmony_ci         nir_alu_type_get_base_type(use_type);
458bf215546Sopenharmony_ci      const nir_alu_type src_base_type =
459bf215546Sopenharmony_ci         nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci      if (use_base_type != src_base_type &&
462bf215546Sopenharmony_ci          (use_base_type == nir_type_float ||
463bf215546Sopenharmony_ci           src_base_type == nir_type_float)) {
464bf215546Sopenharmony_ci         return (struct ssa_result_range){unknown, false, false, false};
465bf215546Sopenharmony_ci      }
466bf215546Sopenharmony_ci   }
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci   struct hash_entry *he = _mesa_hash_table_search(ht, pack_key(alu, use_type));
469bf215546Sopenharmony_ci   if (he != NULL)
470bf215546Sopenharmony_ci      return unpack_data(he->data);
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci   struct ssa_result_range r = {unknown, false, false, false};
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci   /* ge_zero: ge_zero + ge_zero
475bf215546Sopenharmony_ci    *
476bf215546Sopenharmony_ci    * gt_zero: gt_zero + eq_zero
477bf215546Sopenharmony_ci    *        | gt_zero + ge_zero
478bf215546Sopenharmony_ci    *        | eq_zero + gt_zero   # Addition is commutative
479bf215546Sopenharmony_ci    *        | ge_zero + gt_zero   # Addition is commutative
480bf215546Sopenharmony_ci    *        | gt_zero + gt_zero
481bf215546Sopenharmony_ci    *        ;
482bf215546Sopenharmony_ci    *
483bf215546Sopenharmony_ci    * le_zero: le_zero + le_zero
484bf215546Sopenharmony_ci    *
485bf215546Sopenharmony_ci    * lt_zero: lt_zero + eq_zero
486bf215546Sopenharmony_ci    *        | lt_zero + le_zero
487bf215546Sopenharmony_ci    *        | eq_zero + lt_zero   # Addition is commutative
488bf215546Sopenharmony_ci    *        | le_zero + lt_zero   # Addition is commutative
489bf215546Sopenharmony_ci    *        | lt_zero + lt_zero
490bf215546Sopenharmony_ci    *        ;
491bf215546Sopenharmony_ci    *
492bf215546Sopenharmony_ci    * ne_zero: eq_zero + ne_zero
493bf215546Sopenharmony_ci    *        | ne_zero + eq_zero   # Addition is commutative
494bf215546Sopenharmony_ci    *        ;
495bf215546Sopenharmony_ci    *
496bf215546Sopenharmony_ci    * eq_zero: eq_zero + eq_zero
497bf215546Sopenharmony_ci    *        ;
498bf215546Sopenharmony_ci    *
499bf215546Sopenharmony_ci    * All other cases are 'unknown'.  The seeming odd entry is (ne_zero,
500bf215546Sopenharmony_ci    * ne_zero), but that could be (-5, +5) which is not ne_zero.
501bf215546Sopenharmony_ci    */
502bf215546Sopenharmony_ci   static const enum ssa_ranges fadd_table[last_range + 1][last_range + 1] = {
503bf215546Sopenharmony_ci      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
504bf215546Sopenharmony_ci      /* unknown */ { _______, _______, _______, _______, _______, _______, _______ },
505bf215546Sopenharmony_ci      /* lt_zero */ { _______, lt_zero, lt_zero, _______, _______, _______, lt_zero },
506bf215546Sopenharmony_ci      /* le_zero */ { _______, lt_zero, le_zero, _______, _______, _______, le_zero },
507bf215546Sopenharmony_ci      /* gt_zero */ { _______, _______, _______, gt_zero, gt_zero, _______, gt_zero },
508bf215546Sopenharmony_ci      /* ge_zero */ { _______, _______, _______, gt_zero, ge_zero, _______, ge_zero },
509bf215546Sopenharmony_ci      /* ne_zero */ { _______, _______, _______, _______, _______, _______, ne_zero },
510bf215546Sopenharmony_ci      /* eq_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
511bf215546Sopenharmony_ci   };
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci   ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
514bf215546Sopenharmony_ci   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fadd_table);
515bf215546Sopenharmony_ci   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fadd_table);
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci   /* Due to flush-to-zero semanatics of floating-point numbers with very
518bf215546Sopenharmony_ci    * small mangnitudes, we can never really be sure a result will be
519bf215546Sopenharmony_ci    * non-zero.
520bf215546Sopenharmony_ci    *
521bf215546Sopenharmony_ci    * ge_zero: ge_zero * ge_zero
522bf215546Sopenharmony_ci    *        | ge_zero * gt_zero
523bf215546Sopenharmony_ci    *        | ge_zero * eq_zero
524bf215546Sopenharmony_ci    *        | le_zero * lt_zero
525bf215546Sopenharmony_ci    *        | lt_zero * le_zero  # Multiplication is commutative
526bf215546Sopenharmony_ci    *        | le_zero * le_zero
527bf215546Sopenharmony_ci    *        | gt_zero * ge_zero  # Multiplication is commutative
528bf215546Sopenharmony_ci    *        | eq_zero * ge_zero  # Multiplication is commutative
529bf215546Sopenharmony_ci    *        | a * a              # Left source == right source
530bf215546Sopenharmony_ci    *        | gt_zero * gt_zero
531bf215546Sopenharmony_ci    *        | lt_zero * lt_zero
532bf215546Sopenharmony_ci    *        ;
533bf215546Sopenharmony_ci    *
534bf215546Sopenharmony_ci    * le_zero: ge_zero * le_zero
535bf215546Sopenharmony_ci    *        | ge_zero * lt_zero
536bf215546Sopenharmony_ci    *        | lt_zero * ge_zero  # Multiplication is commutative
537bf215546Sopenharmony_ci    *        | le_zero * ge_zero  # Multiplication is commutative
538bf215546Sopenharmony_ci    *        | le_zero * gt_zero
539bf215546Sopenharmony_ci    *        | lt_zero * gt_zero
540bf215546Sopenharmony_ci    *        | gt_zero * lt_zero  # Multiplication is commutative
541bf215546Sopenharmony_ci    *        ;
542bf215546Sopenharmony_ci    *
543bf215546Sopenharmony_ci    * eq_zero: eq_zero * <any>
544bf215546Sopenharmony_ci    *          <any> * eq_zero    # Multiplication is commutative
545bf215546Sopenharmony_ci    *
546bf215546Sopenharmony_ci    * All other cases are 'unknown'.
547bf215546Sopenharmony_ci    */
548bf215546Sopenharmony_ci   static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
549bf215546Sopenharmony_ci      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
550bf215546Sopenharmony_ci      /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
551bf215546Sopenharmony_ci      /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
552bf215546Sopenharmony_ci      /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
553bf215546Sopenharmony_ci      /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
554bf215546Sopenharmony_ci      /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
555bf215546Sopenharmony_ci      /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
556bf215546Sopenharmony_ci      /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
557bf215546Sopenharmony_ci   };
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci   ASSERT_TABLE_IS_COMMUTATIVE(fmul_table);
560bf215546Sopenharmony_ci   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(fmul_table);
561bf215546Sopenharmony_ci   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(fmul_table);
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   static const enum ssa_ranges fneg_table[last_range + 1] = {
564bf215546Sopenharmony_ci   /* unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
565bf215546Sopenharmony_ci      _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero
566bf215546Sopenharmony_ci   };
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(fneg_table);
569bf215546Sopenharmony_ci   ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(fneg_table);
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci   switch (alu->op) {
573bf215546Sopenharmony_ci   case nir_op_b2f32:
574bf215546Sopenharmony_ci   case nir_op_b2i32:
575bf215546Sopenharmony_ci      /* b2f32 will generate either 0.0 or 1.0.  This case is trivial.
576bf215546Sopenharmony_ci       *
577bf215546Sopenharmony_ci       * b2i32 will generate either 0x00000000 or 0x00000001.  When those bit
578bf215546Sopenharmony_ci       * patterns are interpreted as floating point, they are 0.0 and
579bf215546Sopenharmony_ci       * 1.401298464324817e-45.  The latter is subnormal, but it is finite and
580bf215546Sopenharmony_ci       * a number.
581bf215546Sopenharmony_ci       */
582bf215546Sopenharmony_ci      r = (struct ssa_result_range){ge_zero, alu->op == nir_op_b2f32, true, true};
583bf215546Sopenharmony_ci      break;
584bf215546Sopenharmony_ci
585bf215546Sopenharmony_ci   case nir_op_bcsel: {
586bf215546Sopenharmony_ci      const struct ssa_result_range left =
587bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, use_type);
588bf215546Sopenharmony_ci      const struct ssa_result_range right =
589bf215546Sopenharmony_ci         analyze_expression(alu, 2, ht, use_type);
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci      r.is_integral = left.is_integral && right.is_integral;
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci      /* This could be better, but it would require a lot of work.  For
594bf215546Sopenharmony_ci       * example, the result of the following is a number:
595bf215546Sopenharmony_ci       *
596bf215546Sopenharmony_ci       *    bcsel(a > 0.0, a, 38.6)
597bf215546Sopenharmony_ci       *
598bf215546Sopenharmony_ci       * If the result of 'a > 0.0' is true, then the use of 'a' in the true
599bf215546Sopenharmony_ci       * part of the bcsel must be a number.
600bf215546Sopenharmony_ci       *
601bf215546Sopenharmony_ci       * Other cases are even more challenging.
602bf215546Sopenharmony_ci       *
603bf215546Sopenharmony_ci       *    bcsel(a > 0.5, a - 0.5, 0.0)
604bf215546Sopenharmony_ci       */
605bf215546Sopenharmony_ci      r.is_a_number = left.is_a_number && right.is_a_number;
606bf215546Sopenharmony_ci      r.is_finite = left.is_finite && right.is_finite;
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci      r.range = union_ranges(left.range, right.range);
609bf215546Sopenharmony_ci      break;
610bf215546Sopenharmony_ci   }
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci   case nir_op_i2f32:
613bf215546Sopenharmony_ci   case nir_op_u2f32:
614bf215546Sopenharmony_ci      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci      r.is_integral = true;
617bf215546Sopenharmony_ci      r.is_a_number = true;
618bf215546Sopenharmony_ci      r.is_finite = true;
619bf215546Sopenharmony_ci
620bf215546Sopenharmony_ci      if (r.range == unknown && alu->op == nir_op_u2f32)
621bf215546Sopenharmony_ci         r.range = ge_zero;
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci      break;
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci   case nir_op_fabs:
626bf215546Sopenharmony_ci      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci      switch (r.range) {
629bf215546Sopenharmony_ci      case unknown:
630bf215546Sopenharmony_ci      case le_zero:
631bf215546Sopenharmony_ci      case ge_zero:
632bf215546Sopenharmony_ci         r.range = ge_zero;
633bf215546Sopenharmony_ci         break;
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci      case lt_zero:
636bf215546Sopenharmony_ci      case gt_zero:
637bf215546Sopenharmony_ci      case ne_zero:
638bf215546Sopenharmony_ci         r.range = gt_zero;
639bf215546Sopenharmony_ci         break;
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_ci      case eq_zero:
642bf215546Sopenharmony_ci         break;
643bf215546Sopenharmony_ci      }
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci      break;
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci   case nir_op_fadd: {
648bf215546Sopenharmony_ci      const struct ssa_result_range left =
649bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
650bf215546Sopenharmony_ci      const struct ssa_result_range right =
651bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci      r.is_integral = left.is_integral && right.is_integral;
654bf215546Sopenharmony_ci      r.range = fadd_table[left.range][right.range];
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci      /* X + Y is NaN if either operand is NaN or if one operand is +Inf and
657bf215546Sopenharmony_ci       * the other is -Inf.  If neither operand is NaN and at least one of the
658bf215546Sopenharmony_ci       * operands is finite, then the result cannot be NaN.
659bf215546Sopenharmony_ci       */
660bf215546Sopenharmony_ci      r.is_a_number = left.is_a_number && right.is_a_number &&
661bf215546Sopenharmony_ci         (left.is_finite || right.is_finite);
662bf215546Sopenharmony_ci      break;
663bf215546Sopenharmony_ci   }
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   case nir_op_fexp2: {
666bf215546Sopenharmony_ci      /* If the parameter might be less than zero, the mathematically result
667bf215546Sopenharmony_ci       * will be on (0, 1).  For sufficiently large magnitude negative
668bf215546Sopenharmony_ci       * parameters, the result will flush to zero.
669bf215546Sopenharmony_ci       */
670bf215546Sopenharmony_ci      static const enum ssa_ranges table[last_range + 1] = {
671bf215546Sopenharmony_ci      /* unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
672bf215546Sopenharmony_ci         ge_zero, ge_zero, ge_zero, gt_zero, gt_zero, ge_zero, gt_zero
673bf215546Sopenharmony_ci      };
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_ci      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci      ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_1_SOURCE(table);
678bf215546Sopenharmony_ci      ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_1_SOURCE(table);
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci      r.is_integral = r.is_integral && is_not_negative(r.range);
681bf215546Sopenharmony_ci      r.range = table[r.range];
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci      /* Various cases can result in NaN, so assume the worst. */
684bf215546Sopenharmony_ci      r.is_finite = false;
685bf215546Sopenharmony_ci      r.is_a_number = false;
686bf215546Sopenharmony_ci      break;
687bf215546Sopenharmony_ci   }
688bf215546Sopenharmony_ci
689bf215546Sopenharmony_ci   case nir_op_fmax: {
690bf215546Sopenharmony_ci      const struct ssa_result_range left =
691bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
692bf215546Sopenharmony_ci      const struct ssa_result_range right =
693bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
694bf215546Sopenharmony_ci
695bf215546Sopenharmony_ci      r.is_integral = left.is_integral && right.is_integral;
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_ci      /* This is conservative.  It may be possible to determine that the
698bf215546Sopenharmony_ci       * result must be finite in more cases, but it would take some effort to
699bf215546Sopenharmony_ci       * work out all the corners.  For example, fmax({lt_zero, finite},
700bf215546Sopenharmony_ci       * {lt_zero}) should result in {lt_zero, finite}.
701bf215546Sopenharmony_ci       */
702bf215546Sopenharmony_ci      r.is_finite = left.is_finite && right.is_finite;
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_ci      /* If one source is NaN, fmax always picks the other source. */
705bf215546Sopenharmony_ci      r.is_a_number = left.is_a_number || right.is_a_number;
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci      /* gt_zero: fmax(gt_zero, *)
708bf215546Sopenharmony_ci       *        | fmax(*, gt_zero)        # Treat fmax as commutative
709bf215546Sopenharmony_ci       *        ;
710bf215546Sopenharmony_ci       *
711bf215546Sopenharmony_ci       * ge_zero: fmax(ge_zero, ne_zero)
712bf215546Sopenharmony_ci       *        | fmax(ge_zero, lt_zero)
713bf215546Sopenharmony_ci       *        | fmax(ge_zero, le_zero)
714bf215546Sopenharmony_ci       *        | fmax(ge_zero, eq_zero)
715bf215546Sopenharmony_ci       *        | fmax(ne_zero, ge_zero)  # Treat fmax as commutative
716bf215546Sopenharmony_ci       *        | fmax(lt_zero, ge_zero)  # Treat fmax as commutative
717bf215546Sopenharmony_ci       *        | fmax(le_zero, ge_zero)  # Treat fmax as commutative
718bf215546Sopenharmony_ci       *        | fmax(eq_zero, ge_zero)  # Treat fmax as commutative
719bf215546Sopenharmony_ci       *        | fmax(ge_zero, ge_zero)
720bf215546Sopenharmony_ci       *        ;
721bf215546Sopenharmony_ci       *
722bf215546Sopenharmony_ci       * le_zero: fmax(le_zero, lt_zero)
723bf215546Sopenharmony_ci       *        | fmax(lt_zero, le_zero)  # Treat fmax as commutative
724bf215546Sopenharmony_ci       *        | fmax(le_zero, le_zero)
725bf215546Sopenharmony_ci       *        ;
726bf215546Sopenharmony_ci       *
727bf215546Sopenharmony_ci       * lt_zero: fmax(lt_zero, lt_zero)
728bf215546Sopenharmony_ci       *        ;
729bf215546Sopenharmony_ci       *
730bf215546Sopenharmony_ci       * ne_zero: fmax(ne_zero, lt_zero)
731bf215546Sopenharmony_ci       *        | fmax(lt_zero, ne_zero)  # Treat fmax as commutative
732bf215546Sopenharmony_ci       *        | fmax(ne_zero, ne_zero)
733bf215546Sopenharmony_ci       *        ;
734bf215546Sopenharmony_ci       *
735bf215546Sopenharmony_ci       * eq_zero: fmax(eq_zero, le_zero)
736bf215546Sopenharmony_ci       *        | fmax(eq_zero, lt_zero)
737bf215546Sopenharmony_ci       *        | fmax(le_zero, eq_zero)  # Treat fmax as commutative
738bf215546Sopenharmony_ci       *        | fmax(lt_zero, eq_zero)  # Treat fmax as commutative
739bf215546Sopenharmony_ci       *        | fmax(eq_zero, eq_zero)
740bf215546Sopenharmony_ci       *        ;
741bf215546Sopenharmony_ci       *
742bf215546Sopenharmony_ci       * All other cases are 'unknown'.
743bf215546Sopenharmony_ci       */
744bf215546Sopenharmony_ci      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
745bf215546Sopenharmony_ci         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
746bf215546Sopenharmony_ci         /* unknown */ { _______, _______, _______, gt_zero, ge_zero, _______, _______ },
747bf215546Sopenharmony_ci         /* lt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
748bf215546Sopenharmony_ci         /* le_zero */ { _______, le_zero, le_zero, gt_zero, ge_zero, _______, eq_zero },
749bf215546Sopenharmony_ci         /* gt_zero */ { gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero, gt_zero },
750bf215546Sopenharmony_ci         /* ge_zero */ { ge_zero, ge_zero, ge_zero, gt_zero, ge_zero, ge_zero, ge_zero },
751bf215546Sopenharmony_ci         /* ne_zero */ { _______, ne_zero, _______, gt_zero, ge_zero, ne_zero, _______ },
752bf215546Sopenharmony_ci         /* eq_zero */ { _______, eq_zero, eq_zero, gt_zero, ge_zero, _______, eq_zero }
753bf215546Sopenharmony_ci      };
754bf215546Sopenharmony_ci
755bf215546Sopenharmony_ci      /* Treat fmax as commutative. */
756bf215546Sopenharmony_ci      ASSERT_TABLE_IS_COMMUTATIVE(table);
757bf215546Sopenharmony_ci      ASSERT_TABLE_IS_DIAGONAL(table);
758bf215546Sopenharmony_ci      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci      r.range = table[left.range][right.range];
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci      /* Recall that when either value is NaN, fmax will pick the other value.
763bf215546Sopenharmony_ci       * This means the result range of the fmax will either be the "ideal"
764bf215546Sopenharmony_ci       * result range (calculated above) or the range of the non-NaN value.
765bf215546Sopenharmony_ci       */
766bf215546Sopenharmony_ci      if (!left.is_a_number)
767bf215546Sopenharmony_ci         r.range = union_ranges(r.range, right.range);
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci      if (!right.is_a_number)
770bf215546Sopenharmony_ci         r.range = union_ranges(r.range, left.range);
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci      break;
773bf215546Sopenharmony_ci   }
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci   case nir_op_fmin: {
776bf215546Sopenharmony_ci      const struct ssa_result_range left =
777bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
778bf215546Sopenharmony_ci      const struct ssa_result_range right =
779bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
780bf215546Sopenharmony_ci
781bf215546Sopenharmony_ci      r.is_integral = left.is_integral && right.is_integral;
782bf215546Sopenharmony_ci
783bf215546Sopenharmony_ci      /* This is conservative.  It may be possible to determine that the
784bf215546Sopenharmony_ci       * result must be finite in more cases, but it would take some effort to
785bf215546Sopenharmony_ci       * work out all the corners.  For example, fmin({gt_zero, finite},
786bf215546Sopenharmony_ci       * {gt_zero}) should result in {gt_zero, finite}.
787bf215546Sopenharmony_ci       */
788bf215546Sopenharmony_ci      r.is_finite = left.is_finite && right.is_finite;
789bf215546Sopenharmony_ci
790bf215546Sopenharmony_ci      /* If one source is NaN, fmin always picks the other source. */
791bf215546Sopenharmony_ci      r.is_a_number = left.is_a_number || right.is_a_number;
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci      /* lt_zero: fmin(lt_zero, *)
794bf215546Sopenharmony_ci       *        | fmin(*, lt_zero)        # Treat fmin as commutative
795bf215546Sopenharmony_ci       *        ;
796bf215546Sopenharmony_ci       *
797bf215546Sopenharmony_ci       * le_zero: fmin(le_zero, ne_zero)
798bf215546Sopenharmony_ci       *        | fmin(le_zero, gt_zero)
799bf215546Sopenharmony_ci       *        | fmin(le_zero, ge_zero)
800bf215546Sopenharmony_ci       *        | fmin(le_zero, eq_zero)
801bf215546Sopenharmony_ci       *        | fmin(ne_zero, le_zero)  # Treat fmin as commutative
802bf215546Sopenharmony_ci       *        | fmin(gt_zero, le_zero)  # Treat fmin as commutative
803bf215546Sopenharmony_ci       *        | fmin(ge_zero, le_zero)  # Treat fmin as commutative
804bf215546Sopenharmony_ci       *        | fmin(eq_zero, le_zero)  # Treat fmin as commutative
805bf215546Sopenharmony_ci       *        | fmin(le_zero, le_zero)
806bf215546Sopenharmony_ci       *        ;
807bf215546Sopenharmony_ci       *
808bf215546Sopenharmony_ci       * ge_zero: fmin(ge_zero, gt_zero)
809bf215546Sopenharmony_ci       *        | fmin(gt_zero, ge_zero)  # Treat fmin as commutative
810bf215546Sopenharmony_ci       *        | fmin(ge_zero, ge_zero)
811bf215546Sopenharmony_ci       *        ;
812bf215546Sopenharmony_ci       *
813bf215546Sopenharmony_ci       * gt_zero: fmin(gt_zero, gt_zero)
814bf215546Sopenharmony_ci       *        ;
815bf215546Sopenharmony_ci       *
816bf215546Sopenharmony_ci       * ne_zero: fmin(ne_zero, gt_zero)
817bf215546Sopenharmony_ci       *        | fmin(gt_zero, ne_zero)  # Treat fmin as commutative
818bf215546Sopenharmony_ci       *        | fmin(ne_zero, ne_zero)
819bf215546Sopenharmony_ci       *        ;
820bf215546Sopenharmony_ci       *
821bf215546Sopenharmony_ci       * eq_zero: fmin(eq_zero, ge_zero)
822bf215546Sopenharmony_ci       *        | fmin(eq_zero, gt_zero)
823bf215546Sopenharmony_ci       *        | fmin(ge_zero, eq_zero)  # Treat fmin as commutative
824bf215546Sopenharmony_ci       *        | fmin(gt_zero, eq_zero)  # Treat fmin as commutative
825bf215546Sopenharmony_ci       *        | fmin(eq_zero, eq_zero)
826bf215546Sopenharmony_ci       *        ;
827bf215546Sopenharmony_ci       *
828bf215546Sopenharmony_ci       * All other cases are 'unknown'.
829bf215546Sopenharmony_ci       */
830bf215546Sopenharmony_ci      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
831bf215546Sopenharmony_ci         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
832bf215546Sopenharmony_ci         /* unknown */ { _______, lt_zero, le_zero, _______, _______, _______, _______ },
833bf215546Sopenharmony_ci         /* lt_zero */ { lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero, lt_zero },
834bf215546Sopenharmony_ci         /* le_zero */ { le_zero, lt_zero, le_zero, le_zero, le_zero, le_zero, le_zero },
835bf215546Sopenharmony_ci         /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
836bf215546Sopenharmony_ci         /* ge_zero */ { _______, lt_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
837bf215546Sopenharmony_ci         /* ne_zero */ { _______, lt_zero, le_zero, ne_zero, _______, ne_zero, _______ },
838bf215546Sopenharmony_ci         /* eq_zero */ { _______, lt_zero, le_zero, eq_zero, eq_zero, _______, eq_zero }
839bf215546Sopenharmony_ci      };
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci      /* Treat fmin as commutative. */
842bf215546Sopenharmony_ci      ASSERT_TABLE_IS_COMMUTATIVE(table);
843bf215546Sopenharmony_ci      ASSERT_TABLE_IS_DIAGONAL(table);
844bf215546Sopenharmony_ci      ASSERT_UNION_OF_OTHERS_MATCHES_UNKNOWN_2_SOURCE(table);
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_ci      r.range = table[left.range][right.range];
847bf215546Sopenharmony_ci
848bf215546Sopenharmony_ci      /* Recall that when either value is NaN, fmin will pick the other value.
849bf215546Sopenharmony_ci       * This means the result range of the fmin will either be the "ideal"
850bf215546Sopenharmony_ci       * result range (calculated above) or the range of the non-NaN value.
851bf215546Sopenharmony_ci       */
852bf215546Sopenharmony_ci      if (!left.is_a_number)
853bf215546Sopenharmony_ci         r.range = union_ranges(r.range, right.range);
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci      if (!right.is_a_number)
856bf215546Sopenharmony_ci         r.range = union_ranges(r.range, left.range);
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci      break;
859bf215546Sopenharmony_ci   }
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_ci   case nir_op_fmul:
862bf215546Sopenharmony_ci   case nir_op_fmulz: {
863bf215546Sopenharmony_ci      const struct ssa_result_range left =
864bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
865bf215546Sopenharmony_ci      const struct ssa_result_range right =
866bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ci      r.is_integral = left.is_integral && right.is_integral;
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_ci      /* x * x => ge_zero */
871bf215546Sopenharmony_ci      if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
872bf215546Sopenharmony_ci         /* Even if x > 0, the result of x*x can be zero when x is, for
873bf215546Sopenharmony_ci          * example, a subnormal number.
874bf215546Sopenharmony_ci          */
875bf215546Sopenharmony_ci         r.range = ge_zero;
876bf215546Sopenharmony_ci      } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
877bf215546Sopenharmony_ci         /* -x * x => le_zero. */
878bf215546Sopenharmony_ci         r.range = le_zero;
879bf215546Sopenharmony_ci      } else
880bf215546Sopenharmony_ci         r.range = fmul_table[left.range][right.range];
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci      if (alu->op == nir_op_fmul) {
883bf215546Sopenharmony_ci         /* Mulitpliation produces NaN for X * NaN and for 0 * ±Inf.  If both
884bf215546Sopenharmony_ci          * operands are numbers and either both are finite or one is finite and
885bf215546Sopenharmony_ci          * the other cannot be zero, then the result must be a number.
886bf215546Sopenharmony_ci          */
887bf215546Sopenharmony_ci         r.is_a_number = (left.is_a_number && right.is_a_number) &&
888bf215546Sopenharmony_ci            ((left.is_finite && right.is_finite) ||
889bf215546Sopenharmony_ci             (!is_not_zero(left.range) && right.is_finite) ||
890bf215546Sopenharmony_ci             (left.is_finite && !is_not_zero(right.range)));
891bf215546Sopenharmony_ci      } else {
892bf215546Sopenharmony_ci         /* nir_op_fmulz: unlike nir_op_fmul, 0 * ±Inf is a number. */
893bf215546Sopenharmony_ci         r.is_a_number = left.is_a_number && right.is_a_number;
894bf215546Sopenharmony_ci      }
895bf215546Sopenharmony_ci
896bf215546Sopenharmony_ci      break;
897bf215546Sopenharmony_ci   }
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci   case nir_op_frcp:
900bf215546Sopenharmony_ci      r = (struct ssa_result_range){
901bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
902bf215546Sopenharmony_ci         false,
903bf215546Sopenharmony_ci         false, /* Various cases can result in NaN, so assume the worst. */
904bf215546Sopenharmony_ci         false  /*    "      "    "     "    "  "    "    "    "    "    */
905bf215546Sopenharmony_ci      };
906bf215546Sopenharmony_ci      break;
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci   case nir_op_mov:
909bf215546Sopenharmony_ci      r = analyze_expression(alu, 0, ht, use_type);
910bf215546Sopenharmony_ci      break;
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci   case nir_op_fneg:
913bf215546Sopenharmony_ci      r = analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
914bf215546Sopenharmony_ci
915bf215546Sopenharmony_ci      r.range = fneg_table[r.range];
916bf215546Sopenharmony_ci      break;
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci   case nir_op_fsat: {
919bf215546Sopenharmony_ci      const struct ssa_result_range left =
920bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci      /* fsat(NaN) = 0. */
923bf215546Sopenharmony_ci      r.is_a_number = true;
924bf215546Sopenharmony_ci      r.is_finite = true;
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci      switch (left.range) {
927bf215546Sopenharmony_ci      case le_zero:
928bf215546Sopenharmony_ci      case lt_zero:
929bf215546Sopenharmony_ci      case eq_zero:
930bf215546Sopenharmony_ci         r.range = eq_zero;
931bf215546Sopenharmony_ci         r.is_integral = true;
932bf215546Sopenharmony_ci         break;
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci      case gt_zero:
935bf215546Sopenharmony_ci         /* fsat is equivalent to fmin(fmax(X, 0.0), 1.0), so if X is not a
936bf215546Sopenharmony_ci          * number, the result will be 0.
937bf215546Sopenharmony_ci          */
938bf215546Sopenharmony_ci         r.range = left.is_a_number ? gt_zero : ge_zero;
939bf215546Sopenharmony_ci         r.is_integral = left.is_integral;
940bf215546Sopenharmony_ci         break;
941bf215546Sopenharmony_ci
942bf215546Sopenharmony_ci      case ge_zero:
943bf215546Sopenharmony_ci      case ne_zero:
944bf215546Sopenharmony_ci      case unknown:
945bf215546Sopenharmony_ci         /* Since the result must be in [0, 1], the value must be >= 0. */
946bf215546Sopenharmony_ci         r.range = ge_zero;
947bf215546Sopenharmony_ci         r.is_integral = left.is_integral;
948bf215546Sopenharmony_ci         break;
949bf215546Sopenharmony_ci      }
950bf215546Sopenharmony_ci      break;
951bf215546Sopenharmony_ci   }
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci   case nir_op_fsign:
954bf215546Sopenharmony_ci      r = (struct ssa_result_range){
955bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0)).range,
956bf215546Sopenharmony_ci         true,
957bf215546Sopenharmony_ci         true, /* fsign is -1, 0, or 1, even for NaN, so it must be a number. */
958bf215546Sopenharmony_ci         true  /* fsign is -1, 0, or 1, even for NaN, so it must be finite. */
959bf215546Sopenharmony_ci      };
960bf215546Sopenharmony_ci      break;
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_ci   case nir_op_fsqrt:
963bf215546Sopenharmony_ci   case nir_op_frsq:
964bf215546Sopenharmony_ci      r = (struct ssa_result_range){ge_zero, false, false, false};
965bf215546Sopenharmony_ci      break;
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_ci   case nir_op_ffloor: {
968bf215546Sopenharmony_ci      const struct ssa_result_range left =
969bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_ci      r.is_integral = true;
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_ci      /* In IEEE 754, floor(NaN) is NaN, and floor(±Inf) is ±Inf. See
974bf215546Sopenharmony_ci       * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/floor.html
975bf215546Sopenharmony_ci       */
976bf215546Sopenharmony_ci      r.is_a_number = left.is_a_number;
977bf215546Sopenharmony_ci      r.is_finite = left.is_finite;
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci      if (left.is_integral || left.range == le_zero || left.range == lt_zero)
980bf215546Sopenharmony_ci         r.range = left.range;
981bf215546Sopenharmony_ci      else if (left.range == ge_zero || left.range == gt_zero)
982bf215546Sopenharmony_ci         r.range = ge_zero;
983bf215546Sopenharmony_ci      else if (left.range == ne_zero)
984bf215546Sopenharmony_ci         r.range = unknown;
985bf215546Sopenharmony_ci
986bf215546Sopenharmony_ci      break;
987bf215546Sopenharmony_ci   }
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci   case nir_op_fceil: {
990bf215546Sopenharmony_ci      const struct ssa_result_range left =
991bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
992bf215546Sopenharmony_ci
993bf215546Sopenharmony_ci      r.is_integral = true;
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci      /* In IEEE 754, ceil(NaN) is NaN, and ceil(±Inf) is ±Inf. See
996bf215546Sopenharmony_ci       * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/ceil.html
997bf215546Sopenharmony_ci       */
998bf215546Sopenharmony_ci      r.is_a_number = left.is_a_number;
999bf215546Sopenharmony_ci      r.is_finite = left.is_finite;
1000bf215546Sopenharmony_ci
1001bf215546Sopenharmony_ci      if (left.is_integral || left.range == ge_zero || left.range == gt_zero)
1002bf215546Sopenharmony_ci         r.range = left.range;
1003bf215546Sopenharmony_ci      else if (left.range == le_zero || left.range == lt_zero)
1004bf215546Sopenharmony_ci         r.range = le_zero;
1005bf215546Sopenharmony_ci      else if (left.range == ne_zero)
1006bf215546Sopenharmony_ci         r.range = unknown;
1007bf215546Sopenharmony_ci
1008bf215546Sopenharmony_ci      break;
1009bf215546Sopenharmony_ci   }
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_ci   case nir_op_ftrunc: {
1012bf215546Sopenharmony_ci      const struct ssa_result_range left =
1013bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1014bf215546Sopenharmony_ci
1015bf215546Sopenharmony_ci      r.is_integral = true;
1016bf215546Sopenharmony_ci
1017bf215546Sopenharmony_ci      /* In IEEE 754, trunc(NaN) is NaN, and trunc(±Inf) is ±Inf.  See
1018bf215546Sopenharmony_ci       * https://pubs.opengroup.org/onlinepubs/9699919799.2016edition/functions/trunc.html
1019bf215546Sopenharmony_ci       */
1020bf215546Sopenharmony_ci      r.is_a_number = left.is_a_number;
1021bf215546Sopenharmony_ci      r.is_finite = left.is_finite;
1022bf215546Sopenharmony_ci
1023bf215546Sopenharmony_ci      if (left.is_integral)
1024bf215546Sopenharmony_ci         r.range = left.range;
1025bf215546Sopenharmony_ci      else if (left.range == ge_zero || left.range == gt_zero)
1026bf215546Sopenharmony_ci         r.range = ge_zero;
1027bf215546Sopenharmony_ci      else if (left.range == le_zero || left.range == lt_zero)
1028bf215546Sopenharmony_ci         r.range = le_zero;
1029bf215546Sopenharmony_ci      else if (left.range == ne_zero)
1030bf215546Sopenharmony_ci         r.range = unknown;
1031bf215546Sopenharmony_ci
1032bf215546Sopenharmony_ci      break;
1033bf215546Sopenharmony_ci   }
1034bf215546Sopenharmony_ci
1035bf215546Sopenharmony_ci   case nir_op_flt:
1036bf215546Sopenharmony_ci   case nir_op_fge:
1037bf215546Sopenharmony_ci   case nir_op_feq:
1038bf215546Sopenharmony_ci   case nir_op_fneu:
1039bf215546Sopenharmony_ci   case nir_op_ilt:
1040bf215546Sopenharmony_ci   case nir_op_ige:
1041bf215546Sopenharmony_ci   case nir_op_ieq:
1042bf215546Sopenharmony_ci   case nir_op_ine:
1043bf215546Sopenharmony_ci   case nir_op_ult:
1044bf215546Sopenharmony_ci   case nir_op_uge:
1045bf215546Sopenharmony_ci      /* Boolean results are 0 or -1. */
1046bf215546Sopenharmony_ci      r = (struct ssa_result_range){le_zero, false, true, false};
1047bf215546Sopenharmony_ci      break;
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_ci   case nir_op_fdot2:
1050bf215546Sopenharmony_ci   case nir_op_fdot3:
1051bf215546Sopenharmony_ci   case nir_op_fdot4:
1052bf215546Sopenharmony_ci   case nir_op_fdot8:
1053bf215546Sopenharmony_ci   case nir_op_fdot16:
1054bf215546Sopenharmony_ci   case nir_op_fdot2_replicated:
1055bf215546Sopenharmony_ci   case nir_op_fdot3_replicated:
1056bf215546Sopenharmony_ci   case nir_op_fdot4_replicated:
1057bf215546Sopenharmony_ci   case nir_op_fdot8_replicated:
1058bf215546Sopenharmony_ci   case nir_op_fdot16_replicated: {
1059bf215546Sopenharmony_ci      const struct ssa_result_range left =
1060bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_ci      /* If the two sources are the same SSA value, then the result is either
1063bf215546Sopenharmony_ci       * NaN or some number >= 0.  If one source is the negation of the other,
1064bf215546Sopenharmony_ci       * the result is either NaN or some number <= 0.
1065bf215546Sopenharmony_ci       *
1066bf215546Sopenharmony_ci       * In either of these two cases, if one source is a number, then the
1067bf215546Sopenharmony_ci       * other must also be a number.  Since it should not be possible to get
1068bf215546Sopenharmony_ci       * Inf-Inf in the dot-product, the result must also be a number.
1069bf215546Sopenharmony_ci       */
1070bf215546Sopenharmony_ci      if (nir_alu_srcs_equal(alu, alu, 0, 1)) {
1071bf215546Sopenharmony_ci         r = (struct ssa_result_range){ge_zero, false, left.is_a_number, false };
1072bf215546Sopenharmony_ci      } else if (nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
1073bf215546Sopenharmony_ci         r = (struct ssa_result_range){le_zero, false, left.is_a_number, false };
1074bf215546Sopenharmony_ci      } else {
1075bf215546Sopenharmony_ci         r = (struct ssa_result_range){unknown, false, false, false};
1076bf215546Sopenharmony_ci      }
1077bf215546Sopenharmony_ci      break;
1078bf215546Sopenharmony_ci   }
1079bf215546Sopenharmony_ci
1080bf215546Sopenharmony_ci   case nir_op_fpow: {
1081bf215546Sopenharmony_ci      /* Due to flush-to-zero semanatics of floating-point numbers with very
1082bf215546Sopenharmony_ci       * small mangnitudes, we can never really be sure a result will be
1083bf215546Sopenharmony_ci       * non-zero.
1084bf215546Sopenharmony_ci       *
1085bf215546Sopenharmony_ci       * NIR uses pow() and powf() to constant evaluate nir_op_fpow.  The man
1086bf215546Sopenharmony_ci       * page for that function says:
1087bf215546Sopenharmony_ci       *
1088bf215546Sopenharmony_ci       *    If y is 0, the result is 1.0 (even if x is a NaN).
1089bf215546Sopenharmony_ci       *
1090bf215546Sopenharmony_ci       * gt_zero: pow(*, eq_zero)
1091bf215546Sopenharmony_ci       *        | pow(eq_zero, lt_zero)   # 0^-y = +inf
1092bf215546Sopenharmony_ci       *        | pow(eq_zero, le_zero)   # 0^-y = +inf or 0^0 = 1.0
1093bf215546Sopenharmony_ci       *        ;
1094bf215546Sopenharmony_ci       *
1095bf215546Sopenharmony_ci       * eq_zero: pow(eq_zero, gt_zero)
1096bf215546Sopenharmony_ci       *        ;
1097bf215546Sopenharmony_ci       *
1098bf215546Sopenharmony_ci       * ge_zero: pow(gt_zero, gt_zero)
1099bf215546Sopenharmony_ci       *        | pow(gt_zero, ge_zero)
1100bf215546Sopenharmony_ci       *        | pow(gt_zero, lt_zero)
1101bf215546Sopenharmony_ci       *        | pow(gt_zero, le_zero)
1102bf215546Sopenharmony_ci       *        | pow(gt_zero, ne_zero)
1103bf215546Sopenharmony_ci       *        | pow(gt_zero, unknown)
1104bf215546Sopenharmony_ci       *        | pow(ge_zero, gt_zero)
1105bf215546Sopenharmony_ci       *        | pow(ge_zero, ge_zero)
1106bf215546Sopenharmony_ci       *        | pow(ge_zero, lt_zero)
1107bf215546Sopenharmony_ci       *        | pow(ge_zero, le_zero)
1108bf215546Sopenharmony_ci       *        | pow(ge_zero, ne_zero)
1109bf215546Sopenharmony_ci       *        | pow(ge_zero, unknown)
1110bf215546Sopenharmony_ci       *        | pow(eq_zero, ge_zero)  # 0^0 = 1.0 or 0^+y = 0.0
1111bf215546Sopenharmony_ci       *        | pow(eq_zero, ne_zero)  # 0^-y = +inf or 0^+y = 0.0
1112bf215546Sopenharmony_ci       *        | pow(eq_zero, unknown)  # union of all other y cases
1113bf215546Sopenharmony_ci       *        ;
1114bf215546Sopenharmony_ci       *
1115bf215546Sopenharmony_ci       * All other cases are unknown.
1116bf215546Sopenharmony_ci       *
1117bf215546Sopenharmony_ci       * We could do better if the right operand is a constant, integral
1118bf215546Sopenharmony_ci       * value.
1119bf215546Sopenharmony_ci       */
1120bf215546Sopenharmony_ci      static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
1121bf215546Sopenharmony_ci         /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
1122bf215546Sopenharmony_ci         /* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
1123bf215546Sopenharmony_ci         /* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
1124bf215546Sopenharmony_ci         /* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
1125bf215546Sopenharmony_ci         /* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
1126bf215546Sopenharmony_ci         /* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
1127bf215546Sopenharmony_ci         /* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
1128bf215546Sopenharmony_ci         /* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
1129bf215546Sopenharmony_ci      };
1130bf215546Sopenharmony_ci
1131bf215546Sopenharmony_ci      const struct ssa_result_range left =
1132bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1133bf215546Sopenharmony_ci      const struct ssa_result_range right =
1134bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
1135bf215546Sopenharmony_ci
1136bf215546Sopenharmony_ci      ASSERT_UNION_OF_DISJOINT_MATCHES_UNKNOWN_2_SOURCE(table);
1137bf215546Sopenharmony_ci      ASSERT_UNION_OF_EQ_AND_STRICT_INEQ_MATCHES_NONSTRICT_2_SOURCE(table);
1138bf215546Sopenharmony_ci
1139bf215546Sopenharmony_ci      r.is_integral = left.is_integral && right.is_integral &&
1140bf215546Sopenharmony_ci                      is_not_negative(right.range);
1141bf215546Sopenharmony_ci      r.range = table[left.range][right.range];
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_ci      /* Various cases can result in NaN, so assume the worst. */
1144bf215546Sopenharmony_ci      r.is_a_number = false;
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci      break;
1147bf215546Sopenharmony_ci   }
1148bf215546Sopenharmony_ci
1149bf215546Sopenharmony_ci   case nir_op_ffma: {
1150bf215546Sopenharmony_ci      const struct ssa_result_range first =
1151bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1152bf215546Sopenharmony_ci      const struct ssa_result_range second =
1153bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
1154bf215546Sopenharmony_ci      const struct ssa_result_range third =
1155bf215546Sopenharmony_ci         analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
1156bf215546Sopenharmony_ci
1157bf215546Sopenharmony_ci      r.is_integral = first.is_integral && second.is_integral &&
1158bf215546Sopenharmony_ci                      third.is_integral;
1159bf215546Sopenharmony_ci
1160bf215546Sopenharmony_ci      /* Various cases can result in NaN, so assume the worst. */
1161bf215546Sopenharmony_ci      r.is_a_number = false;
1162bf215546Sopenharmony_ci
1163bf215546Sopenharmony_ci      enum ssa_ranges fmul_range;
1164bf215546Sopenharmony_ci
1165bf215546Sopenharmony_ci      if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
1166bf215546Sopenharmony_ci         /* See handling of nir_op_fmul for explanation of why ge_zero is the
1167bf215546Sopenharmony_ci          * range.
1168bf215546Sopenharmony_ci          */
1169bf215546Sopenharmony_ci         fmul_range = ge_zero;
1170bf215546Sopenharmony_ci      } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
1171bf215546Sopenharmony_ci         /* -x * x => le_zero */
1172bf215546Sopenharmony_ci         fmul_range = le_zero;
1173bf215546Sopenharmony_ci      } else
1174bf215546Sopenharmony_ci         fmul_range = fmul_table[first.range][second.range];
1175bf215546Sopenharmony_ci
1176bf215546Sopenharmony_ci      r.range = fadd_table[fmul_range][third.range];
1177bf215546Sopenharmony_ci      break;
1178bf215546Sopenharmony_ci   }
1179bf215546Sopenharmony_ci
1180bf215546Sopenharmony_ci   case nir_op_flrp: {
1181bf215546Sopenharmony_ci      const struct ssa_result_range first =
1182bf215546Sopenharmony_ci         analyze_expression(alu, 0, ht, nir_alu_src_type(alu, 0));
1183bf215546Sopenharmony_ci      const struct ssa_result_range second =
1184bf215546Sopenharmony_ci         analyze_expression(alu, 1, ht, nir_alu_src_type(alu, 1));
1185bf215546Sopenharmony_ci      const struct ssa_result_range third =
1186bf215546Sopenharmony_ci         analyze_expression(alu, 2, ht, nir_alu_src_type(alu, 2));
1187bf215546Sopenharmony_ci
1188bf215546Sopenharmony_ci      r.is_integral = first.is_integral && second.is_integral &&
1189bf215546Sopenharmony_ci                      third.is_integral;
1190bf215546Sopenharmony_ci
1191bf215546Sopenharmony_ci      /* Various cases can result in NaN, so assume the worst. */
1192bf215546Sopenharmony_ci      r.is_a_number = false;
1193bf215546Sopenharmony_ci
1194bf215546Sopenharmony_ci      /* Decompose the flrp to first + third * (second + -first) */
1195bf215546Sopenharmony_ci      const enum ssa_ranges inner_fadd_range =
1196bf215546Sopenharmony_ci         fadd_table[second.range][fneg_table[first.range]];
1197bf215546Sopenharmony_ci
1198bf215546Sopenharmony_ci      const enum ssa_ranges fmul_range =
1199bf215546Sopenharmony_ci         fmul_table[third.range][inner_fadd_range];
1200bf215546Sopenharmony_ci
1201bf215546Sopenharmony_ci      r.range = fadd_table[first.range][fmul_range];
1202bf215546Sopenharmony_ci      break;
1203bf215546Sopenharmony_ci   }
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_ci   default:
1206bf215546Sopenharmony_ci      r = (struct ssa_result_range){unknown, false, false, false};
1207bf215546Sopenharmony_ci      break;
1208bf215546Sopenharmony_ci   }
1209bf215546Sopenharmony_ci
1210bf215546Sopenharmony_ci   if (r.range == eq_zero)
1211bf215546Sopenharmony_ci      r.is_integral = true;
1212bf215546Sopenharmony_ci
1213bf215546Sopenharmony_ci   /* Just like isfinite(), the is_finite flag implies the value is a number. */
1214bf215546Sopenharmony_ci   assert((int) r.is_finite <= (int) r.is_a_number);
1215bf215546Sopenharmony_ci
1216bf215546Sopenharmony_ci   _mesa_hash_table_insert(ht, pack_key(alu, use_type), pack_data(r));
1217bf215546Sopenharmony_ci   return r;
1218bf215546Sopenharmony_ci}
1219bf215546Sopenharmony_ci
1220bf215546Sopenharmony_ci#undef _______
1221bf215546Sopenharmony_ci
1222bf215546Sopenharmony_cistruct ssa_result_range
1223bf215546Sopenharmony_cinir_analyze_range(struct hash_table *range_ht,
1224bf215546Sopenharmony_ci                  const nir_alu_instr *instr, unsigned src)
1225bf215546Sopenharmony_ci{
1226bf215546Sopenharmony_ci   return analyze_expression(instr, src, range_ht,
1227bf215546Sopenharmony_ci                             nir_alu_src_type(instr, src));
1228bf215546Sopenharmony_ci}
1229bf215546Sopenharmony_ci
1230bf215546Sopenharmony_cistatic uint32_t bitmask(uint32_t size) {
1231bf215546Sopenharmony_ci   return size >= 32 ? 0xffffffffu : ((uint32_t)1 << size) - 1u;
1232bf215546Sopenharmony_ci}
1233bf215546Sopenharmony_ci
1234bf215546Sopenharmony_cistatic uint64_t mul_clamp(uint32_t a, uint32_t b)
1235bf215546Sopenharmony_ci{
1236bf215546Sopenharmony_ci   if (a != 0 && (a * b) / a != b)
1237bf215546Sopenharmony_ci      return (uint64_t)UINT32_MAX + 1;
1238bf215546Sopenharmony_ci   else
1239bf215546Sopenharmony_ci      return a * b;
1240bf215546Sopenharmony_ci}
1241bf215546Sopenharmony_ci
1242bf215546Sopenharmony_ci/* recursively gather at most "buf_size" phi/bcsel sources */
1243bf215546Sopenharmony_cistatic unsigned
1244bf215546Sopenharmony_cisearch_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size, struct set *visited)
1245bf215546Sopenharmony_ci{
1246bf215546Sopenharmony_ci   if (_mesa_set_search(visited, scalar.def))
1247bf215546Sopenharmony_ci      return 0;
1248bf215546Sopenharmony_ci   _mesa_set_add(visited, scalar.def);
1249bf215546Sopenharmony_ci
1250bf215546Sopenharmony_ci   if (scalar.def->parent_instr->type == nir_instr_type_phi) {
1251bf215546Sopenharmony_ci      nir_phi_instr *phi = nir_instr_as_phi(scalar.def->parent_instr);
1252bf215546Sopenharmony_ci      unsigned num_sources_left = exec_list_length(&phi->srcs);
1253bf215546Sopenharmony_ci      if (buf_size >= num_sources_left) {
1254bf215546Sopenharmony_ci         unsigned total_added = 0;
1255bf215546Sopenharmony_ci         nir_foreach_phi_src(src, phi) {
1256bf215546Sopenharmony_ci            num_sources_left--;
1257bf215546Sopenharmony_ci            unsigned added = search_phi_bcsel(nir_get_ssa_scalar(src->src.ssa, 0),
1258bf215546Sopenharmony_ci               buf + total_added, buf_size - num_sources_left, visited);
1259bf215546Sopenharmony_ci            assert(added <= buf_size);
1260bf215546Sopenharmony_ci            buf_size -= added;
1261bf215546Sopenharmony_ci            total_added += added;
1262bf215546Sopenharmony_ci         }
1263bf215546Sopenharmony_ci         return total_added;
1264bf215546Sopenharmony_ci      }
1265bf215546Sopenharmony_ci   }
1266bf215546Sopenharmony_ci
1267bf215546Sopenharmony_ci   if (nir_ssa_scalar_is_alu(scalar)) {
1268bf215546Sopenharmony_ci      nir_op op = nir_ssa_scalar_alu_op(scalar);
1269bf215546Sopenharmony_ci
1270bf215546Sopenharmony_ci      if ((op == nir_op_bcsel || op == nir_op_b32csel) && buf_size >= 2) {
1271bf215546Sopenharmony_ci         nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
1272bf215546Sopenharmony_ci         nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);
1273bf215546Sopenharmony_ci
1274bf215546Sopenharmony_ci         unsigned added = search_phi_bcsel(src0, buf, buf_size - 1, visited);
1275bf215546Sopenharmony_ci         buf_size -= added;
1276bf215546Sopenharmony_ci         added += search_phi_bcsel(src1, buf + added, buf_size, visited);
1277bf215546Sopenharmony_ci         return added;
1278bf215546Sopenharmony_ci      }
1279bf215546Sopenharmony_ci   }
1280bf215546Sopenharmony_ci
1281bf215546Sopenharmony_ci   buf[0] = scalar;
1282bf215546Sopenharmony_ci   return 1;
1283bf215546Sopenharmony_ci}
1284bf215546Sopenharmony_ci
1285bf215546Sopenharmony_cistatic nir_variable *
1286bf215546Sopenharmony_cilookup_input(nir_shader *shader, unsigned driver_location)
1287bf215546Sopenharmony_ci{
1288bf215546Sopenharmony_ci   return nir_find_variable_with_driver_location(shader, nir_var_shader_in,
1289bf215546Sopenharmony_ci                                                 driver_location);
1290bf215546Sopenharmony_ci}
1291bf215546Sopenharmony_ci
1292bf215546Sopenharmony_ci/* The config here should be generic enough to be correct on any HW. */
1293bf215546Sopenharmony_cistatic const nir_unsigned_upper_bound_config default_ub_config = {
1294bf215546Sopenharmony_ci   .min_subgroup_size = 1u,
1295bf215546Sopenharmony_ci   .max_subgroup_size = UINT16_MAX,
1296bf215546Sopenharmony_ci   .max_workgroup_invocations = UINT16_MAX,
1297bf215546Sopenharmony_ci   .max_workgroup_count = {UINT16_MAX, UINT16_MAX, UINT16_MAX},
1298bf215546Sopenharmony_ci   .max_workgroup_size = {UINT16_MAX, UINT16_MAX, UINT16_MAX},
1299bf215546Sopenharmony_ci   .vertex_attrib_max = {
1300bf215546Sopenharmony_ci      UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX,
1301bf215546Sopenharmony_ci      UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX,
1302bf215546Sopenharmony_ci      UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX,
1303bf215546Sopenharmony_ci      UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX,
1304bf215546Sopenharmony_ci   },
1305bf215546Sopenharmony_ci};
1306bf215546Sopenharmony_ci
1307bf215546Sopenharmony_ciuint32_t
1308bf215546Sopenharmony_cinir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
1309bf215546Sopenharmony_ci                         nir_ssa_scalar scalar,
1310bf215546Sopenharmony_ci                         const nir_unsigned_upper_bound_config *config)
1311bf215546Sopenharmony_ci{
1312bf215546Sopenharmony_ci   assert(scalar.def->bit_size <= 32);
1313bf215546Sopenharmony_ci
1314bf215546Sopenharmony_ci   if (!config)
1315bf215546Sopenharmony_ci      config = &default_ub_config;
1316bf215546Sopenharmony_ci   if (nir_ssa_scalar_is_const(scalar))
1317bf215546Sopenharmony_ci      return nir_ssa_scalar_as_uint(scalar);
1318bf215546Sopenharmony_ci
1319bf215546Sopenharmony_ci   /* keys can't be 0, so we have to add 1 to the index */
1320bf215546Sopenharmony_ci   void *key = (void*)(((uintptr_t)(scalar.def->index + 1) << 4) | scalar.comp);
1321bf215546Sopenharmony_ci   struct hash_entry *he = _mesa_hash_table_search(range_ht, key);
1322bf215546Sopenharmony_ci   if (he != NULL)
1323bf215546Sopenharmony_ci      return (uintptr_t)he->data;
1324bf215546Sopenharmony_ci
1325bf215546Sopenharmony_ci   uint32_t max = bitmask(scalar.def->bit_size);
1326bf215546Sopenharmony_ci
1327bf215546Sopenharmony_ci   if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) {
1328bf215546Sopenharmony_ci      uint32_t res = max;
1329bf215546Sopenharmony_ci      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr);
1330bf215546Sopenharmony_ci      switch (intrin->intrinsic) {
1331bf215546Sopenharmony_ci      case nir_intrinsic_load_local_invocation_index:
1332bf215546Sopenharmony_ci         /* The local invocation index is used under the hood by RADV for
1333bf215546Sopenharmony_ci          * some non-compute-like shaders (eg. LS and NGG). These technically
1334bf215546Sopenharmony_ci          * run in workgroups on the HW, even though this fact is not exposed
1335bf215546Sopenharmony_ci          * by the API.
1336bf215546Sopenharmony_ci          * They can safely use the same code path here as variable sized
1337bf215546Sopenharmony_ci          * compute-like shader stages.
1338bf215546Sopenharmony_ci          */
1339bf215546Sopenharmony_ci         if (!gl_shader_stage_uses_workgroup(shader->info.stage) ||
1340bf215546Sopenharmony_ci             shader->info.workgroup_size_variable) {
1341bf215546Sopenharmony_ci            res = config->max_workgroup_invocations - 1;
1342bf215546Sopenharmony_ci         } else {
1343bf215546Sopenharmony_ci            res = (shader->info.workgroup_size[0] *
1344bf215546Sopenharmony_ci                   shader->info.workgroup_size[1] *
1345bf215546Sopenharmony_ci                   shader->info.workgroup_size[2]) - 1u;
1346bf215546Sopenharmony_ci         }
1347bf215546Sopenharmony_ci         break;
1348bf215546Sopenharmony_ci      case nir_intrinsic_load_local_invocation_id:
1349bf215546Sopenharmony_ci         if (shader->info.workgroup_size_variable)
1350bf215546Sopenharmony_ci            res = config->max_workgroup_size[scalar.comp] - 1u;
1351bf215546Sopenharmony_ci         else
1352bf215546Sopenharmony_ci            res = shader->info.workgroup_size[scalar.comp] - 1u;
1353bf215546Sopenharmony_ci         break;
1354bf215546Sopenharmony_ci      case nir_intrinsic_load_workgroup_id:
1355bf215546Sopenharmony_ci         res = config->max_workgroup_count[scalar.comp] - 1u;
1356bf215546Sopenharmony_ci         break;
1357bf215546Sopenharmony_ci      case nir_intrinsic_load_num_workgroups:
1358bf215546Sopenharmony_ci         res = config->max_workgroup_count[scalar.comp];
1359bf215546Sopenharmony_ci         break;
1360bf215546Sopenharmony_ci      case nir_intrinsic_load_global_invocation_id:
1361bf215546Sopenharmony_ci         if (shader->info.workgroup_size_variable) {
1362bf215546Sopenharmony_ci            res = mul_clamp(config->max_workgroup_size[scalar.comp],
1363bf215546Sopenharmony_ci                            config->max_workgroup_count[scalar.comp]) - 1u;
1364bf215546Sopenharmony_ci         } else {
1365bf215546Sopenharmony_ci            res = (shader->info.workgroup_size[scalar.comp] *
1366bf215546Sopenharmony_ci                   config->max_workgroup_count[scalar.comp]) - 1u;
1367bf215546Sopenharmony_ci         }
1368bf215546Sopenharmony_ci         break;
1369bf215546Sopenharmony_ci      case nir_intrinsic_load_invocation_id:
1370bf215546Sopenharmony_ci         if (shader->info.stage == MESA_SHADER_TESS_CTRL)
1371bf215546Sopenharmony_ci            res = shader->info.tess.tcs_vertices_out
1372bf215546Sopenharmony_ci                  ? (shader->info.tess.tcs_vertices_out - 1)
1373bf215546Sopenharmony_ci                  : 511; /* Generous maximum output patch size of 512 */
1374bf215546Sopenharmony_ci         break;
1375bf215546Sopenharmony_ci      case nir_intrinsic_load_subgroup_invocation:
1376bf215546Sopenharmony_ci      case nir_intrinsic_first_invocation:
1377bf215546Sopenharmony_ci         res = config->max_subgroup_size - 1;
1378bf215546Sopenharmony_ci         break;
1379bf215546Sopenharmony_ci      case nir_intrinsic_mbcnt_amd: {
1380bf215546Sopenharmony_ci         uint32_t src0 = config->max_subgroup_size - 1;
1381bf215546Sopenharmony_ci         uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config);
1382bf215546Sopenharmony_ci
1383bf215546Sopenharmony_ci         if (src0 + src1 < src0)
1384bf215546Sopenharmony_ci            res = max; /* overflow */
1385bf215546Sopenharmony_ci         else
1386bf215546Sopenharmony_ci            res = src0 + src1;
1387bf215546Sopenharmony_ci         break;
1388bf215546Sopenharmony_ci      }
1389bf215546Sopenharmony_ci      case nir_intrinsic_load_subgroup_size:
1390bf215546Sopenharmony_ci         res = config->max_subgroup_size;
1391bf215546Sopenharmony_ci         break;
1392bf215546Sopenharmony_ci      case nir_intrinsic_load_subgroup_id:
1393bf215546Sopenharmony_ci      case nir_intrinsic_load_num_subgroups: {
1394bf215546Sopenharmony_ci         uint32_t workgroup_size = config->max_workgroup_invocations;
1395bf215546Sopenharmony_ci         if (gl_shader_stage_uses_workgroup(shader->info.stage) &&
1396bf215546Sopenharmony_ci             !shader->info.workgroup_size_variable) {
1397bf215546Sopenharmony_ci            workgroup_size = shader->info.workgroup_size[0] *
1398bf215546Sopenharmony_ci                             shader->info.workgroup_size[1] *
1399bf215546Sopenharmony_ci                             shader->info.workgroup_size[2];
1400bf215546Sopenharmony_ci         }
1401bf215546Sopenharmony_ci         res = DIV_ROUND_UP(workgroup_size, config->min_subgroup_size);
1402bf215546Sopenharmony_ci         if (intrin->intrinsic == nir_intrinsic_load_subgroup_id)
1403bf215546Sopenharmony_ci            res--;
1404bf215546Sopenharmony_ci         break;
1405bf215546Sopenharmony_ci      }
1406bf215546Sopenharmony_ci      case nir_intrinsic_load_input: {
1407bf215546Sopenharmony_ci         if (shader->info.stage == MESA_SHADER_VERTEX && nir_src_is_const(intrin->src[0])) {
1408bf215546Sopenharmony_ci            nir_variable *var = lookup_input(shader, nir_intrinsic_base(intrin));
1409bf215546Sopenharmony_ci            if (var) {
1410bf215546Sopenharmony_ci               int loc = var->data.location - VERT_ATTRIB_GENERIC0;
1411bf215546Sopenharmony_ci               if (loc >= 0)
1412bf215546Sopenharmony_ci                  res = config->vertex_attrib_max[loc];
1413bf215546Sopenharmony_ci            }
1414bf215546Sopenharmony_ci         }
1415bf215546Sopenharmony_ci         break;
1416bf215546Sopenharmony_ci      }
1417bf215546Sopenharmony_ci      case nir_intrinsic_reduce:
1418bf215546Sopenharmony_ci      case nir_intrinsic_inclusive_scan:
1419bf215546Sopenharmony_ci      case nir_intrinsic_exclusive_scan: {
1420bf215546Sopenharmony_ci         nir_op op = nir_intrinsic_reduction_op(intrin);
1421bf215546Sopenharmony_ci         if (op == nir_op_umin || op == nir_op_umax || op == nir_op_imin || op == nir_op_imax)
1422bf215546Sopenharmony_ci            res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
1423bf215546Sopenharmony_ci         break;
1424bf215546Sopenharmony_ci      }
1425bf215546Sopenharmony_ci      case nir_intrinsic_read_first_invocation:
1426bf215546Sopenharmony_ci      case nir_intrinsic_read_invocation:
1427bf215546Sopenharmony_ci      case nir_intrinsic_shuffle:
1428bf215546Sopenharmony_ci      case nir_intrinsic_shuffle_xor:
1429bf215546Sopenharmony_ci      case nir_intrinsic_shuffle_up:
1430bf215546Sopenharmony_ci      case nir_intrinsic_shuffle_down:
1431bf215546Sopenharmony_ci      case nir_intrinsic_quad_broadcast:
1432bf215546Sopenharmony_ci      case nir_intrinsic_quad_swap_horizontal:
1433bf215546Sopenharmony_ci      case nir_intrinsic_quad_swap_vertical:
1434bf215546Sopenharmony_ci      case nir_intrinsic_quad_swap_diagonal:
1435bf215546Sopenharmony_ci      case nir_intrinsic_quad_swizzle_amd:
1436bf215546Sopenharmony_ci      case nir_intrinsic_masked_swizzle_amd:
1437bf215546Sopenharmony_ci         res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
1438bf215546Sopenharmony_ci         break;
1439bf215546Sopenharmony_ci      case nir_intrinsic_write_invocation_amd: {
1440bf215546Sopenharmony_ci         uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
1441bf215546Sopenharmony_ci         uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config);
1442bf215546Sopenharmony_ci         res = MAX2(src0, src1);
1443bf215546Sopenharmony_ci         break;
1444bf215546Sopenharmony_ci      }
1445bf215546Sopenharmony_ci      case nir_intrinsic_load_tess_rel_patch_id_amd:
1446bf215546Sopenharmony_ci      case nir_intrinsic_load_tcs_num_patches_amd:
1447bf215546Sopenharmony_ci         /* Very generous maximum: TCS/TES executed by largest possible workgroup */
1448bf215546Sopenharmony_ci         res = config->max_workgroup_invocations / MAX2(shader->info.tess.tcs_vertices_out, 1u);
1449bf215546Sopenharmony_ci         break;
1450bf215546Sopenharmony_ci      case nir_intrinsic_load_scalar_arg_amd:
1451bf215546Sopenharmony_ci      case nir_intrinsic_load_vector_arg_amd: {
1452bf215546Sopenharmony_ci         uint32_t upper_bound = nir_intrinsic_arg_upper_bound_u32_amd(intrin);
1453bf215546Sopenharmony_ci         if (upper_bound)
1454bf215546Sopenharmony_ci            res = upper_bound;
1455bf215546Sopenharmony_ci         break;
1456bf215546Sopenharmony_ci      }
1457bf215546Sopenharmony_ci      default:
1458bf215546Sopenharmony_ci         break;
1459bf215546Sopenharmony_ci      }
1460bf215546Sopenharmony_ci      if (res != max)
1461bf215546Sopenharmony_ci         _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res);
1462bf215546Sopenharmony_ci      return res;
1463bf215546Sopenharmony_ci   }
1464bf215546Sopenharmony_ci
1465bf215546Sopenharmony_ci   if (scalar.def->parent_instr->type == nir_instr_type_phi) {
1466bf215546Sopenharmony_ci      nir_cf_node *prev = nir_cf_node_prev(&scalar.def->parent_instr->block->cf_node);
1467bf215546Sopenharmony_ci
1468bf215546Sopenharmony_ci      uint32_t res = 0;
1469bf215546Sopenharmony_ci      if (!prev || prev->type == nir_cf_node_block) {
1470bf215546Sopenharmony_ci         _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)max);
1471bf215546Sopenharmony_ci
1472bf215546Sopenharmony_ci         struct set *visited = _mesa_pointer_set_create(NULL);
1473bf215546Sopenharmony_ci         nir_ssa_scalar defs[64];
1474bf215546Sopenharmony_ci         unsigned def_count = search_phi_bcsel(scalar, defs, 64, visited);
1475bf215546Sopenharmony_ci         _mesa_set_destroy(visited, NULL);
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci         for (unsigned i = 0; i < def_count; i++)
1478bf215546Sopenharmony_ci            res = MAX2(res, nir_unsigned_upper_bound(shader, range_ht, defs[i], config));
1479bf215546Sopenharmony_ci      } else {
1480bf215546Sopenharmony_ci         nir_foreach_phi_src(src, nir_instr_as_phi(scalar.def->parent_instr)) {
1481bf215546Sopenharmony_ci            res = MAX2(res, nir_unsigned_upper_bound(
1482bf215546Sopenharmony_ci               shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config));
1483bf215546Sopenharmony_ci         }
1484bf215546Sopenharmony_ci      }
1485bf215546Sopenharmony_ci
1486bf215546Sopenharmony_ci      _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res);
1487bf215546Sopenharmony_ci      return res;
1488bf215546Sopenharmony_ci   }
1489bf215546Sopenharmony_ci
1490bf215546Sopenharmony_ci   if (nir_ssa_scalar_is_alu(scalar)) {
1491bf215546Sopenharmony_ci      nir_op op = nir_ssa_scalar_alu_op(scalar);
1492bf215546Sopenharmony_ci
1493bf215546Sopenharmony_ci      switch (op) {
1494bf215546Sopenharmony_ci      case nir_op_umin:
1495bf215546Sopenharmony_ci      case nir_op_imin:
1496bf215546Sopenharmony_ci      case nir_op_imax:
1497bf215546Sopenharmony_ci      case nir_op_umax:
1498bf215546Sopenharmony_ci      case nir_op_iand:
1499bf215546Sopenharmony_ci      case nir_op_ior:
1500bf215546Sopenharmony_ci      case nir_op_ixor:
1501bf215546Sopenharmony_ci      case nir_op_ishl:
1502bf215546Sopenharmony_ci      case nir_op_imul:
1503bf215546Sopenharmony_ci      case nir_op_ushr:
1504bf215546Sopenharmony_ci      case nir_op_ishr:
1505bf215546Sopenharmony_ci      case nir_op_iadd:
1506bf215546Sopenharmony_ci      case nir_op_umod:
1507bf215546Sopenharmony_ci      case nir_op_udiv:
1508bf215546Sopenharmony_ci      case nir_op_bcsel:
1509bf215546Sopenharmony_ci      case nir_op_b32csel:
1510bf215546Sopenharmony_ci      case nir_op_ubfe:
1511bf215546Sopenharmony_ci      case nir_op_bfm:
1512bf215546Sopenharmony_ci      case nir_op_fmul:
1513bf215546Sopenharmony_ci      case nir_op_fmulz:
1514bf215546Sopenharmony_ci      case nir_op_extract_u8:
1515bf215546Sopenharmony_ci      case nir_op_extract_i8:
1516bf215546Sopenharmony_ci      case nir_op_extract_u16:
1517bf215546Sopenharmony_ci      case nir_op_extract_i16:
1518bf215546Sopenharmony_ci         break;
1519bf215546Sopenharmony_ci      case nir_op_u2u1:
1520bf215546Sopenharmony_ci      case nir_op_u2u8:
1521bf215546Sopenharmony_ci      case nir_op_u2u16:
1522bf215546Sopenharmony_ci      case nir_op_u2u32:
1523bf215546Sopenharmony_ci      case nir_op_f2u32:
1524bf215546Sopenharmony_ci         if (nir_ssa_scalar_chase_alu_src(scalar, 0).def->bit_size > 32) {
1525bf215546Sopenharmony_ci            /* If src is >32 bits, return max */
1526bf215546Sopenharmony_ci            return max;
1527bf215546Sopenharmony_ci         }
1528bf215546Sopenharmony_ci         break;
1529bf215546Sopenharmony_ci      default:
1530bf215546Sopenharmony_ci         return max;
1531bf215546Sopenharmony_ci      }
1532bf215546Sopenharmony_ci
1533bf215546Sopenharmony_ci      uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 0), config);
1534bf215546Sopenharmony_ci      uint32_t src1 = max, src2 = max;
1535bf215546Sopenharmony_ci      if (nir_op_infos[op].num_inputs > 1)
1536bf215546Sopenharmony_ci         src1 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 1), config);
1537bf215546Sopenharmony_ci      if (nir_op_infos[op].num_inputs > 2)
1538bf215546Sopenharmony_ci         src2 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 2), config);
1539bf215546Sopenharmony_ci
1540bf215546Sopenharmony_ci      uint32_t res = max;
1541bf215546Sopenharmony_ci      switch (op) {
1542bf215546Sopenharmony_ci      case nir_op_umin:
1543bf215546Sopenharmony_ci         res = src0 < src1 ? src0 : src1;
1544bf215546Sopenharmony_ci         break;
1545bf215546Sopenharmony_ci      case nir_op_imin:
1546bf215546Sopenharmony_ci      case nir_op_imax:
1547bf215546Sopenharmony_ci      case nir_op_umax:
1548bf215546Sopenharmony_ci         res = src0 > src1 ? src0 : src1;
1549bf215546Sopenharmony_ci         break;
1550bf215546Sopenharmony_ci      case nir_op_iand:
1551bf215546Sopenharmony_ci         res = bitmask(util_last_bit64(src0)) & bitmask(util_last_bit64(src1));
1552bf215546Sopenharmony_ci         break;
1553bf215546Sopenharmony_ci      case nir_op_ior:
1554bf215546Sopenharmony_ci      case nir_op_ixor:
1555bf215546Sopenharmony_ci         res = bitmask(util_last_bit64(src0)) | bitmask(util_last_bit64(src1));
1556bf215546Sopenharmony_ci         break;
1557bf215546Sopenharmony_ci      case nir_op_ishl:
1558bf215546Sopenharmony_ci         if (util_last_bit64(src0) + src1 > scalar.def->bit_size)
1559bf215546Sopenharmony_ci            res = max; /* overflow */
1560bf215546Sopenharmony_ci         else
1561bf215546Sopenharmony_ci            res = src0 << MIN2(src1, scalar.def->bit_size - 1u);
1562bf215546Sopenharmony_ci         break;
1563bf215546Sopenharmony_ci      case nir_op_imul:
1564bf215546Sopenharmony_ci         if (src0 != 0 && (src0 * src1) / src0 != src1)
1565bf215546Sopenharmony_ci            res = max;
1566bf215546Sopenharmony_ci         else
1567bf215546Sopenharmony_ci            res = src0 * src1;
1568bf215546Sopenharmony_ci         break;
1569bf215546Sopenharmony_ci      case nir_op_ushr: {
1570bf215546Sopenharmony_ci         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
1571bf215546Sopenharmony_ci         if (nir_ssa_scalar_is_const(src1_scalar))
1572bf215546Sopenharmony_ci            res = src0 >> nir_ssa_scalar_as_uint(src1_scalar);
1573bf215546Sopenharmony_ci         else
1574bf215546Sopenharmony_ci            res = src0;
1575bf215546Sopenharmony_ci         break;
1576bf215546Sopenharmony_ci      }
1577bf215546Sopenharmony_ci      case nir_op_ishr: {
1578bf215546Sopenharmony_ci         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
1579bf215546Sopenharmony_ci         if (src0 <= 2147483647 && nir_ssa_scalar_is_const(src1_scalar))
1580bf215546Sopenharmony_ci            res = src0 >> nir_ssa_scalar_as_uint(src1_scalar);
1581bf215546Sopenharmony_ci         else
1582bf215546Sopenharmony_ci            res = src0;
1583bf215546Sopenharmony_ci         break;
1584bf215546Sopenharmony_ci      }
1585bf215546Sopenharmony_ci      case nir_op_iadd:
1586bf215546Sopenharmony_ci         if (src0 + src1 < src0)
1587bf215546Sopenharmony_ci            res = max; /* overflow */
1588bf215546Sopenharmony_ci         else
1589bf215546Sopenharmony_ci            res = src0 + src1;
1590bf215546Sopenharmony_ci         break;
1591bf215546Sopenharmony_ci      case nir_op_umod:
1592bf215546Sopenharmony_ci         res = src1 ? src1 - 1 : 0;
1593bf215546Sopenharmony_ci         break;
1594bf215546Sopenharmony_ci      case nir_op_udiv: {
1595bf215546Sopenharmony_ci         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
1596bf215546Sopenharmony_ci         if (nir_ssa_scalar_is_const(src1_scalar))
1597bf215546Sopenharmony_ci            res = nir_ssa_scalar_as_uint(src1_scalar) ? src0 / nir_ssa_scalar_as_uint(src1_scalar) : 0;
1598bf215546Sopenharmony_ci         else
1599bf215546Sopenharmony_ci            res = src0;
1600bf215546Sopenharmony_ci         break;
1601bf215546Sopenharmony_ci      }
1602bf215546Sopenharmony_ci      case nir_op_bcsel:
1603bf215546Sopenharmony_ci      case nir_op_b32csel:
1604bf215546Sopenharmony_ci         res = src1 > src2 ? src1 : src2;
1605bf215546Sopenharmony_ci         break;
1606bf215546Sopenharmony_ci      case nir_op_ubfe:
1607bf215546Sopenharmony_ci         res = bitmask(MIN2(src2, scalar.def->bit_size));
1608bf215546Sopenharmony_ci         break;
1609bf215546Sopenharmony_ci      case nir_op_bfm: {
1610bf215546Sopenharmony_ci         nir_ssa_scalar src1_scalar = nir_ssa_scalar_chase_alu_src(scalar, 1);
1611bf215546Sopenharmony_ci         if (nir_ssa_scalar_is_const(src1_scalar)) {
1612bf215546Sopenharmony_ci            src0 = MIN2(src0, 31);
1613bf215546Sopenharmony_ci            src1 = nir_ssa_scalar_as_uint(src1_scalar) & 0x1fu;
1614bf215546Sopenharmony_ci            res = bitmask(src0) << src1;
1615bf215546Sopenharmony_ci         } else {
1616bf215546Sopenharmony_ci            src0 = MIN2(src0, 31);
1617bf215546Sopenharmony_ci            src1 = MIN2(src1, 31);
1618bf215546Sopenharmony_ci            res = bitmask(MIN2(src0 + src1, 32));
1619bf215546Sopenharmony_ci         }
1620bf215546Sopenharmony_ci         break;
1621bf215546Sopenharmony_ci      }
1622bf215546Sopenharmony_ci      /* limited floating-point support for f2u32(fmul(load_input(), <constant>)) */
1623bf215546Sopenharmony_ci      case nir_op_f2u32:
1624bf215546Sopenharmony_ci         /* infinity/NaN starts at 0x7f800000u, negative numbers at 0x80000000 */
1625bf215546Sopenharmony_ci         if (src0 < 0x7f800000u) {
1626bf215546Sopenharmony_ci            float val;
1627bf215546Sopenharmony_ci            memcpy(&val, &src0, 4);
1628bf215546Sopenharmony_ci            res = (uint32_t)val;
1629bf215546Sopenharmony_ci         }
1630bf215546Sopenharmony_ci         break;
1631bf215546Sopenharmony_ci      case nir_op_fmul:
1632bf215546Sopenharmony_ci      case nir_op_fmulz:
1633bf215546Sopenharmony_ci         /* infinity/NaN starts at 0x7f800000u, negative numbers at 0x80000000 */
1634bf215546Sopenharmony_ci         if (src0 < 0x7f800000u && src1 < 0x7f800000u) {
1635bf215546Sopenharmony_ci            float src0_f, src1_f;
1636bf215546Sopenharmony_ci            memcpy(&src0_f, &src0, 4);
1637bf215546Sopenharmony_ci            memcpy(&src1_f, &src1, 4);
1638bf215546Sopenharmony_ci            /* not a proper rounding-up multiplication, but should be good enough */
1639bf215546Sopenharmony_ci            float max_f = ceilf(src0_f) * ceilf(src1_f);
1640bf215546Sopenharmony_ci            memcpy(&res, &max_f, 4);
1641bf215546Sopenharmony_ci         }
1642bf215546Sopenharmony_ci         break;
1643bf215546Sopenharmony_ci      case nir_op_u2u1:
1644bf215546Sopenharmony_ci      case nir_op_u2u8:
1645bf215546Sopenharmony_ci      case nir_op_u2u16:
1646bf215546Sopenharmony_ci      case nir_op_u2u32:
1647bf215546Sopenharmony_ci         res = MIN2(src0, max);
1648bf215546Sopenharmony_ci         break;
1649bf215546Sopenharmony_ci      case nir_op_sad_u8x4:
1650bf215546Sopenharmony_ci         res = src2 + 4 * 255;
1651bf215546Sopenharmony_ci         break;
1652bf215546Sopenharmony_ci      case nir_op_extract_u8:
1653bf215546Sopenharmony_ci         res = MIN2(src0, UINT8_MAX);
1654bf215546Sopenharmony_ci         break;
1655bf215546Sopenharmony_ci      case nir_op_extract_i8:
1656bf215546Sopenharmony_ci         res = (src0 >= 0x80) ? max : MIN2(src0, INT8_MAX);
1657bf215546Sopenharmony_ci         break;
1658bf215546Sopenharmony_ci      case nir_op_extract_u16:
1659bf215546Sopenharmony_ci         res = MIN2(src0, UINT16_MAX);
1660bf215546Sopenharmony_ci         break;
1661bf215546Sopenharmony_ci      case nir_op_extract_i16:
1662bf215546Sopenharmony_ci         res = (src0 >= 0x8000) ? max : MIN2(src0, INT16_MAX);
1663bf215546Sopenharmony_ci         break;
1664bf215546Sopenharmony_ci      default:
1665bf215546Sopenharmony_ci         res = max;
1666bf215546Sopenharmony_ci         break;
1667bf215546Sopenharmony_ci      }
1668bf215546Sopenharmony_ci      _mesa_hash_table_insert(range_ht, key, (void*)(uintptr_t)res);
1669bf215546Sopenharmony_ci      return res;
1670bf215546Sopenharmony_ci   }
1671bf215546Sopenharmony_ci
1672bf215546Sopenharmony_ci   return max;
1673bf215546Sopenharmony_ci}
1674bf215546Sopenharmony_ci
1675bf215546Sopenharmony_cibool
1676bf215546Sopenharmony_cinir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
1677bf215546Sopenharmony_ci                            nir_ssa_scalar ssa, unsigned const_val,
1678bf215546Sopenharmony_ci                            const nir_unsigned_upper_bound_config *config)
1679bf215546Sopenharmony_ci{
1680bf215546Sopenharmony_ci   if (nir_ssa_scalar_is_alu(ssa)) {
1681bf215546Sopenharmony_ci      nir_op alu_op = nir_ssa_scalar_alu_op(ssa);
1682bf215546Sopenharmony_ci
1683bf215546Sopenharmony_ci      /* iadd(imul(a, #b), #c) */
1684bf215546Sopenharmony_ci      if (alu_op == nir_op_imul || alu_op == nir_op_ishl) {
1685bf215546Sopenharmony_ci         nir_ssa_scalar mul_src0 = nir_ssa_scalar_chase_alu_src(ssa, 0);
1686bf215546Sopenharmony_ci         nir_ssa_scalar mul_src1 = nir_ssa_scalar_chase_alu_src(ssa, 1);
1687bf215546Sopenharmony_ci         uint32_t stride = 1;
1688bf215546Sopenharmony_ci         if (nir_ssa_scalar_is_const(mul_src0))
1689bf215546Sopenharmony_ci            stride = nir_ssa_scalar_as_uint(mul_src0);
1690bf215546Sopenharmony_ci         else if (nir_ssa_scalar_is_const(mul_src1))
1691bf215546Sopenharmony_ci            stride = nir_ssa_scalar_as_uint(mul_src1);
1692bf215546Sopenharmony_ci
1693bf215546Sopenharmony_ci         if (alu_op == nir_op_ishl)
1694bf215546Sopenharmony_ci            stride = 1u << (stride % 32u);
1695bf215546Sopenharmony_ci
1696bf215546Sopenharmony_ci         if (!stride || const_val <= UINT32_MAX - (UINT32_MAX / stride * stride))
1697bf215546Sopenharmony_ci            return false;
1698bf215546Sopenharmony_ci      }
1699bf215546Sopenharmony_ci
1700bf215546Sopenharmony_ci      /* iadd(iand(a, #b), #c) */
1701bf215546Sopenharmony_ci      if (alu_op == nir_op_iand) {
1702bf215546Sopenharmony_ci         nir_ssa_scalar and_src0 = nir_ssa_scalar_chase_alu_src(ssa, 0);
1703bf215546Sopenharmony_ci         nir_ssa_scalar and_src1 = nir_ssa_scalar_chase_alu_src(ssa, 1);
1704bf215546Sopenharmony_ci         uint32_t mask = 0xffffffff;
1705bf215546Sopenharmony_ci         if (nir_ssa_scalar_is_const(and_src0))
1706bf215546Sopenharmony_ci            mask = nir_ssa_scalar_as_uint(and_src0);
1707bf215546Sopenharmony_ci         else if (nir_ssa_scalar_is_const(and_src1))
1708bf215546Sopenharmony_ci            mask = nir_ssa_scalar_as_uint(and_src1);
1709bf215546Sopenharmony_ci         if (mask == 0 || const_val < (1u << (ffs(mask) - 1)))
1710bf215546Sopenharmony_ci            return false;
1711bf215546Sopenharmony_ci      }
1712bf215546Sopenharmony_ci   }
1713bf215546Sopenharmony_ci
1714bf215546Sopenharmony_ci   uint32_t ub = nir_unsigned_upper_bound(shader, range_ht, ssa, config);
1715bf215546Sopenharmony_ci   return const_val + ub < const_val;
1716bf215546Sopenharmony_ci}
1717bf215546Sopenharmony_ci
1718bf215546Sopenharmony_cistatic uint64_t
1719bf215546Sopenharmony_cissa_def_bits_used(const nir_ssa_def *def, int recur)
1720bf215546Sopenharmony_ci{
1721bf215546Sopenharmony_ci   uint64_t bits_used = 0;
1722bf215546Sopenharmony_ci   uint64_t all_bits = BITFIELD64_MASK(def->bit_size);
1723bf215546Sopenharmony_ci
1724bf215546Sopenharmony_ci   /* Querying the bits used from a vector is too hard of a question to
1725bf215546Sopenharmony_ci    * answer.  Return the conservative answer that all bits are used.  To
1726bf215546Sopenharmony_ci    * handle this, the function would need to be extended to be a query of a
1727bf215546Sopenharmony_ci    * single component of the vector.  That would also necessary to fully
1728bf215546Sopenharmony_ci    * handle the 'num_components > 1' inside the loop below.
1729bf215546Sopenharmony_ci    *
1730bf215546Sopenharmony_ci    * FINISHME: This restriction will eventually need to be restricted to be
1731bf215546Sopenharmony_ci    * useful for hardware that uses u16vec2 as the native 16-bit integer type.
1732bf215546Sopenharmony_ci    */
1733bf215546Sopenharmony_ci   if (def->num_components > 1)
1734bf215546Sopenharmony_ci      return all_bits;
1735bf215546Sopenharmony_ci
1736bf215546Sopenharmony_ci   /* Limit recursion */
1737bf215546Sopenharmony_ci   if (recur-- <= 0)
1738bf215546Sopenharmony_ci      return all_bits;
1739bf215546Sopenharmony_ci
1740bf215546Sopenharmony_ci   nir_foreach_use(src, def) {
1741bf215546Sopenharmony_ci      switch (src->parent_instr->type) {
1742bf215546Sopenharmony_ci      case nir_instr_type_alu: {
1743bf215546Sopenharmony_ci         nir_alu_instr *use_alu = nir_instr_as_alu(src->parent_instr);
1744bf215546Sopenharmony_ci         unsigned src_idx = container_of(src, nir_alu_src, src) - use_alu->src;
1745bf215546Sopenharmony_ci
1746bf215546Sopenharmony_ci         /* If a user of the value produces a vector result, return the
1747bf215546Sopenharmony_ci          * conservative answer that all bits are used.  It is possible to
1748bf215546Sopenharmony_ci          * answer this query by looping over the components used.  For example,
1749bf215546Sopenharmony_ci          *
1750bf215546Sopenharmony_ci          * vec4 32 ssa_5 = load_const(0x0000f000, 0x00000f00, 0x000000f0, 0x0000000f)
1751bf215546Sopenharmony_ci          * ...
1752bf215546Sopenharmony_ci          * vec4 32 ssa_8 = iand ssa_7.xxxx, ssa_5
1753bf215546Sopenharmony_ci          *
1754bf215546Sopenharmony_ci          * could conceivably return 0x0000ffff when queyring the bits used of
1755bf215546Sopenharmony_ci          * ssa_7.  This is unlikely to be worth the effort because the
1756bf215546Sopenharmony_ci          * question can eventually answered after the shader has been
1757bf215546Sopenharmony_ci          * scalarized.
1758bf215546Sopenharmony_ci          */
1759bf215546Sopenharmony_ci         if (use_alu->dest.dest.ssa.num_components > 1)
1760bf215546Sopenharmony_ci            return all_bits;
1761bf215546Sopenharmony_ci
1762bf215546Sopenharmony_ci         switch (use_alu->op) {
1763bf215546Sopenharmony_ci         case nir_op_u2u8:
1764bf215546Sopenharmony_ci         case nir_op_i2i8:
1765bf215546Sopenharmony_ci            bits_used |= 0xff;
1766bf215546Sopenharmony_ci            break;
1767bf215546Sopenharmony_ci
1768bf215546Sopenharmony_ci         case nir_op_u2u16:
1769bf215546Sopenharmony_ci         case nir_op_i2i16:
1770bf215546Sopenharmony_ci            bits_used |= all_bits & 0xffff;
1771bf215546Sopenharmony_ci            break;
1772bf215546Sopenharmony_ci
1773bf215546Sopenharmony_ci         case nir_op_u2u32:
1774bf215546Sopenharmony_ci         case nir_op_i2i32:
1775bf215546Sopenharmony_ci            bits_used |= all_bits & 0xffffffff;
1776bf215546Sopenharmony_ci            break;
1777bf215546Sopenharmony_ci
1778bf215546Sopenharmony_ci         case nir_op_extract_u8:
1779bf215546Sopenharmony_ci         case nir_op_extract_i8:
1780bf215546Sopenharmony_ci            if (src_idx == 0 && nir_src_is_const(use_alu->src[1].src)) {
1781bf215546Sopenharmony_ci               unsigned chunk = nir_src_comp_as_uint(use_alu->src[1].src,
1782bf215546Sopenharmony_ci                                                     use_alu->src[1].swizzle[0]);
1783bf215546Sopenharmony_ci               bits_used |= 0xffull << (chunk * 8);
1784bf215546Sopenharmony_ci               break;
1785bf215546Sopenharmony_ci            } else {
1786bf215546Sopenharmony_ci               return all_bits;
1787bf215546Sopenharmony_ci            }
1788bf215546Sopenharmony_ci
1789bf215546Sopenharmony_ci         case nir_op_extract_u16:
1790bf215546Sopenharmony_ci         case nir_op_extract_i16:
1791bf215546Sopenharmony_ci            if (src_idx == 0 && nir_src_is_const(use_alu->src[1].src)) {
1792bf215546Sopenharmony_ci               unsigned chunk = nir_src_comp_as_uint(use_alu->src[1].src,
1793bf215546Sopenharmony_ci                                                     use_alu->src[1].swizzle[0]);
1794bf215546Sopenharmony_ci               bits_used |= 0xffffull << (chunk * 16);
1795bf215546Sopenharmony_ci               break;
1796bf215546Sopenharmony_ci            } else {
1797bf215546Sopenharmony_ci               return all_bits;
1798bf215546Sopenharmony_ci            }
1799bf215546Sopenharmony_ci
1800bf215546Sopenharmony_ci         case nir_op_ishl:
1801bf215546Sopenharmony_ci         case nir_op_ishr:
1802bf215546Sopenharmony_ci         case nir_op_ushr:
1803bf215546Sopenharmony_ci            if (src_idx == 1) {
1804bf215546Sopenharmony_ci               bits_used |= (nir_src_bit_size(use_alu->src[0].src) - 1);
1805bf215546Sopenharmony_ci               break;
1806bf215546Sopenharmony_ci            } else {
1807bf215546Sopenharmony_ci               return all_bits;
1808bf215546Sopenharmony_ci            }
1809bf215546Sopenharmony_ci
1810bf215546Sopenharmony_ci         case nir_op_iand:
1811bf215546Sopenharmony_ci            assert(src_idx < 2);
1812bf215546Sopenharmony_ci            if (nir_src_is_const(use_alu->src[1 - src_idx].src)) {
1813bf215546Sopenharmony_ci               uint64_t u64 = nir_src_comp_as_uint(use_alu->src[1 - src_idx].src,
1814bf215546Sopenharmony_ci                                                   use_alu->src[1 - src_idx].swizzle[0]);
1815bf215546Sopenharmony_ci               bits_used |= u64;
1816bf215546Sopenharmony_ci               break;
1817bf215546Sopenharmony_ci            } else {
1818bf215546Sopenharmony_ci               return all_bits;
1819bf215546Sopenharmony_ci            }
1820bf215546Sopenharmony_ci
1821bf215546Sopenharmony_ci         case nir_op_ior:
1822bf215546Sopenharmony_ci            assert(src_idx < 2);
1823bf215546Sopenharmony_ci            if (nir_src_is_const(use_alu->src[1 - src_idx].src)) {
1824bf215546Sopenharmony_ci               uint64_t u64 = nir_src_comp_as_uint(use_alu->src[1 - src_idx].src,
1825bf215546Sopenharmony_ci                                                   use_alu->src[1 - src_idx].swizzle[0]);
1826bf215546Sopenharmony_ci               bits_used |= all_bits & ~u64;
1827bf215546Sopenharmony_ci               break;
1828bf215546Sopenharmony_ci            } else {
1829bf215546Sopenharmony_ci               return all_bits;
1830bf215546Sopenharmony_ci            }
1831bf215546Sopenharmony_ci
1832bf215546Sopenharmony_ci         default:
1833bf215546Sopenharmony_ci            /* We don't know what this op does */
1834bf215546Sopenharmony_ci            return all_bits;
1835bf215546Sopenharmony_ci         }
1836bf215546Sopenharmony_ci         break;
1837bf215546Sopenharmony_ci      }
1838bf215546Sopenharmony_ci
1839bf215546Sopenharmony_ci      case nir_instr_type_intrinsic: {
1840bf215546Sopenharmony_ci         nir_intrinsic_instr *use_intrin =
1841bf215546Sopenharmony_ci            nir_instr_as_intrinsic(src->parent_instr);
1842bf215546Sopenharmony_ci         unsigned src_idx = src - use_intrin->src;
1843bf215546Sopenharmony_ci
1844bf215546Sopenharmony_ci         switch (use_intrin->intrinsic) {
1845bf215546Sopenharmony_ci         case nir_intrinsic_read_invocation:
1846bf215546Sopenharmony_ci         case nir_intrinsic_shuffle:
1847bf215546Sopenharmony_ci         case nir_intrinsic_shuffle_up:
1848bf215546Sopenharmony_ci         case nir_intrinsic_shuffle_down:
1849bf215546Sopenharmony_ci         case nir_intrinsic_shuffle_xor:
1850bf215546Sopenharmony_ci         case nir_intrinsic_quad_broadcast:
1851bf215546Sopenharmony_ci         case nir_intrinsic_quad_swap_horizontal:
1852bf215546Sopenharmony_ci         case nir_intrinsic_quad_swap_vertical:
1853bf215546Sopenharmony_ci         case nir_intrinsic_quad_swap_diagonal:
1854bf215546Sopenharmony_ci            if (src_idx == 0) {
1855bf215546Sopenharmony_ci               assert(use_intrin->dest.is_ssa);
1856bf215546Sopenharmony_ci               bits_used |= ssa_def_bits_used(&use_intrin->dest.ssa, recur);
1857bf215546Sopenharmony_ci            } else {
1858bf215546Sopenharmony_ci               if (use_intrin->intrinsic == nir_intrinsic_quad_broadcast) {
1859bf215546Sopenharmony_ci                  bits_used |= 3;
1860bf215546Sopenharmony_ci               } else {
1861bf215546Sopenharmony_ci                  /* Subgroups larger than 128 are not a thing */
1862bf215546Sopenharmony_ci                  bits_used |= 127;
1863bf215546Sopenharmony_ci               }
1864bf215546Sopenharmony_ci            }
1865bf215546Sopenharmony_ci            break;
1866bf215546Sopenharmony_ci
1867bf215546Sopenharmony_ci         case nir_intrinsic_reduce:
1868bf215546Sopenharmony_ci         case nir_intrinsic_inclusive_scan:
1869bf215546Sopenharmony_ci         case nir_intrinsic_exclusive_scan:
1870bf215546Sopenharmony_ci            assert(src_idx == 0);
1871bf215546Sopenharmony_ci            switch (nir_intrinsic_reduction_op(use_intrin)) {
1872bf215546Sopenharmony_ci            case nir_op_iadd:
1873bf215546Sopenharmony_ci            case nir_op_imul:
1874bf215546Sopenharmony_ci            case nir_op_ior:
1875bf215546Sopenharmony_ci            case nir_op_iand:
1876bf215546Sopenharmony_ci            case nir_op_ixor:
1877bf215546Sopenharmony_ci               bits_used |= ssa_def_bits_used(&use_intrin->dest.ssa, recur);
1878bf215546Sopenharmony_ci               break;
1879bf215546Sopenharmony_ci
1880bf215546Sopenharmony_ci            default:
1881bf215546Sopenharmony_ci               return all_bits;
1882bf215546Sopenharmony_ci            }
1883bf215546Sopenharmony_ci            break;
1884bf215546Sopenharmony_ci
1885bf215546Sopenharmony_ci         default:
1886bf215546Sopenharmony_ci            /* We don't know what this op does */
1887bf215546Sopenharmony_ci            return all_bits;
1888bf215546Sopenharmony_ci         }
1889bf215546Sopenharmony_ci         break;
1890bf215546Sopenharmony_ci      }
1891bf215546Sopenharmony_ci
1892bf215546Sopenharmony_ci      case nir_instr_type_phi: {
1893bf215546Sopenharmony_ci         nir_phi_instr *use_phi = nir_instr_as_phi(src->parent_instr);
1894bf215546Sopenharmony_ci         bits_used |= ssa_def_bits_used(&use_phi->dest.ssa, recur);
1895bf215546Sopenharmony_ci         break;
1896bf215546Sopenharmony_ci      }
1897bf215546Sopenharmony_ci
1898bf215546Sopenharmony_ci      default:
1899bf215546Sopenharmony_ci         return all_bits;
1900bf215546Sopenharmony_ci      }
1901bf215546Sopenharmony_ci
1902bf215546Sopenharmony_ci      /* If we've somehow shown that all our bits are used, we're done */
1903bf215546Sopenharmony_ci      assert((bits_used & ~all_bits) == 0);
1904bf215546Sopenharmony_ci      if (bits_used == all_bits)
1905bf215546Sopenharmony_ci         return all_bits;
1906bf215546Sopenharmony_ci   }
1907bf215546Sopenharmony_ci
1908bf215546Sopenharmony_ci   return bits_used;
1909bf215546Sopenharmony_ci}
1910bf215546Sopenharmony_ci
1911bf215546Sopenharmony_ciuint64_t
1912bf215546Sopenharmony_cinir_ssa_def_bits_used(const nir_ssa_def *def)
1913bf215546Sopenharmony_ci{
1914bf215546Sopenharmony_ci   return ssa_def_bits_used(def, 2);
1915bf215546Sopenharmony_ci}
1916