/*
 * Copyright © 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#ifndef NIR_CONVERSION_BUILDER_H
25bf215546Sopenharmony_ci#define NIR_CONVERSION_BUILDER_H
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "util/u_math.h"
28bf215546Sopenharmony_ci#include "nir_builder.h"
29bf215546Sopenharmony_ci#include "nir_builtin_builder.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#ifdef __cplusplus
32bf215546Sopenharmony_ciextern "C" {
33bf215546Sopenharmony_ci#endif
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_cistatic inline nir_ssa_def *
36bf215546Sopenharmony_cinir_round_float_to_int(nir_builder *b, nir_ssa_def *src,
37bf215546Sopenharmony_ci                       nir_rounding_mode round)
38bf215546Sopenharmony_ci{
39bf215546Sopenharmony_ci   switch (round) {
40bf215546Sopenharmony_ci   case nir_rounding_mode_ru:
41bf215546Sopenharmony_ci      return nir_fceil(b, src);
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   case nir_rounding_mode_rd:
44bf215546Sopenharmony_ci      return nir_ffloor(b, src);
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci   case nir_rounding_mode_rtne:
47bf215546Sopenharmony_ci      return nir_fround_even(b, src);
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci   case nir_rounding_mode_undef:
50bf215546Sopenharmony_ci   case nir_rounding_mode_rtz:
51bf215546Sopenharmony_ci      break;
52bf215546Sopenharmony_ci   }
53bf215546Sopenharmony_ci   unreachable("unexpected rounding mode");
54bf215546Sopenharmony_ci}
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_cistatic inline nir_ssa_def *
57bf215546Sopenharmony_cinir_round_float_to_float(nir_builder *b, nir_ssa_def *src,
58bf215546Sopenharmony_ci                         unsigned dest_bit_size,
59bf215546Sopenharmony_ci                         nir_rounding_mode round)
60bf215546Sopenharmony_ci{
61bf215546Sopenharmony_ci   unsigned src_bit_size = src->bit_size;
62bf215546Sopenharmony_ci   if (dest_bit_size > src_bit_size)
63bf215546Sopenharmony_ci      return src; /* No rounding is needed for an up-convert */
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   nir_op low_conv = nir_type_conversion_op(nir_type_float | src_bit_size,
66bf215546Sopenharmony_ci                                            nir_type_float | dest_bit_size,
67bf215546Sopenharmony_ci                                            nir_rounding_mode_undef);
68bf215546Sopenharmony_ci   nir_op high_conv = nir_type_conversion_op(nir_type_float | dest_bit_size,
69bf215546Sopenharmony_ci                                             nir_type_float | src_bit_size,
70bf215546Sopenharmony_ci                                             nir_rounding_mode_undef);
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   switch (round) {
73bf215546Sopenharmony_ci   case nir_rounding_mode_ru: {
74bf215546Sopenharmony_ci      /* If lower-precision conversion results in a lower value, push it
75bf215546Sopenharmony_ci      * up one ULP. */
76bf215546Sopenharmony_ci      nir_ssa_def *lower_prec =
77bf215546Sopenharmony_ci         nir_build_alu(b, low_conv, src, NULL, NULL, NULL);
78bf215546Sopenharmony_ci      nir_ssa_def *roundtrip =
79bf215546Sopenharmony_ci         nir_build_alu(b, high_conv, lower_prec, NULL, NULL, NULL);
80bf215546Sopenharmony_ci      nir_ssa_def *cmp = nir_flt(b, roundtrip, src);
81bf215546Sopenharmony_ci      nir_ssa_def *inf = nir_imm_floatN_t(b, INFINITY, dest_bit_size);
82bf215546Sopenharmony_ci      return nir_bcsel(b, cmp, nir_nextafter(b, lower_prec, inf), lower_prec);
83bf215546Sopenharmony_ci   }
84bf215546Sopenharmony_ci   case nir_rounding_mode_rd: {
85bf215546Sopenharmony_ci      /* If lower-precision conversion results in a higher value, push it
86bf215546Sopenharmony_ci      * down one ULP. */
87bf215546Sopenharmony_ci      nir_ssa_def *lower_prec =
88bf215546Sopenharmony_ci         nir_build_alu(b, low_conv, src, NULL, NULL, NULL);
89bf215546Sopenharmony_ci      nir_ssa_def *roundtrip =
90bf215546Sopenharmony_ci         nir_build_alu(b, high_conv, lower_prec, NULL, NULL, NULL);
91bf215546Sopenharmony_ci      nir_ssa_def *cmp = nir_flt(b, src, roundtrip);
92bf215546Sopenharmony_ci      nir_ssa_def *neg_inf = nir_imm_floatN_t(b, -INFINITY, dest_bit_size);
93bf215546Sopenharmony_ci      return nir_bcsel(b, cmp, nir_nextafter(b, lower_prec, neg_inf), lower_prec);
94bf215546Sopenharmony_ci   }
95bf215546Sopenharmony_ci   case nir_rounding_mode_rtz:
96bf215546Sopenharmony_ci      return nir_bcsel(b, nir_flt(b, src, nir_imm_zero(b, 1, src->bit_size)),
97bf215546Sopenharmony_ci                          nir_round_float_to_float(b, src, dest_bit_size,
98bf215546Sopenharmony_ci                                                   nir_rounding_mode_ru),
99bf215546Sopenharmony_ci                          nir_round_float_to_float(b, src, dest_bit_size,
100bf215546Sopenharmony_ci                                                   nir_rounding_mode_rd));
101bf215546Sopenharmony_ci   case nir_rounding_mode_rtne:
102bf215546Sopenharmony_ci   case nir_rounding_mode_undef:
103bf215546Sopenharmony_ci      break;
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci   unreachable("unexpected rounding mode");
106bf215546Sopenharmony_ci}
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_cistatic inline nir_ssa_def *
109bf215546Sopenharmony_cinir_round_int_to_float(nir_builder *b, nir_ssa_def *src,
110bf215546Sopenharmony_ci                       nir_alu_type src_type,
111bf215546Sopenharmony_ci                       unsigned dest_bit_size,
112bf215546Sopenharmony_ci                       nir_rounding_mode round)
113bf215546Sopenharmony_ci{
114bf215546Sopenharmony_ci   /* We only care whether or not its signed */
115bf215546Sopenharmony_ci   src_type = nir_alu_type_get_base_type(src_type);
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   unsigned mantissa_bits;
118bf215546Sopenharmony_ci   switch (dest_bit_size) {
119bf215546Sopenharmony_ci   case 16:
120bf215546Sopenharmony_ci      mantissa_bits = 10;
121bf215546Sopenharmony_ci      break;
122bf215546Sopenharmony_ci   case 32:
123bf215546Sopenharmony_ci      mantissa_bits = 23;
124bf215546Sopenharmony_ci      break;
125bf215546Sopenharmony_ci   case 64:
126bf215546Sopenharmony_ci      mantissa_bits = 52;
127bf215546Sopenharmony_ci      break;
128bf215546Sopenharmony_ci   default: unreachable("Unsupported bit size");
129bf215546Sopenharmony_ci   }
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   if (src->bit_size < mantissa_bits)
132bf215546Sopenharmony_ci      return src;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   if (src_type == nir_type_int) {
135bf215546Sopenharmony_ci      nir_ssa_def *sign =
136bf215546Sopenharmony_ci         nir_i2b1(b, nir_ishr(b, src, nir_imm_int(b, src->bit_size - 1)));
137bf215546Sopenharmony_ci      nir_ssa_def *abs = nir_iabs(b, src);
138bf215546Sopenharmony_ci      nir_ssa_def *positive_rounded =
139bf215546Sopenharmony_ci         nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, round);
140bf215546Sopenharmony_ci      nir_ssa_def *max_positive =
141bf215546Sopenharmony_ci         nir_imm_intN_t(b, (1ull << (src->bit_size - 1)) - 1, src->bit_size);
142bf215546Sopenharmony_ci      switch (round) {
143bf215546Sopenharmony_ci      case nir_rounding_mode_rtz:
144bf215546Sopenharmony_ci         return nir_bcsel(b, sign, nir_ineg(b, positive_rounded),
145bf215546Sopenharmony_ci                                   positive_rounded);
146bf215546Sopenharmony_ci         break;
147bf215546Sopenharmony_ci      case nir_rounding_mode_ru:
148bf215546Sopenharmony_ci         return nir_bcsel(b, sign,
149bf215546Sopenharmony_ci                          nir_ineg(b, nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, nir_rounding_mode_rd)),
150bf215546Sopenharmony_ci                          nir_umin(b, positive_rounded, max_positive));
151bf215546Sopenharmony_ci         break;
152bf215546Sopenharmony_ci      case nir_rounding_mode_rd:
153bf215546Sopenharmony_ci         return nir_bcsel(b, sign,
154bf215546Sopenharmony_ci                          nir_ineg(b,
155bf215546Sopenharmony_ci                                   nir_umin(b, max_positive,
156bf215546Sopenharmony_ci                                            nir_round_int_to_float(b, abs, nir_type_uint, dest_bit_size, nir_rounding_mode_ru))),
157bf215546Sopenharmony_ci                          positive_rounded);
158bf215546Sopenharmony_ci      case nir_rounding_mode_rtne:
159bf215546Sopenharmony_ci      case nir_rounding_mode_undef:
160bf215546Sopenharmony_ci         break;
161bf215546Sopenharmony_ci      }
162bf215546Sopenharmony_ci      unreachable("unexpected rounding mode");
163bf215546Sopenharmony_ci   } else {
164bf215546Sopenharmony_ci      nir_ssa_def *mantissa_bit_size = nir_imm_int(b, mantissa_bits);
165bf215546Sopenharmony_ci      nir_ssa_def *msb = nir_imax(b, nir_ufind_msb(b, src), mantissa_bit_size);
166bf215546Sopenharmony_ci      nir_ssa_def *bits_to_lose = nir_isub(b, msb, mantissa_bit_size);
167bf215546Sopenharmony_ci      nir_ssa_def *one = nir_imm_intN_t(b, 1, src->bit_size);
168bf215546Sopenharmony_ci      nir_ssa_def *adjust = nir_ishl(b, one, bits_to_lose);
169bf215546Sopenharmony_ci      nir_ssa_def *mask = nir_inot(b, nir_isub(b, adjust, one));
170bf215546Sopenharmony_ci      nir_ssa_def *truncated = nir_iand(b, src, mask);
171bf215546Sopenharmony_ci      switch (round) {
172bf215546Sopenharmony_ci      case nir_rounding_mode_rtz:
173bf215546Sopenharmony_ci      case nir_rounding_mode_rd:
174bf215546Sopenharmony_ci         return truncated;
175bf215546Sopenharmony_ci         break;
176bf215546Sopenharmony_ci      case nir_rounding_mode_ru:
177bf215546Sopenharmony_ci         return nir_bcsel(b, nir_ieq(b, src, truncated),
178bf215546Sopenharmony_ci                             src, nir_uadd_sat(b, truncated, adjust));
179bf215546Sopenharmony_ci      case nir_rounding_mode_rtne:
180bf215546Sopenharmony_ci      case nir_rounding_mode_undef:
181bf215546Sopenharmony_ci         break;
182bf215546Sopenharmony_ci      }
183bf215546Sopenharmony_ci      unreachable("unexpected rounding mode");
184bf215546Sopenharmony_ci   }
185bf215546Sopenharmony_ci}
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci/** Returns true if the representable range of a contains the representable
188bf215546Sopenharmony_ci * range of b.
189bf215546Sopenharmony_ci */
190bf215546Sopenharmony_cistatic inline bool
191bf215546Sopenharmony_cinir_alu_type_range_contains_type_range(nir_alu_type a, nir_alu_type b)
192bf215546Sopenharmony_ci{
193bf215546Sopenharmony_ci   /* Split types from bit sizes */
194bf215546Sopenharmony_ci   nir_alu_type a_base_type = nir_alu_type_get_base_type(a);
195bf215546Sopenharmony_ci   nir_alu_type b_base_type = nir_alu_type_get_base_type(b);
196bf215546Sopenharmony_ci   unsigned a_bit_size = nir_alu_type_get_type_size(a);
197bf215546Sopenharmony_ci   unsigned b_bit_size = nir_alu_type_get_type_size(b);
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   /* This requires sized types */
200bf215546Sopenharmony_ci   assert(a_bit_size > 0 && b_bit_size > 0);
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   if (a_base_type == b_base_type && a_bit_size >= b_bit_size)
203bf215546Sopenharmony_ci      return true;
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   if (a_base_type == nir_type_int && b_base_type == nir_type_uint &&
206bf215546Sopenharmony_ci       a_bit_size > b_bit_size)
207bf215546Sopenharmony_ci      return true;
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   /* 16-bit floats fit in 32-bit integers */
210bf215546Sopenharmony_ci   if (a_base_type == nir_type_int && a_bit_size >= 32 &&
211bf215546Sopenharmony_ci       b == nir_type_float16)
212bf215546Sopenharmony_ci      return true;
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci   /* All signed or unsigned ints can fit in float or above. A uint8 can fit
215bf215546Sopenharmony_ci    * in a float16.
216bf215546Sopenharmony_ci    */
217bf215546Sopenharmony_ci   if (a_base_type == nir_type_float && b_base_type != nir_type_float &&
218bf215546Sopenharmony_ci       (a_bit_size >= 32 || b_bit_size == 8))
219bf215546Sopenharmony_ci      return true;
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci   return false;
222bf215546Sopenharmony_ci}
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci/**
225bf215546Sopenharmony_ci * Retrieves limits used for clamping a value of the src type into
226bf215546Sopenharmony_ci * the widest representable range of the dst type via cmp + bcsel
227bf215546Sopenharmony_ci */
228bf215546Sopenharmony_cistatic inline void
229bf215546Sopenharmony_cinir_get_clamp_limits(nir_builder *b,
230bf215546Sopenharmony_ci                     nir_alu_type src_type,
231bf215546Sopenharmony_ci                     nir_alu_type dest_type,
232bf215546Sopenharmony_ci                     nir_ssa_def **low, nir_ssa_def **high)
233bf215546Sopenharmony_ci{
234bf215546Sopenharmony_ci   /* Split types from bit sizes */
235bf215546Sopenharmony_ci   nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type);
236bf215546Sopenharmony_ci   nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
237bf215546Sopenharmony_ci   unsigned src_bit_size = nir_alu_type_get_type_size(src_type);
238bf215546Sopenharmony_ci   unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
239bf215546Sopenharmony_ci   assert(dest_bit_size != 0 && src_bit_size != 0);
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci   *low = NULL;
242bf215546Sopenharmony_ci   *high = NULL;
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci   /* limits of the destination type, expressed in the source type */
245bf215546Sopenharmony_ci   switch (dest_base_type) {
246bf215546Sopenharmony_ci   case nir_type_int: {
247bf215546Sopenharmony_ci      int64_t ilow, ihigh;
248bf215546Sopenharmony_ci      if (dest_bit_size == 64) {
249bf215546Sopenharmony_ci         ilow = INT64_MIN;
250bf215546Sopenharmony_ci         ihigh = INT64_MAX;
251bf215546Sopenharmony_ci      } else {
252bf215546Sopenharmony_ci         ilow = -(1ll << (dest_bit_size - 1));
253bf215546Sopenharmony_ci         ihigh = (1ll << (dest_bit_size - 1)) - 1;
254bf215546Sopenharmony_ci      }
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci      if (src_base_type == nir_type_int) {
257bf215546Sopenharmony_ci         *low = nir_imm_intN_t(b, ilow, src_bit_size);
258bf215546Sopenharmony_ci         *high = nir_imm_intN_t(b, ihigh, src_bit_size);
259bf215546Sopenharmony_ci      } else if (src_base_type == nir_type_uint) {
260bf215546Sopenharmony_ci         assert(src_bit_size >= dest_bit_size);
261bf215546Sopenharmony_ci         *high = nir_imm_intN_t(b, ihigh, src_bit_size);
262bf215546Sopenharmony_ci      } else {
263bf215546Sopenharmony_ci         *low = nir_imm_floatN_t(b, ilow, src_bit_size);
264bf215546Sopenharmony_ci         *high = nir_imm_floatN_t(b, ihigh, src_bit_size);
265bf215546Sopenharmony_ci      }
266bf215546Sopenharmony_ci      break;
267bf215546Sopenharmony_ci   }
268bf215546Sopenharmony_ci   case nir_type_uint: {
269bf215546Sopenharmony_ci      uint64_t uhigh = dest_bit_size == 64 ?
270bf215546Sopenharmony_ci         ~0ull : (1ull << dest_bit_size) - 1;
271bf215546Sopenharmony_ci      if (src_base_type != nir_type_float) {
272bf215546Sopenharmony_ci         *low = nir_imm_intN_t(b, 0, src_bit_size);
273bf215546Sopenharmony_ci         if (src_base_type == nir_type_uint || src_bit_size > dest_bit_size)
274bf215546Sopenharmony_ci            *high = nir_imm_intN_t(b, uhigh, src_bit_size);
275bf215546Sopenharmony_ci      } else {
276bf215546Sopenharmony_ci         *low = nir_imm_floatN_t(b, 0.0f, src_bit_size);
277bf215546Sopenharmony_ci         *high = nir_imm_floatN_t(b, uhigh, src_bit_size);
278bf215546Sopenharmony_ci      }
279bf215546Sopenharmony_ci      break;
280bf215546Sopenharmony_ci   }
281bf215546Sopenharmony_ci   case nir_type_float: {
282bf215546Sopenharmony_ci      double flow, fhigh;
283bf215546Sopenharmony_ci      switch (dest_bit_size) {
284bf215546Sopenharmony_ci      case 16:
285bf215546Sopenharmony_ci         flow = -65504.0f;
286bf215546Sopenharmony_ci         fhigh = 65504.0f;
287bf215546Sopenharmony_ci         break;
288bf215546Sopenharmony_ci      case 32:
289bf215546Sopenharmony_ci         flow = -FLT_MAX;
290bf215546Sopenharmony_ci         fhigh = FLT_MAX;
291bf215546Sopenharmony_ci         break;
292bf215546Sopenharmony_ci      case 64:
293bf215546Sopenharmony_ci         flow = -DBL_MAX;
294bf215546Sopenharmony_ci         fhigh = DBL_MAX;
295bf215546Sopenharmony_ci         break;
296bf215546Sopenharmony_ci      default:
297bf215546Sopenharmony_ci         unreachable("Unhandled bit size");
298bf215546Sopenharmony_ci      }
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci      switch (src_base_type) {
301bf215546Sopenharmony_ci      case nir_type_int: {
302bf215546Sopenharmony_ci         int64_t src_ilow, src_ihigh;
303bf215546Sopenharmony_ci         if (src_bit_size == 64) {
304bf215546Sopenharmony_ci            src_ilow = INT64_MIN;
305bf215546Sopenharmony_ci            src_ihigh = INT64_MAX;
306bf215546Sopenharmony_ci         } else {
307bf215546Sopenharmony_ci            src_ilow = -(1ll << (src_bit_size - 1));
308bf215546Sopenharmony_ci            src_ihigh = (1ll << (src_bit_size - 1)) - 1;
309bf215546Sopenharmony_ci         }
310bf215546Sopenharmony_ci         if (src_ilow < flow)
311bf215546Sopenharmony_ci            *low = nir_imm_intN_t(b, flow, src_bit_size);
312bf215546Sopenharmony_ci         if (src_ihigh > fhigh)
313bf215546Sopenharmony_ci            *high = nir_imm_intN_t(b, fhigh, src_bit_size);
314bf215546Sopenharmony_ci         break;
315bf215546Sopenharmony_ci      }
316bf215546Sopenharmony_ci      case nir_type_uint: {
317bf215546Sopenharmony_ci         uint64_t src_uhigh = src_bit_size == 64 ?
318bf215546Sopenharmony_ci            ~0ull : (1ull << src_bit_size) - 1;
319bf215546Sopenharmony_ci         if (src_uhigh > fhigh)
320bf215546Sopenharmony_ci            *high = nir_imm_intN_t(b, fhigh, src_bit_size);
321bf215546Sopenharmony_ci         break;
322bf215546Sopenharmony_ci      }
323bf215546Sopenharmony_ci      case nir_type_float:
324bf215546Sopenharmony_ci         *low = nir_imm_floatN_t(b, flow, src_bit_size);
325bf215546Sopenharmony_ci         *high = nir_imm_floatN_t(b, fhigh, src_bit_size);
326bf215546Sopenharmony_ci         break;
327bf215546Sopenharmony_ci      default:
328bf215546Sopenharmony_ci         unreachable("Clamping from unknown type");
329bf215546Sopenharmony_ci      }
330bf215546Sopenharmony_ci      break;
331bf215546Sopenharmony_ci   }
332bf215546Sopenharmony_ci   default:
333bf215546Sopenharmony_ci      unreachable("clamping to unknown type");
334bf215546Sopenharmony_ci      break;
335bf215546Sopenharmony_ci   }
336bf215546Sopenharmony_ci}
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci/**
339bf215546Sopenharmony_ci * Clamp the value into the widest representatble range of the
340bf215546Sopenharmony_ci * destination type with cmp + bcsel.
341bf215546Sopenharmony_ci *
342bf215546Sopenharmony_ci * val/val_type: The variables used for bcsel
343bf215546Sopenharmony_ci * src/src_type: The variables used for comparison
344bf215546Sopenharmony_ci * dest_type: The type which determines the range used for comparison
345bf215546Sopenharmony_ci */
346bf215546Sopenharmony_cistatic inline nir_ssa_def *
347bf215546Sopenharmony_cinir_clamp_to_type_range(nir_builder *b,
348bf215546Sopenharmony_ci                        nir_ssa_def *val, nir_alu_type val_type,
349bf215546Sopenharmony_ci                        nir_ssa_def *src, nir_alu_type src_type,
350bf215546Sopenharmony_ci                        nir_alu_type dest_type)
351bf215546Sopenharmony_ci{
352bf215546Sopenharmony_ci   assert(nir_alu_type_get_type_size(src_type) == 0 ||
353bf215546Sopenharmony_ci          nir_alu_type_get_type_size(src_type) == src->bit_size);
354bf215546Sopenharmony_ci   src_type |= src->bit_size;
355bf215546Sopenharmony_ci   if (nir_alu_type_range_contains_type_range(dest_type, src_type))
356bf215546Sopenharmony_ci      return val;
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   /* limits of the destination type, expressed in the source type */
359bf215546Sopenharmony_ci   nir_ssa_def *low = NULL, *high = NULL;
360bf215546Sopenharmony_ci   nir_get_clamp_limits(b, src_type, dest_type, &low, &high);
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   nir_ssa_def *low_cond = NULL, *high_cond = NULL;
363bf215546Sopenharmony_ci   switch (nir_alu_type_get_base_type(src_type)) {
364bf215546Sopenharmony_ci   case nir_type_int:
365bf215546Sopenharmony_ci      low_cond = low ? nir_ilt(b, src, low) : NULL;
366bf215546Sopenharmony_ci      high_cond = high ? nir_ilt(b, high, src) : NULL;
367bf215546Sopenharmony_ci      break;
368bf215546Sopenharmony_ci   case nir_type_uint:
369bf215546Sopenharmony_ci      low_cond = low ? nir_ult(b, src, low) : NULL;
370bf215546Sopenharmony_ci      high_cond = high ? nir_ult(b, high, src) : NULL;
371bf215546Sopenharmony_ci      break;
372bf215546Sopenharmony_ci   case nir_type_float:
373bf215546Sopenharmony_ci      low_cond = low ? nir_fge(b, low, src) : NULL;
374bf215546Sopenharmony_ci      high_cond = high ? nir_fge(b, src, high) : NULL;
375bf215546Sopenharmony_ci      break;
376bf215546Sopenharmony_ci   default:
377bf215546Sopenharmony_ci      unreachable("clamping from unknown type");
378bf215546Sopenharmony_ci   }
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci   nir_ssa_def *val_low = low, *val_high = high;
381bf215546Sopenharmony_ci   if (val_type != src_type) {
382bf215546Sopenharmony_ci      nir_get_clamp_limits(b, val_type, dest_type, &val_low, &val_high);
383bf215546Sopenharmony_ci   }
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   nir_ssa_def *res = val;
386bf215546Sopenharmony_ci   if (low_cond && val_low)
387bf215546Sopenharmony_ci      res = nir_bcsel(b, low_cond, val_low, res);
388bf215546Sopenharmony_ci   if (high_cond && val_high)
389bf215546Sopenharmony_ci      res = nir_bcsel(b, high_cond, val_high, res);
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci   return res;
392bf215546Sopenharmony_ci}
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_cistatic inline nir_rounding_mode
395bf215546Sopenharmony_cinir_simplify_conversion_rounding(nir_alu_type src_type,
396bf215546Sopenharmony_ci                                 nir_alu_type dest_type,
397bf215546Sopenharmony_ci                                 nir_rounding_mode rounding)
398bf215546Sopenharmony_ci{
399bf215546Sopenharmony_ci   nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type);
400bf215546Sopenharmony_ci   nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
401bf215546Sopenharmony_ci   unsigned src_bit_size = nir_alu_type_get_type_size(src_type);
402bf215546Sopenharmony_ci   unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
403bf215546Sopenharmony_ci   assert(src_bit_size > 0 && dest_bit_size > 0);
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci   if (rounding == nir_rounding_mode_undef)
406bf215546Sopenharmony_ci      return rounding;
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci   /* Pure integer conversion doesn't have any rounding */
409bf215546Sopenharmony_ci   if (src_base_type != nir_type_float &&
410bf215546Sopenharmony_ci       dest_base_type != nir_type_float)
411bf215546Sopenharmony_ci      return nir_rounding_mode_undef;
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   /* Float down-casts don't round */
414bf215546Sopenharmony_ci   if (src_base_type == nir_type_float &&
415bf215546Sopenharmony_ci       dest_base_type == nir_type_float &&
416bf215546Sopenharmony_ci       dest_bit_size >= src_bit_size)
417bf215546Sopenharmony_ci      return nir_rounding_mode_undef;
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci   /* Regular float to int conversions are RTZ */
420bf215546Sopenharmony_ci   if (src_base_type == nir_type_float &&
421bf215546Sopenharmony_ci       dest_base_type != nir_type_float &&
422bf215546Sopenharmony_ci       rounding == nir_rounding_mode_rtz)
423bf215546Sopenharmony_ci      return nir_rounding_mode_undef;
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   /* The CL spec requires regular conversions to float to be RTNE */
426bf215546Sopenharmony_ci   if (dest_base_type == nir_type_float &&
427bf215546Sopenharmony_ci       rounding == nir_rounding_mode_rtne)
428bf215546Sopenharmony_ci      return nir_rounding_mode_undef;
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_ci   /* Couldn't simplify */
431bf215546Sopenharmony_ci   return rounding;
432bf215546Sopenharmony_ci}
433bf215546Sopenharmony_ci
434bf215546Sopenharmony_cistatic inline nir_ssa_def *
435bf215546Sopenharmony_cinir_convert_with_rounding(nir_builder *b,
436bf215546Sopenharmony_ci                          nir_ssa_def *src, nir_alu_type src_type,
437bf215546Sopenharmony_ci                          nir_alu_type dest_type,
438bf215546Sopenharmony_ci                          nir_rounding_mode round,
439bf215546Sopenharmony_ci                          bool clamp)
440bf215546Sopenharmony_ci{
441bf215546Sopenharmony_ci   /* Some stuff wants sized types */
442bf215546Sopenharmony_ci   assert(nir_alu_type_get_type_size(src_type) == 0 ||
443bf215546Sopenharmony_ci          nir_alu_type_get_type_size(src_type) == src->bit_size);
444bf215546Sopenharmony_ci   src_type |= src->bit_size;
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   /* Split types from bit sizes */
447bf215546Sopenharmony_ci   nir_alu_type src_base_type = nir_alu_type_get_base_type(src_type);
448bf215546Sopenharmony_ci   nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
449bf215546Sopenharmony_ci   unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   /* Try to simplify the conversion if we can */
452bf215546Sopenharmony_ci   clamp = clamp &&
453bf215546Sopenharmony_ci      !nir_alu_type_range_contains_type_range(dest_type, src_type);
454bf215546Sopenharmony_ci   round = nir_simplify_conversion_rounding(src_type, dest_type, round);
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci   /* For float -> int/uint conversions, we might not be able to represent
457bf215546Sopenharmony_ci    * the destination range in the source float accurately. For these cases,
458bf215546Sopenharmony_ci    * do the comparison in float range, but the bcsel in the destination range.
459bf215546Sopenharmony_ci    */
460bf215546Sopenharmony_ci   bool clamp_after_conversion = clamp &&
461bf215546Sopenharmony_ci      src_base_type == nir_type_float &&
462bf215546Sopenharmony_ci      dest_base_type != nir_type_float;
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci   /*
465bf215546Sopenharmony_ci    * If we don't care about rounding and clamping, we can just use NIR's
466bf215546Sopenharmony_ci    * built-in ops. There is also a special case for SPIR-V in shaders, where
467bf215546Sopenharmony_ci    * f32/f64 -> f16 conversions can have one of two rounding modes applied,
468bf215546Sopenharmony_ci    * which NIR has built-in opcodes for.
469bf215546Sopenharmony_ci    *
470bf215546Sopenharmony_ci    * For the rest, we have our own implementation of rounding and clamping.
471bf215546Sopenharmony_ci    */
472bf215546Sopenharmony_ci   bool trivial_convert;
473bf215546Sopenharmony_ci   if (!clamp && round == nir_rounding_mode_undef) {
474bf215546Sopenharmony_ci      trivial_convert = true;
475bf215546Sopenharmony_ci   } else if (!clamp && src_type == nir_type_float32 &&
476bf215546Sopenharmony_ci                        dest_type == nir_type_float16 &&
477bf215546Sopenharmony_ci                        (round == nir_rounding_mode_rtne ||
478bf215546Sopenharmony_ci                         round == nir_rounding_mode_rtz)) {
479bf215546Sopenharmony_ci      trivial_convert = true;
480bf215546Sopenharmony_ci   } else {
481bf215546Sopenharmony_ci      trivial_convert = false;
482bf215546Sopenharmony_ci   }
483bf215546Sopenharmony_ci   if (trivial_convert) {
484bf215546Sopenharmony_ci      nir_op op = nir_type_conversion_op(src_type, dest_type, round);
485bf215546Sopenharmony_ci      return nir_build_alu(b, op, src, NULL, NULL, NULL);
486bf215546Sopenharmony_ci   }
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   nir_ssa_def *dest = src;
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   /* clamp the result into range */
491bf215546Sopenharmony_ci   if (clamp && !clamp_after_conversion)
492bf215546Sopenharmony_ci      dest = nir_clamp_to_type_range(b, src, src_type, src, src_type, dest_type);
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   /* round with selected rounding mode */
495bf215546Sopenharmony_ci   if (!trivial_convert && round != nir_rounding_mode_undef) {
496bf215546Sopenharmony_ci      if (src_base_type == nir_type_float) {
497bf215546Sopenharmony_ci         if (dest_base_type == nir_type_float) {
498bf215546Sopenharmony_ci            dest = nir_round_float_to_float(b, dest, dest_bit_size, round);
499bf215546Sopenharmony_ci         } else {
500bf215546Sopenharmony_ci            dest = nir_round_float_to_int(b, dest, round);
501bf215546Sopenharmony_ci         }
502bf215546Sopenharmony_ci      } else {
503bf215546Sopenharmony_ci         dest = nir_round_int_to_float(b, dest, src_type, dest_bit_size, round);
504bf215546Sopenharmony_ci      }
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci      round = nir_rounding_mode_undef;
507bf215546Sopenharmony_ci   }
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_ci   /* now we can convert the value */
510bf215546Sopenharmony_ci   nir_op op = nir_type_conversion_op(src_type, dest_type, round);
511bf215546Sopenharmony_ci   dest = nir_build_alu(b, op, dest, NULL, NULL, NULL);
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci   if (clamp_after_conversion)
514bf215546Sopenharmony_ci      dest = nir_clamp_to_type_range(b, dest, dest_type, src, src_type, dest_type);
515bf215546Sopenharmony_ci
516bf215546Sopenharmony_ci   return dest;
517bf215546Sopenharmony_ci}
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci#ifdef __cplusplus
520bf215546Sopenharmony_ci}
521bf215546Sopenharmony_ci#endif
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci#endif /* NIR_CONVERSION_BUILDER_H */
524