1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nir.h"
25bf215546Sopenharmony_ci#include "nir_builder.h"
26bf215546Sopenharmony_ci
/* Pick between the software 64-bit lowering and the native builder for ALU
 * op `name`.
 *
 * If the shader's lower_int64_options has the mask bit for nir_op_<name>
 * set, this expands to lower_<name>64(b, ...); otherwise it expands to
 * nir_<name>(b, ...).  The variadic arguments are forwarded unchanged.
 *
 * The whole expansion and the `b` argument are parenthesized so the macro
 * can be embedded in a larger expression without precedence surprises.
 * `b` is still evaluated more than once, so it must be free of side effects.
 */
#define COND_LOWER_OP(b, name, ...)                                   \
        (((b)->shader->options->lower_int64_options &                 \
          nir_lower_int64_op_to_options_mask(nir_op_##name)) ?        \
         lower_##name##64((b), __VA_ARGS__) :                         \
         nir_##name((b), __VA_ARGS__))
31bf215546Sopenharmony_ci
/* Pick between the software 64-bit comparison and the native builder for
 * comparison op `name`.
 *
 * If the shader's lower_int64_options has the mask bit for nir_op_<name>
 * set, this expands to lower_int64_compare(b, nir_op_<name>, ...);
 * otherwise it expands to nir_<name>(b, ...).
 *
 * The whole expansion and the `b` argument are parenthesized so the macro
 * can be embedded in a larger expression without precedence surprises.
 * `b` is still evaluated more than once, so it must be free of side effects.
 */
#define COND_LOWER_CMP(b, name, ...)                                  \
        (((b)->shader->options->lower_int64_options &                 \
          nir_lower_int64_op_to_options_mask(nir_op_##name)) ?        \
         lower_int64_compare((b), nir_op_##name, __VA_ARGS__) :       \
         nir_##name((b), __VA_ARGS__))
37bf215546Sopenharmony_ci
/* Pick between the software lowering and the native builder for conversion
 * op `name`.
 *
 * If the shader's lower_int64_options has the mask bit for nir_op_<name>
 * set, this expands to lower_<name>(b, ...) (note: no "64" suffix, unlike
 * COND_LOWER_OP); otherwise it expands to nir_<name>(b, ...).
 *
 * The whole expansion and the `b` argument are parenthesized so the macro
 * can be embedded in a larger expression without precedence surprises.
 * `b` is still evaluated more than once, so it must be free of side effects.
 */
#define COND_LOWER_CAST(b, name, ...)                                 \
        (((b)->shader->options->lower_int64_options &                 \
          nir_lower_int64_op_to_options_mask(nir_op_##name)) ?        \
         lower_##name((b), __VA_ARGS__) :                             \
         nir_##name((b), __VA_ARGS__))
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_cistatic nir_ssa_def *
45bf215546Sopenharmony_cilower_b2i64(nir_builder *b, nir_ssa_def *x)
46bf215546Sopenharmony_ci{
47bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_b2i32(b, x), nir_imm_int(b, 0));
48bf215546Sopenharmony_ci}
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cistatic nir_ssa_def *
51bf215546Sopenharmony_cilower_i2b(nir_builder *b, nir_ssa_def *x)
52bf215546Sopenharmony_ci{
53bf215546Sopenharmony_ci   return nir_ine(b, nir_ior(b, nir_unpack_64_2x32_split_x(b, x),
54bf215546Sopenharmony_ci                                nir_unpack_64_2x32_split_y(b, x)),
55bf215546Sopenharmony_ci                     nir_imm_int(b, 0));
56bf215546Sopenharmony_ci}
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_cistatic nir_ssa_def *
59bf215546Sopenharmony_cilower_i2i8(nir_builder *b, nir_ssa_def *x)
60bf215546Sopenharmony_ci{
61bf215546Sopenharmony_ci   return nir_i2i8(b, nir_unpack_64_2x32_split_x(b, x));
62bf215546Sopenharmony_ci}
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_cistatic nir_ssa_def *
65bf215546Sopenharmony_cilower_i2i16(nir_builder *b, nir_ssa_def *x)
66bf215546Sopenharmony_ci{
67bf215546Sopenharmony_ci   return nir_i2i16(b, nir_unpack_64_2x32_split_x(b, x));
68bf215546Sopenharmony_ci}
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_cistatic nir_ssa_def *
72bf215546Sopenharmony_cilower_i2i32(nir_builder *b, nir_ssa_def *x)
73bf215546Sopenharmony_ci{
74bf215546Sopenharmony_ci   return nir_unpack_64_2x32_split_x(b, x);
75bf215546Sopenharmony_ci}
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_cistatic nir_ssa_def *
78bf215546Sopenharmony_cilower_i2i64(nir_builder *b, nir_ssa_def *x)
79bf215546Sopenharmony_ci{
80bf215546Sopenharmony_ci   nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_i2i32(b, x);
81bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, x32, nir_ishr_imm(b, x32, 31));
82bf215546Sopenharmony_ci}
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_cistatic nir_ssa_def *
85bf215546Sopenharmony_cilower_u2u8(nir_builder *b, nir_ssa_def *x)
86bf215546Sopenharmony_ci{
87bf215546Sopenharmony_ci   return nir_u2u8(b, nir_unpack_64_2x32_split_x(b, x));
88bf215546Sopenharmony_ci}
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_cistatic nir_ssa_def *
91bf215546Sopenharmony_cilower_u2u16(nir_builder *b, nir_ssa_def *x)
92bf215546Sopenharmony_ci{
93bf215546Sopenharmony_ci   return nir_u2u16(b, nir_unpack_64_2x32_split_x(b, x));
94bf215546Sopenharmony_ci}
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_cistatic nir_ssa_def *
97bf215546Sopenharmony_cilower_u2u32(nir_builder *b, nir_ssa_def *x)
98bf215546Sopenharmony_ci{
99bf215546Sopenharmony_ci   return nir_unpack_64_2x32_split_x(b, x);
100bf215546Sopenharmony_ci}
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_cistatic nir_ssa_def *
103bf215546Sopenharmony_cilower_u2u64(nir_builder *b, nir_ssa_def *x)
104bf215546Sopenharmony_ci{
105bf215546Sopenharmony_ci   nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_u2u32(b, x);
106bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, x32, nir_imm_int(b, 0));
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_cistatic nir_ssa_def *
110bf215546Sopenharmony_cilower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y)
111bf215546Sopenharmony_ci{
112bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
113bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
114bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
115bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
118bf215546Sopenharmony_ci                                    nir_bcsel(b, cond, x_hi, y_hi));
119bf215546Sopenharmony_ci}
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_cistatic nir_ssa_def *
122bf215546Sopenharmony_cilower_inot64(nir_builder *b, nir_ssa_def *x)
123bf215546Sopenharmony_ci{
124bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
125bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
128bf215546Sopenharmony_ci}
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_cistatic nir_ssa_def *
131bf215546Sopenharmony_cilower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
132bf215546Sopenharmony_ci{
133bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
134bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
135bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
136bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
139bf215546Sopenharmony_ci                                    nir_iand(b, x_hi, y_hi));
140bf215546Sopenharmony_ci}
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_cistatic nir_ssa_def *
143bf215546Sopenharmony_cilower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
144bf215546Sopenharmony_ci{
145bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
146bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
147bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
148bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
151bf215546Sopenharmony_ci                                    nir_ior(b, x_hi, y_hi));
152bf215546Sopenharmony_ci}
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_cistatic nir_ssa_def *
155bf215546Sopenharmony_cilower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
156bf215546Sopenharmony_ci{
157bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
158bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
159bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
160bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
163bf215546Sopenharmony_ci                                    nir_ixor(b, x_hi, y_hi));
164bf215546Sopenharmony_ci}
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_cistatic nir_ssa_def *
167bf215546Sopenharmony_cilower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
168bf215546Sopenharmony_ci{
169bf215546Sopenharmony_ci   /* Implemented as
170bf215546Sopenharmony_ci    *
171bf215546Sopenharmony_ci    * uint64_t lshift(uint64_t x, int c)
172bf215546Sopenharmony_ci    * {
173bf215546Sopenharmony_ci    *    if (c == 0) return x;
174bf215546Sopenharmony_ci    *
175bf215546Sopenharmony_ci    *    uint32_t lo = LO(x), hi = HI(x);
176bf215546Sopenharmony_ci    *
177bf215546Sopenharmony_ci    *    if (c < 32) {
178bf215546Sopenharmony_ci    *       uint32_t lo_shifted = lo << c;
179bf215546Sopenharmony_ci    *       uint32_t hi_shifted = hi << c;
180bf215546Sopenharmony_ci    *       uint32_t lo_shifted_hi = lo >> abs(32 - c);
181bf215546Sopenharmony_ci    *       return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
182bf215546Sopenharmony_ci    *    } else {
183bf215546Sopenharmony_ci    *       uint32_t lo_shifted_hi = lo << abs(32 - c);
184bf215546Sopenharmony_ci    *       return pack_64(0, lo_shifted_hi);
185bf215546Sopenharmony_ci    *    }
186bf215546Sopenharmony_ci    * }
187bf215546Sopenharmony_ci    */
188bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
189bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
192bf215546Sopenharmony_ci   nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
193bf215546Sopenharmony_ci   nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
194bf215546Sopenharmony_ci   nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   nir_ssa_def *res_if_lt_32 =
197bf215546Sopenharmony_ci      nir_pack_64_2x32_split(b, lo_shifted,
198bf215546Sopenharmony_ci                                nir_ior(b, hi_shifted, lo_shifted_hi));
199bf215546Sopenharmony_ci   nir_ssa_def *res_if_ge_32 =
200bf215546Sopenharmony_ci      nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
201bf215546Sopenharmony_ci                                nir_ishl(b, x_lo, reverse_count));
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   return nir_bcsel(b, nir_ieq_imm(b, y, 0), x,
204bf215546Sopenharmony_ci                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
205bf215546Sopenharmony_ci                                 res_if_ge_32, res_if_lt_32));
206bf215546Sopenharmony_ci}
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_cistatic nir_ssa_def *
209bf215546Sopenharmony_cilower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
210bf215546Sopenharmony_ci{
211bf215546Sopenharmony_ci   /* Implemented as
212bf215546Sopenharmony_ci    *
213bf215546Sopenharmony_ci    * uint64_t arshift(uint64_t x, int c)
214bf215546Sopenharmony_ci    * {
215bf215546Sopenharmony_ci    *    if (c == 0) return x;
216bf215546Sopenharmony_ci    *
217bf215546Sopenharmony_ci    *    uint32_t lo = LO(x);
218bf215546Sopenharmony_ci    *    int32_t  hi = HI(x);
219bf215546Sopenharmony_ci    *
220bf215546Sopenharmony_ci    *    if (c < 32) {
221bf215546Sopenharmony_ci    *       uint32_t lo_shifted = lo >> c;
222bf215546Sopenharmony_ci    *       uint32_t hi_shifted = hi >> c;
223bf215546Sopenharmony_ci    *       uint32_t hi_shifted_lo = hi << abs(32 - c);
224bf215546Sopenharmony_ci    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
225bf215546Sopenharmony_ci    *    } else {
226bf215546Sopenharmony_ci    *       uint32_t hi_shifted = hi >> 31;
227bf215546Sopenharmony_ci    *       uint32_t hi_shifted_lo = hi >> abs(32 - c);
228bf215546Sopenharmony_ci    *       return pack_64(hi_shifted, hi_shifted_lo);
229bf215546Sopenharmony_ci    *    }
230bf215546Sopenharmony_ci    * }
231bf215546Sopenharmony_ci    */
232bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
233bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
236bf215546Sopenharmony_ci   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
237bf215546Sopenharmony_ci   nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
238bf215546Sopenharmony_ci   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   nir_ssa_def *res_if_lt_32 =
241bf215546Sopenharmony_ci      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
242bf215546Sopenharmony_ci                                hi_shifted);
243bf215546Sopenharmony_ci   nir_ssa_def *res_if_ge_32 =
244bf215546Sopenharmony_ci      nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
245bf215546Sopenharmony_ci                                nir_ishr(b, x_hi, nir_imm_int(b, 31)));
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci   return nir_bcsel(b, nir_ieq_imm(b, y, 0), x,
248bf215546Sopenharmony_ci                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
249bf215546Sopenharmony_ci                                 res_if_ge_32, res_if_lt_32));
250bf215546Sopenharmony_ci}
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_cistatic nir_ssa_def *
253bf215546Sopenharmony_cilower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
254bf215546Sopenharmony_ci{
255bf215546Sopenharmony_ci   /* Implemented as
256bf215546Sopenharmony_ci    *
257bf215546Sopenharmony_ci    * uint64_t rshift(uint64_t x, int c)
258bf215546Sopenharmony_ci    * {
259bf215546Sopenharmony_ci    *    if (c == 0) return x;
260bf215546Sopenharmony_ci    *
261bf215546Sopenharmony_ci    *    uint32_t lo = LO(x), hi = HI(x);
262bf215546Sopenharmony_ci    *
263bf215546Sopenharmony_ci    *    if (c < 32) {
264bf215546Sopenharmony_ci    *       uint32_t lo_shifted = lo >> c;
265bf215546Sopenharmony_ci    *       uint32_t hi_shifted = hi >> c;
266bf215546Sopenharmony_ci    *       uint32_t hi_shifted_lo = hi << abs(32 - c);
267bf215546Sopenharmony_ci    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
268bf215546Sopenharmony_ci    *    } else {
269bf215546Sopenharmony_ci    *       uint32_t hi_shifted_lo = hi >> abs(32 - c);
270bf215546Sopenharmony_ci    *       return pack_64(0, hi_shifted_lo);
271bf215546Sopenharmony_ci    *    }
272bf215546Sopenharmony_ci    * }
273bf215546Sopenharmony_ci    */
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
276bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
279bf215546Sopenharmony_ci   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
280bf215546Sopenharmony_ci   nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y);
281bf215546Sopenharmony_ci   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci   nir_ssa_def *res_if_lt_32 =
284bf215546Sopenharmony_ci      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
285bf215546Sopenharmony_ci                                hi_shifted);
286bf215546Sopenharmony_ci   nir_ssa_def *res_if_ge_32 =
287bf215546Sopenharmony_ci      nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count),
288bf215546Sopenharmony_ci                                nir_imm_int(b, 0));
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci   return nir_bcsel(b, nir_ieq_imm(b, y, 0), x,
291bf215546Sopenharmony_ci                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
292bf215546Sopenharmony_ci                                 res_if_ge_32, res_if_lt_32));
293bf215546Sopenharmony_ci}
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_cistatic nir_ssa_def *
296bf215546Sopenharmony_cilower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
297bf215546Sopenharmony_ci{
298bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
299bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
300bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
301bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
304bf215546Sopenharmony_ci   nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, x_lo));
305bf215546Sopenharmony_ci   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, res_lo, res_hi);
308bf215546Sopenharmony_ci}
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_cistatic nir_ssa_def *
311bf215546Sopenharmony_cilower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
312bf215546Sopenharmony_ci{
313bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
314bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
315bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
316bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
319bf215546Sopenharmony_ci   nir_ssa_def *borrow = nir_ineg(b, nir_b2i32(b, nir_ult(b, x_lo, y_lo)));
320bf215546Sopenharmony_ci   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, res_lo, res_hi);
323bf215546Sopenharmony_ci}
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_cistatic nir_ssa_def *
326bf215546Sopenharmony_cilower_ineg64(nir_builder *b, nir_ssa_def *x)
327bf215546Sopenharmony_ci{
328bf215546Sopenharmony_ci   /* Since isub is the same number of instructions (with better dependencies)
329bf215546Sopenharmony_ci    * as iadd, subtraction is actually more efficient for ineg than the usual
330bf215546Sopenharmony_ci    * 2's complement "flip the bits and add one".
331bf215546Sopenharmony_ci    */
332bf215546Sopenharmony_ci   return lower_isub64(b, nir_imm_int64(b, 0), x);
333bf215546Sopenharmony_ci}
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_cistatic nir_ssa_def *
336bf215546Sopenharmony_cilower_iabs64(nir_builder *b, nir_ssa_def *x)
337bf215546Sopenharmony_ci{
338bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
339bf215546Sopenharmony_ci   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
340bf215546Sopenharmony_ci   return nir_bcsel(b, x_is_neg, nir_ineg(b, x), x);
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_cistatic nir_ssa_def *
344bf215546Sopenharmony_cilower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
345bf215546Sopenharmony_ci{
346bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
347bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
348bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
349bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci   switch (op) {
352bf215546Sopenharmony_ci   case nir_op_ieq:
353bf215546Sopenharmony_ci      return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
354bf215546Sopenharmony_ci   case nir_op_ine:
355bf215546Sopenharmony_ci      return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
356bf215546Sopenharmony_ci   case nir_op_ult:
357bf215546Sopenharmony_ci      return nir_ior(b, nir_ult(b, x_hi, y_hi),
358bf215546Sopenharmony_ci                        nir_iand(b, nir_ieq(b, x_hi, y_hi),
359bf215546Sopenharmony_ci                                    nir_ult(b, x_lo, y_lo)));
360bf215546Sopenharmony_ci   case nir_op_ilt:
361bf215546Sopenharmony_ci      return nir_ior(b, nir_ilt(b, x_hi, y_hi),
362bf215546Sopenharmony_ci                        nir_iand(b, nir_ieq(b, x_hi, y_hi),
363bf215546Sopenharmony_ci                                    nir_ult(b, x_lo, y_lo)));
364bf215546Sopenharmony_ci      break;
365bf215546Sopenharmony_ci   case nir_op_uge:
366bf215546Sopenharmony_ci      /* Lower as !(x < y) in the hopes of better CSE */
367bf215546Sopenharmony_ci      return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
368bf215546Sopenharmony_ci   case nir_op_ige:
369bf215546Sopenharmony_ci      /* Lower as !(x < y) in the hopes of better CSE */
370bf215546Sopenharmony_ci      return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
371bf215546Sopenharmony_ci   default:
372bf215546Sopenharmony_ci      unreachable("Invalid comparison");
373bf215546Sopenharmony_ci   }
374bf215546Sopenharmony_ci}
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_cistatic nir_ssa_def *
377bf215546Sopenharmony_cilower_umax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
378bf215546Sopenharmony_ci{
379bf215546Sopenharmony_ci   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), y, x);
380bf215546Sopenharmony_ci}
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_cistatic nir_ssa_def *
383bf215546Sopenharmony_cilower_imax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
384bf215546Sopenharmony_ci{
385bf215546Sopenharmony_ci   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), y, x);
386bf215546Sopenharmony_ci}
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_cistatic nir_ssa_def *
389bf215546Sopenharmony_cilower_umin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
390bf215546Sopenharmony_ci{
391bf215546Sopenharmony_ci   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), x, y);
392bf215546Sopenharmony_ci}
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_cistatic nir_ssa_def *
395bf215546Sopenharmony_cilower_imin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
396bf215546Sopenharmony_ci{
397bf215546Sopenharmony_ci   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y);
398bf215546Sopenharmony_ci}
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_cistatic nir_ssa_def *
401bf215546Sopenharmony_cilower_mul_2x32_64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
402bf215546Sopenharmony_ci                  bool sign_extend)
403bf215546Sopenharmony_ci{
404bf215546Sopenharmony_ci   nir_ssa_def *res_hi = sign_extend ? nir_imul_high(b, x, y)
405bf215546Sopenharmony_ci                                     : nir_umul_high(b, x, y);
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_imul(b, x, y), res_hi);
408bf215546Sopenharmony_ci}
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_cistatic nir_ssa_def *
411bf215546Sopenharmony_cilower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
412bf215546Sopenharmony_ci{
413bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
414bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
415bf215546Sopenharmony_ci   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
416bf215546Sopenharmony_ci   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   nir_ssa_def *mul_lo = nir_umul_2x32_64(b, x_lo, y_lo);
419bf215546Sopenharmony_ci   nir_ssa_def *res_hi = nir_iadd(b, nir_unpack_64_2x32_split_y(b, mul_lo),
420bf215546Sopenharmony_ci                         nir_iadd(b, nir_imul(b, x_lo, y_hi),
421bf215546Sopenharmony_ci                                     nir_imul(b, x_hi, y_lo)));
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, nir_unpack_64_2x32_split_x(b, mul_lo),
424bf215546Sopenharmony_ci                                 res_hi);
425bf215546Sopenharmony_ci}
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_cistatic nir_ssa_def *
428bf215546Sopenharmony_cilower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
429bf215546Sopenharmony_ci                 bool sign_extend)
430bf215546Sopenharmony_ci{
431bf215546Sopenharmony_ci   nir_ssa_def *x32[4], *y32[4];
432bf215546Sopenharmony_ci   x32[0] = nir_unpack_64_2x32_split_x(b, x);
433bf215546Sopenharmony_ci   x32[1] = nir_unpack_64_2x32_split_y(b, x);
434bf215546Sopenharmony_ci   if (sign_extend) {
435bf215546Sopenharmony_ci      x32[2] = x32[3] = nir_ishr_imm(b, x32[1], 31);
436bf215546Sopenharmony_ci   } else {
437bf215546Sopenharmony_ci      x32[2] = x32[3] = nir_imm_int(b, 0);
438bf215546Sopenharmony_ci   }
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   y32[0] = nir_unpack_64_2x32_split_x(b, y);
441bf215546Sopenharmony_ci   y32[1] = nir_unpack_64_2x32_split_y(b, y);
442bf215546Sopenharmony_ci   if (sign_extend) {
443bf215546Sopenharmony_ci      y32[2] = y32[3] = nir_ishr_imm(b, y32[1], 31);
444bf215546Sopenharmony_ci   } else {
445bf215546Sopenharmony_ci      y32[2] = y32[3] = nir_imm_int(b, 0);
446bf215546Sopenharmony_ci   }
447bf215546Sopenharmony_ci
448bf215546Sopenharmony_ci   nir_ssa_def *res[8] = { NULL, };
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   /* Yes, the following generates a pile of code.  However, we throw res[0]
451bf215546Sopenharmony_ci    * and res[1] away in the end and, if we're in the umul case, four of our
452bf215546Sopenharmony_ci    * eight dword operands will be constant zero and opt_algebraic will clean
453bf215546Sopenharmony_ci    * this up nicely.
454bf215546Sopenharmony_ci    */
455bf215546Sopenharmony_ci   for (unsigned i = 0; i < 4; i++) {
456bf215546Sopenharmony_ci      nir_ssa_def *carry = NULL;
457bf215546Sopenharmony_ci      for (unsigned j = 0; j < 4; j++) {
458bf215546Sopenharmony_ci         /* The maximum values of x32[i] and y32[j] are UINT32_MAX so the
459bf215546Sopenharmony_ci          * maximum value of tmp is UINT32_MAX * UINT32_MAX.  The maximum
460bf215546Sopenharmony_ci          * value that will fit in tmp is
461bf215546Sopenharmony_ci          *
462bf215546Sopenharmony_ci          *    UINT64_MAX = UINT32_MAX << 32 + UINT32_MAX
463bf215546Sopenharmony_ci          *               = UINT32_MAX * (UINT32_MAX + 1) + UINT32_MAX
464bf215546Sopenharmony_ci          *               = UINT32_MAX * UINT32_MAX + 2 * UINT32_MAX
465bf215546Sopenharmony_ci          *
466bf215546Sopenharmony_ci          * so we're guaranteed that we can add in two more 32-bit values
467bf215546Sopenharmony_ci          * without overflowing tmp.
468bf215546Sopenharmony_ci          */
469bf215546Sopenharmony_ci         nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[j]);
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci         if (res[i + j])
472bf215546Sopenharmony_ci            tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j]));
473bf215546Sopenharmony_ci         if (carry)
474bf215546Sopenharmony_ci            tmp = nir_iadd(b, tmp, carry);
475bf215546Sopenharmony_ci         res[i + j] = nir_u2u32(b, tmp);
476bf215546Sopenharmony_ci         carry = nir_ushr_imm(b, tmp, 32);
477bf215546Sopenharmony_ci      }
478bf215546Sopenharmony_ci      res[i + 4] = nir_u2u32(b, carry);
479bf215546Sopenharmony_ci   }
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, res[2], res[3]);
482bf215546Sopenharmony_ci}
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_cistatic nir_ssa_def *
485bf215546Sopenharmony_cilower_isign64(nir_builder *b, nir_ssa_def *x)
486bf215546Sopenharmony_ci{
487bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
488bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
491bf215546Sopenharmony_ci   nir_ssa_def *res_hi = nir_ishr_imm(b, x_hi, 31);
492bf215546Sopenharmony_ci   nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i32(b, is_non_zero));
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, res_lo, res_hi);
495bf215546Sopenharmony_ci}
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_cistatic void
498bf215546Sopenharmony_cilower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d,
499bf215546Sopenharmony_ci                   nir_ssa_def **q, nir_ssa_def **r)
500bf215546Sopenharmony_ci{
501bf215546Sopenharmony_ci   /* TODO: We should specially handle the case where the denominator is a
502bf215546Sopenharmony_ci    * constant.  In that case, we should be able to reduce it to a multiply by
503bf215546Sopenharmony_ci    * a constant, some shifts, and an add.
504bf215546Sopenharmony_ci    */
505bf215546Sopenharmony_ci   nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n);
506bf215546Sopenharmony_ci   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
507bf215546Sopenharmony_ci   nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d);
508bf215546Sopenharmony_ci   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci   nir_ssa_def *q_lo = nir_imm_zero(b, n->num_components, 32);
511bf215546Sopenharmony_ci   nir_ssa_def *q_hi = nir_imm_zero(b, n->num_components, 32);
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci   nir_ssa_def *n_hi_before_if = n_hi;
514bf215546Sopenharmony_ci   nir_ssa_def *q_hi_before_if = q_hi;
515bf215546Sopenharmony_ci
516bf215546Sopenharmony_ci   /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
517bf215546Sopenharmony_ci    * greater than 32 bits to occur.  If the upper 32 bits of the numerator
518bf215546Sopenharmony_ci    * are zero, it is impossible for (denom << [63, 32]) <= numer unless
519bf215546Sopenharmony_ci    * denom == 0.
520bf215546Sopenharmony_ci    */
521bf215546Sopenharmony_ci   nir_ssa_def *need_high_div =
522bf215546Sopenharmony_ci      nir_iand(b, nir_ieq_imm(b, d_hi, 0), nir_uge(b, n_hi, d_lo));
523bf215546Sopenharmony_ci   nir_push_if(b, nir_bany(b, need_high_div));
524bf215546Sopenharmony_ci   {
525bf215546Sopenharmony_ci      /* If we only have one component, then the bany above goes away and
526bf215546Sopenharmony_ci       * this is always true within the if statement.
527bf215546Sopenharmony_ci       */
528bf215546Sopenharmony_ci      if (n->num_components == 1)
529bf215546Sopenharmony_ci         need_high_div = nir_imm_true(b);
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci      nir_ssa_def *log2_d_lo = nir_ufind_msb(b, d_lo);
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci      for (int i = 31; i >= 0; i--) {
534bf215546Sopenharmony_ci         /* if ((d.x << i) <= n.y) {
535bf215546Sopenharmony_ci          *    n.y -= d.x << i;
536bf215546Sopenharmony_ci          *    quot.y |= 1U << i;
537bf215546Sopenharmony_ci          * }
538bf215546Sopenharmony_ci          */
539bf215546Sopenharmony_ci         nir_ssa_def *d_shift = nir_ishl(b, d_lo, nir_imm_int(b, i));
540bf215546Sopenharmony_ci         nir_ssa_def *new_n_hi = nir_isub(b, n_hi, d_shift);
541bf215546Sopenharmony_ci         nir_ssa_def *new_q_hi = nir_ior(b, q_hi, nir_imm_int(b, 1u << i));
542bf215546Sopenharmony_ci         nir_ssa_def *cond = nir_iand(b, need_high_div,
543bf215546Sopenharmony_ci                                         nir_uge(b, n_hi, d_shift));
544bf215546Sopenharmony_ci         if (i != 0) {
545bf215546Sopenharmony_ci            /* log2_d_lo is always <= 31, so we don't need to bother with it
546bf215546Sopenharmony_ci             * in the last iteration.
547bf215546Sopenharmony_ci             */
548bf215546Sopenharmony_ci            cond = nir_iand(b, cond,
549bf215546Sopenharmony_ci                               nir_ige(b, nir_imm_int(b, 31 - i), log2_d_lo));
550bf215546Sopenharmony_ci         }
551bf215546Sopenharmony_ci         n_hi = nir_bcsel(b, cond, new_n_hi, n_hi);
552bf215546Sopenharmony_ci         q_hi = nir_bcsel(b, cond, new_q_hi, q_hi);
553bf215546Sopenharmony_ci      }
554bf215546Sopenharmony_ci   }
555bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
556bf215546Sopenharmony_ci   n_hi = nir_if_phi(b, n_hi, n_hi_before_if);
557bf215546Sopenharmony_ci   q_hi = nir_if_phi(b, q_hi, q_hi_before_if);
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci   nir_ssa_def *log2_denom = nir_ufind_msb(b, d_hi);
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   n = nir_pack_64_2x32_split(b, n_lo, n_hi);
562bf215546Sopenharmony_ci   d = nir_pack_64_2x32_split(b, d_lo, d_hi);
563bf215546Sopenharmony_ci   for (int i = 31; i >= 0; i--) {
564bf215546Sopenharmony_ci      /* if ((d64 << i) <= n64) {
565bf215546Sopenharmony_ci       *    n64 -= d64 << i;
566bf215546Sopenharmony_ci       *    quot.x |= 1U << i;
567bf215546Sopenharmony_ci       * }
568bf215546Sopenharmony_ci       */
569bf215546Sopenharmony_ci      nir_ssa_def *d_shift = nir_ishl(b, d, nir_imm_int(b, i));
570bf215546Sopenharmony_ci      nir_ssa_def *new_n = nir_isub(b, n, d_shift);
571bf215546Sopenharmony_ci      nir_ssa_def *new_q_lo = nir_ior(b, q_lo, nir_imm_int(b, 1u << i));
572bf215546Sopenharmony_ci      nir_ssa_def *cond = nir_uge(b, n, d_shift);
573bf215546Sopenharmony_ci      if (i != 0) {
574bf215546Sopenharmony_ci         /* log2_denom is always <= 31, so we don't need to bother with it
575bf215546Sopenharmony_ci          * in the last iteration.
576bf215546Sopenharmony_ci          */
577bf215546Sopenharmony_ci         cond = nir_iand(b, cond,
578bf215546Sopenharmony_ci                            nir_ige(b, nir_imm_int(b, 31 - i), log2_denom));
579bf215546Sopenharmony_ci      }
580bf215546Sopenharmony_ci      n = nir_bcsel(b, cond, new_n, n);
581bf215546Sopenharmony_ci      q_lo = nir_bcsel(b, cond, new_q_lo, q_lo);
582bf215546Sopenharmony_ci   }
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci   *q = nir_pack_64_2x32_split(b, q_lo, q_hi);
585bf215546Sopenharmony_ci   *r = n;
586bf215546Sopenharmony_ci}
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_cistatic nir_ssa_def *
589bf215546Sopenharmony_cilower_udiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
590bf215546Sopenharmony_ci{
591bf215546Sopenharmony_ci   nir_ssa_def *q, *r;
592bf215546Sopenharmony_ci   lower_udiv64_mod64(b, n, d, &q, &r);
593bf215546Sopenharmony_ci   return q;
594bf215546Sopenharmony_ci}
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_cistatic nir_ssa_def *
597bf215546Sopenharmony_cilower_idiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
598bf215546Sopenharmony_ci{
599bf215546Sopenharmony_ci   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
600bf215546Sopenharmony_ci   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_ci   nir_ssa_def *negate = nir_ine(b, nir_ilt(b, n_hi, nir_imm_int(b, 0)),
603bf215546Sopenharmony_ci                                    nir_ilt(b, d_hi, nir_imm_int(b, 0)));
604bf215546Sopenharmony_ci   nir_ssa_def *q, *r;
605bf215546Sopenharmony_ci   lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
606bf215546Sopenharmony_ci   return nir_bcsel(b, negate, nir_ineg(b, q), q);
607bf215546Sopenharmony_ci}
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_cistatic nir_ssa_def *
610bf215546Sopenharmony_cilower_umod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
611bf215546Sopenharmony_ci{
612bf215546Sopenharmony_ci   nir_ssa_def *q, *r;
613bf215546Sopenharmony_ci   lower_udiv64_mod64(b, n, d, &q, &r);
614bf215546Sopenharmony_ci   return r;
615bf215546Sopenharmony_ci}
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_cistatic nir_ssa_def *
618bf215546Sopenharmony_cilower_imod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
619bf215546Sopenharmony_ci{
620bf215546Sopenharmony_ci   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
621bf215546Sopenharmony_ci   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
622bf215546Sopenharmony_ci   nir_ssa_def *n_is_neg = nir_ilt(b, n_hi, nir_imm_int(b, 0));
623bf215546Sopenharmony_ci   nir_ssa_def *d_is_neg = nir_ilt(b, d_hi, nir_imm_int(b, 0));
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci   nir_ssa_def *q, *r;
626bf215546Sopenharmony_ci   lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci   nir_ssa_def *rem = nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   return nir_bcsel(b, nir_ieq_imm(b, r, 0), nir_imm_int64(b, 0),
631bf215546Sopenharmony_ci          nir_bcsel(b, nir_ieq(b, n_is_neg, d_is_neg), rem,
632bf215546Sopenharmony_ci                       nir_iadd(b, rem, d)));
633bf215546Sopenharmony_ci}
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_cistatic nir_ssa_def *
636bf215546Sopenharmony_cilower_irem64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
637bf215546Sopenharmony_ci{
638bf215546Sopenharmony_ci   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
639bf215546Sopenharmony_ci   nir_ssa_def *n_is_neg = nir_ilt(b, n_hi, nir_imm_int(b, 0));
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_ci   nir_ssa_def *q, *r;
642bf215546Sopenharmony_ci   lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
643bf215546Sopenharmony_ci   return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);
644bf215546Sopenharmony_ci}
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_cistatic nir_ssa_def *
647bf215546Sopenharmony_cilower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c)
648bf215546Sopenharmony_ci{
649bf215546Sopenharmony_ci   assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 ||
650bf215546Sopenharmony_ci          op == nir_op_extract_u16 || op == nir_op_extract_i16);
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci   const int chunk = nir_src_as_uint(nir_src_for_ssa(c));
653bf215546Sopenharmony_ci   const int chunk_bits =
654bf215546Sopenharmony_ci      (op == nir_op_extract_u8 || op == nir_op_extract_i8) ? 8 : 16;
655bf215546Sopenharmony_ci   const int num_chunks_in_32 = 32 / chunk_bits;
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci   nir_ssa_def *extract32;
658bf215546Sopenharmony_ci   if (chunk < num_chunks_in_32) {
659bf215546Sopenharmony_ci      extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x),
660bf215546Sopenharmony_ci                                   nir_imm_int(b, chunk),
661bf215546Sopenharmony_ci                                   NULL, NULL);
662bf215546Sopenharmony_ci   } else {
663bf215546Sopenharmony_ci      extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x),
664bf215546Sopenharmony_ci                                   nir_imm_int(b, chunk - num_chunks_in_32),
665bf215546Sopenharmony_ci                                   NULL, NULL);
666bf215546Sopenharmony_ci   }
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci   if (op == nir_op_extract_i8 || op == nir_op_extract_i16)
669bf215546Sopenharmony_ci      return lower_i2i64(b, extract32);
670bf215546Sopenharmony_ci   else
671bf215546Sopenharmony_ci      return lower_u2u64(b, extract32);
672bf215546Sopenharmony_ci}
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_cistatic nir_ssa_def *
675bf215546Sopenharmony_cilower_ufind_msb64(nir_builder *b, nir_ssa_def *x)
676bf215546Sopenharmony_ci{
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
679bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
680bf215546Sopenharmony_ci   nir_ssa_def *lo_count = nir_ufind_msb(b, x_lo);
681bf215546Sopenharmony_ci   nir_ssa_def *hi_count = nir_ufind_msb(b, x_hi);
682bf215546Sopenharmony_ci   nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0));
683bf215546Sopenharmony_ci   nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count);
684bf215546Sopenharmony_ci   return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
685bf215546Sopenharmony_ci}
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_cistatic nir_ssa_def *
688bf215546Sopenharmony_cilower_2f(nir_builder *b, nir_ssa_def *x, unsigned dest_bit_size,
689bf215546Sopenharmony_ci         bool src_is_signed)
690bf215546Sopenharmony_ci{
691bf215546Sopenharmony_ci   nir_ssa_def *x_sign = NULL;
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_ci   if (src_is_signed) {
694bf215546Sopenharmony_ci      x_sign = nir_bcsel(b, COND_LOWER_CMP(b, ilt, x, nir_imm_int64(b, 0)),
695bf215546Sopenharmony_ci                         nir_imm_floatN_t(b, -1, dest_bit_size),
696bf215546Sopenharmony_ci                         nir_imm_floatN_t(b, 1, dest_bit_size));
697bf215546Sopenharmony_ci      x = COND_LOWER_OP(b, iabs, x);
698bf215546Sopenharmony_ci   }
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci   nir_ssa_def *exp = COND_LOWER_OP(b, ufind_msb, x);
701bf215546Sopenharmony_ci   unsigned significand_bits;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   switch (dest_bit_size) {
704bf215546Sopenharmony_ci   case 32:
705bf215546Sopenharmony_ci      significand_bits = 23;
706bf215546Sopenharmony_ci      break;
707bf215546Sopenharmony_ci   case 16:
708bf215546Sopenharmony_ci      significand_bits = 10;
709bf215546Sopenharmony_ci      break;
710bf215546Sopenharmony_ci   default:
711bf215546Sopenharmony_ci      unreachable("Invalid dest_bit_size");
712bf215546Sopenharmony_ci   }
713bf215546Sopenharmony_ci
714bf215546Sopenharmony_ci   nir_ssa_def *discard =
715bf215546Sopenharmony_ci      nir_imax(b, nir_isub(b, exp, nir_imm_int(b, significand_bits)),
716bf215546Sopenharmony_ci                  nir_imm_int(b, 0));
717bf215546Sopenharmony_ci   nir_ssa_def *significand =
718bf215546Sopenharmony_ci      COND_LOWER_CAST(b, u2u32, COND_LOWER_OP(b, ushr, x, discard));
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci   /* Round-to-nearest-even implementation:
721bf215546Sopenharmony_ci    * - if the non-representable part of the significand is higher than half
722bf215546Sopenharmony_ci    *   the minimum representable significand, we round-up
723bf215546Sopenharmony_ci    * - if the non-representable part of the significand is equal to half the
724bf215546Sopenharmony_ci    *   minimum representable significand and the representable part of the
725bf215546Sopenharmony_ci    *   significand is odd, we round-up
726bf215546Sopenharmony_ci    * - in any other case, we round-down
727bf215546Sopenharmony_ci    */
728bf215546Sopenharmony_ci   nir_ssa_def *lsb_mask = COND_LOWER_OP(b, ishl, nir_imm_int64(b, 1), discard);
729bf215546Sopenharmony_ci   nir_ssa_def *rem_mask = COND_LOWER_OP(b, isub, lsb_mask, nir_imm_int64(b, 1));
730bf215546Sopenharmony_ci   nir_ssa_def *half = COND_LOWER_OP(b, ishr, lsb_mask, nir_imm_int(b, 1));
731bf215546Sopenharmony_ci   nir_ssa_def *rem = COND_LOWER_OP(b, iand, x, rem_mask);
732bf215546Sopenharmony_ci   nir_ssa_def *halfway = nir_iand(b, COND_LOWER_CMP(b, ieq, rem, half),
733bf215546Sopenharmony_ci                                   nir_ine(b, discard, nir_imm_int(b, 0)));
734bf215546Sopenharmony_ci   nir_ssa_def *is_odd = nir_i2b(b, nir_iand(b, significand, nir_imm_int(b, 1)));
735bf215546Sopenharmony_ci   nir_ssa_def *round_up = nir_ior(b, COND_LOWER_CMP(b, ilt, half, rem),
736bf215546Sopenharmony_ci                                   nir_iand(b, halfway, is_odd));
737bf215546Sopenharmony_ci   significand = nir_iadd(b, significand, nir_b2i32(b, round_up));
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci   nir_ssa_def *res;
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci   if (dest_bit_size == 32)
742bf215546Sopenharmony_ci      res = nir_fmul(b, nir_u2f32(b, significand),
743bf215546Sopenharmony_ci                     nir_fexp2(b, nir_u2f32(b, discard)));
744bf215546Sopenharmony_ci   else
745bf215546Sopenharmony_ci      res = nir_fmul(b, nir_u2f16(b, significand),
746bf215546Sopenharmony_ci                     nir_fexp2(b, nir_u2f16(b, discard)));
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci   if (src_is_signed)
749bf215546Sopenharmony_ci      res = nir_fmul(b, res, x_sign);
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci   return res;
752bf215546Sopenharmony_ci}
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_cistatic nir_ssa_def *
755bf215546Sopenharmony_cilower_f2(nir_builder *b, nir_ssa_def *x, bool dst_is_signed)
756bf215546Sopenharmony_ci{
757bf215546Sopenharmony_ci   assert(x->bit_size == 16 || x->bit_size == 32);
758bf215546Sopenharmony_ci   nir_ssa_def *x_sign = NULL;
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci   if (dst_is_signed)
761bf215546Sopenharmony_ci      x_sign = nir_fsign(b, x);
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ci   x = nir_ftrunc(b, x);
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci   if (dst_is_signed)
766bf215546Sopenharmony_ci      x = nir_fabs(b, x);
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci   nir_ssa_def *res;
769bf215546Sopenharmony_ci   if (x->bit_size < 32) {
770bf215546Sopenharmony_ci      res = nir_pack_64_2x32_split(b, nir_f2u32(b, x), nir_imm_int(b, 0));
771bf215546Sopenharmony_ci   } else {
772bf215546Sopenharmony_ci      nir_ssa_def *div = nir_imm_floatN_t(b, 1ULL << 32, x->bit_size);
773bf215546Sopenharmony_ci      nir_ssa_def *res_hi = nir_f2u32(b, nir_fdiv(b, x, div));
774bf215546Sopenharmony_ci      nir_ssa_def *res_lo = nir_f2u32(b, nir_frem(b, x, div));
775bf215546Sopenharmony_ci      res = nir_pack_64_2x32_split(b, res_lo, res_hi);
776bf215546Sopenharmony_ci   }
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci   if (dst_is_signed)
779bf215546Sopenharmony_ci      res = nir_bcsel(b, nir_flt(b, x_sign, nir_imm_floatN_t(b, 0, x->bit_size)),
780bf215546Sopenharmony_ci                      nir_ineg(b, res), res);
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci   return res;
783bf215546Sopenharmony_ci}
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_cistatic nir_ssa_def *
786bf215546Sopenharmony_cilower_bit_count64(nir_builder *b, nir_ssa_def *x)
787bf215546Sopenharmony_ci{
788bf215546Sopenharmony_ci   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
789bf215546Sopenharmony_ci   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
790bf215546Sopenharmony_ci   nir_ssa_def *lo_count = nir_bit_count(b, x_lo);
791bf215546Sopenharmony_ci   nir_ssa_def *hi_count = nir_bit_count(b, x_hi);
792bf215546Sopenharmony_ci   return nir_iadd(b, lo_count, hi_count);
793bf215546Sopenharmony_ci}
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_cinir_lower_int64_options
796bf215546Sopenharmony_cinir_lower_int64_op_to_options_mask(nir_op opcode)
797bf215546Sopenharmony_ci{
798bf215546Sopenharmony_ci   switch (opcode) {
799bf215546Sopenharmony_ci   case nir_op_imul:
800bf215546Sopenharmony_ci   case nir_op_amul:
801bf215546Sopenharmony_ci      return nir_lower_imul64;
802bf215546Sopenharmony_ci   case nir_op_imul_2x32_64:
803bf215546Sopenharmony_ci   case nir_op_umul_2x32_64:
804bf215546Sopenharmony_ci      return nir_lower_imul_2x32_64;
805bf215546Sopenharmony_ci   case nir_op_imul_high:
806bf215546Sopenharmony_ci   case nir_op_umul_high:
807bf215546Sopenharmony_ci      return nir_lower_imul_high64;
808bf215546Sopenharmony_ci   case nir_op_isign:
809bf215546Sopenharmony_ci      return nir_lower_isign64;
810bf215546Sopenharmony_ci   case nir_op_udiv:
811bf215546Sopenharmony_ci   case nir_op_idiv:
812bf215546Sopenharmony_ci   case nir_op_umod:
813bf215546Sopenharmony_ci   case nir_op_imod:
814bf215546Sopenharmony_ci   case nir_op_irem:
815bf215546Sopenharmony_ci      return nir_lower_divmod64;
816bf215546Sopenharmony_ci   case nir_op_b2i64:
817bf215546Sopenharmony_ci   case nir_op_i2b1:
818bf215546Sopenharmony_ci   case nir_op_i2i8:
819bf215546Sopenharmony_ci   case nir_op_i2i16:
820bf215546Sopenharmony_ci   case nir_op_i2i32:
821bf215546Sopenharmony_ci   case nir_op_i2i64:
822bf215546Sopenharmony_ci   case nir_op_u2u8:
823bf215546Sopenharmony_ci   case nir_op_u2u16:
824bf215546Sopenharmony_ci   case nir_op_u2u32:
825bf215546Sopenharmony_ci   case nir_op_u2u64:
826bf215546Sopenharmony_ci   case nir_op_i2f32:
827bf215546Sopenharmony_ci   case nir_op_u2f32:
828bf215546Sopenharmony_ci   case nir_op_i2f16:
829bf215546Sopenharmony_ci   case nir_op_u2f16:
830bf215546Sopenharmony_ci   case nir_op_f2i64:
831bf215546Sopenharmony_ci   case nir_op_f2u64:
832bf215546Sopenharmony_ci   case nir_op_bcsel:
833bf215546Sopenharmony_ci      return nir_lower_mov64;
834bf215546Sopenharmony_ci   case nir_op_ieq:
835bf215546Sopenharmony_ci   case nir_op_ine:
836bf215546Sopenharmony_ci   case nir_op_ult:
837bf215546Sopenharmony_ci   case nir_op_ilt:
838bf215546Sopenharmony_ci   case nir_op_uge:
839bf215546Sopenharmony_ci   case nir_op_ige:
840bf215546Sopenharmony_ci      return nir_lower_icmp64;
841bf215546Sopenharmony_ci   case nir_op_iadd:
842bf215546Sopenharmony_ci   case nir_op_isub:
843bf215546Sopenharmony_ci      return nir_lower_iadd64;
844bf215546Sopenharmony_ci   case nir_op_imin:
845bf215546Sopenharmony_ci   case nir_op_imax:
846bf215546Sopenharmony_ci   case nir_op_umin:
847bf215546Sopenharmony_ci   case nir_op_umax:
848bf215546Sopenharmony_ci      return nir_lower_minmax64;
849bf215546Sopenharmony_ci   case nir_op_iabs:
850bf215546Sopenharmony_ci      return nir_lower_iabs64;
851bf215546Sopenharmony_ci   case nir_op_ineg:
852bf215546Sopenharmony_ci      return nir_lower_ineg64;
853bf215546Sopenharmony_ci   case nir_op_iand:
854bf215546Sopenharmony_ci   case nir_op_ior:
855bf215546Sopenharmony_ci   case nir_op_ixor:
856bf215546Sopenharmony_ci   case nir_op_inot:
857bf215546Sopenharmony_ci      return nir_lower_logic64;
858bf215546Sopenharmony_ci   case nir_op_ishl:
859bf215546Sopenharmony_ci   case nir_op_ishr:
860bf215546Sopenharmony_ci   case nir_op_ushr:
861bf215546Sopenharmony_ci      return nir_lower_shift64;
862bf215546Sopenharmony_ci   case nir_op_extract_u8:
863bf215546Sopenharmony_ci   case nir_op_extract_i8:
864bf215546Sopenharmony_ci   case nir_op_extract_u16:
865bf215546Sopenharmony_ci   case nir_op_extract_i16:
866bf215546Sopenharmony_ci      return nir_lower_extract64;
867bf215546Sopenharmony_ci   case nir_op_ufind_msb:
868bf215546Sopenharmony_ci      return nir_lower_ufind_msb64;
869bf215546Sopenharmony_ci   case nir_op_bit_count:
870bf215546Sopenharmony_ci      return nir_lower_bit_count64;
871bf215546Sopenharmony_ci   default:
872bf215546Sopenharmony_ci      return 0;
873bf215546Sopenharmony_ci   }
874bf215546Sopenharmony_ci}
875bf215546Sopenharmony_ci
876bf215546Sopenharmony_cistatic nir_ssa_def *
877bf215546Sopenharmony_cilower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)
878bf215546Sopenharmony_ci{
879bf215546Sopenharmony_ci   nir_ssa_def *src[4];
880bf215546Sopenharmony_ci   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
881bf215546Sopenharmony_ci      src[i] = nir_ssa_for_alu_src(b, alu, i);
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci   switch (alu->op) {
884bf215546Sopenharmony_ci   case nir_op_imul:
885bf215546Sopenharmony_ci   case nir_op_amul:
886bf215546Sopenharmony_ci      return lower_imul64(b, src[0], src[1]);
887bf215546Sopenharmony_ci   case nir_op_imul_2x32_64:
888bf215546Sopenharmony_ci      return lower_mul_2x32_64(b, src[0], src[1], true);
889bf215546Sopenharmony_ci   case nir_op_umul_2x32_64:
890bf215546Sopenharmony_ci      return lower_mul_2x32_64(b, src[0], src[1], false);
891bf215546Sopenharmony_ci   case nir_op_imul_high:
892bf215546Sopenharmony_ci      return lower_mul_high64(b, src[0], src[1], true);
893bf215546Sopenharmony_ci   case nir_op_umul_high:
894bf215546Sopenharmony_ci      return lower_mul_high64(b, src[0], src[1], false);
895bf215546Sopenharmony_ci   case nir_op_isign:
896bf215546Sopenharmony_ci      return lower_isign64(b, src[0]);
897bf215546Sopenharmony_ci   case nir_op_udiv:
898bf215546Sopenharmony_ci      return lower_udiv64(b, src[0], src[1]);
899bf215546Sopenharmony_ci   case nir_op_idiv:
900bf215546Sopenharmony_ci      return lower_idiv64(b, src[0], src[1]);
901bf215546Sopenharmony_ci   case nir_op_umod:
902bf215546Sopenharmony_ci      return lower_umod64(b, src[0], src[1]);
903bf215546Sopenharmony_ci   case nir_op_imod:
904bf215546Sopenharmony_ci      return lower_imod64(b, src[0], src[1]);
905bf215546Sopenharmony_ci   case nir_op_irem:
906bf215546Sopenharmony_ci      return lower_irem64(b, src[0], src[1]);
907bf215546Sopenharmony_ci   case nir_op_b2i64:
908bf215546Sopenharmony_ci      return lower_b2i64(b, src[0]);
909bf215546Sopenharmony_ci   case nir_op_i2b1:
910bf215546Sopenharmony_ci      return lower_i2b(b, src[0]);
911bf215546Sopenharmony_ci   case nir_op_i2i8:
912bf215546Sopenharmony_ci      return lower_i2i8(b, src[0]);
913bf215546Sopenharmony_ci   case nir_op_i2i16:
914bf215546Sopenharmony_ci      return lower_i2i16(b, src[0]);
915bf215546Sopenharmony_ci   case nir_op_i2i32:
916bf215546Sopenharmony_ci      return lower_i2i32(b, src[0]);
917bf215546Sopenharmony_ci   case nir_op_i2i64:
918bf215546Sopenharmony_ci      return lower_i2i64(b, src[0]);
919bf215546Sopenharmony_ci   case nir_op_u2u8:
920bf215546Sopenharmony_ci      return lower_u2u8(b, src[0]);
921bf215546Sopenharmony_ci   case nir_op_u2u16:
922bf215546Sopenharmony_ci      return lower_u2u16(b, src[0]);
923bf215546Sopenharmony_ci   case nir_op_u2u32:
924bf215546Sopenharmony_ci      return lower_u2u32(b, src[0]);
925bf215546Sopenharmony_ci   case nir_op_u2u64:
926bf215546Sopenharmony_ci      return lower_u2u64(b, src[0]);
927bf215546Sopenharmony_ci   case nir_op_bcsel:
928bf215546Sopenharmony_ci      return lower_bcsel64(b, src[0], src[1], src[2]);
929bf215546Sopenharmony_ci   case nir_op_ieq:
930bf215546Sopenharmony_ci   case nir_op_ine:
931bf215546Sopenharmony_ci   case nir_op_ult:
932bf215546Sopenharmony_ci   case nir_op_ilt:
933bf215546Sopenharmony_ci   case nir_op_uge:
934bf215546Sopenharmony_ci   case nir_op_ige:
935bf215546Sopenharmony_ci      return lower_int64_compare(b, alu->op, src[0], src[1]);
936bf215546Sopenharmony_ci   case nir_op_iadd:
937bf215546Sopenharmony_ci      return lower_iadd64(b, src[0], src[1]);
938bf215546Sopenharmony_ci   case nir_op_isub:
939bf215546Sopenharmony_ci      return lower_isub64(b, src[0], src[1]);
940bf215546Sopenharmony_ci   case nir_op_imin:
941bf215546Sopenharmony_ci      return lower_imin64(b, src[0], src[1]);
942bf215546Sopenharmony_ci   case nir_op_imax:
943bf215546Sopenharmony_ci      return lower_imax64(b, src[0], src[1]);
944bf215546Sopenharmony_ci   case nir_op_umin:
945bf215546Sopenharmony_ci      return lower_umin64(b, src[0], src[1]);
946bf215546Sopenharmony_ci   case nir_op_umax:
947bf215546Sopenharmony_ci      return lower_umax64(b, src[0], src[1]);
948bf215546Sopenharmony_ci   case nir_op_iabs:
949bf215546Sopenharmony_ci      return lower_iabs64(b, src[0]);
950bf215546Sopenharmony_ci   case nir_op_ineg:
951bf215546Sopenharmony_ci      return lower_ineg64(b, src[0]);
952bf215546Sopenharmony_ci   case nir_op_iand:
953bf215546Sopenharmony_ci      return lower_iand64(b, src[0], src[1]);
954bf215546Sopenharmony_ci   case nir_op_ior:
955bf215546Sopenharmony_ci      return lower_ior64(b, src[0], src[1]);
956bf215546Sopenharmony_ci   case nir_op_ixor:
957bf215546Sopenharmony_ci      return lower_ixor64(b, src[0], src[1]);
958bf215546Sopenharmony_ci   case nir_op_inot:
959bf215546Sopenharmony_ci      return lower_inot64(b, src[0]);
960bf215546Sopenharmony_ci   case nir_op_ishl:
961bf215546Sopenharmony_ci      return lower_ishl64(b, src[0], src[1]);
962bf215546Sopenharmony_ci   case nir_op_ishr:
963bf215546Sopenharmony_ci      return lower_ishr64(b, src[0], src[1]);
964bf215546Sopenharmony_ci   case nir_op_ushr:
965bf215546Sopenharmony_ci      return lower_ushr64(b, src[0], src[1]);
966bf215546Sopenharmony_ci   case nir_op_extract_u8:
967bf215546Sopenharmony_ci   case nir_op_extract_i8:
968bf215546Sopenharmony_ci   case nir_op_extract_u16:
969bf215546Sopenharmony_ci   case nir_op_extract_i16:
970bf215546Sopenharmony_ci      return lower_extract(b, alu->op, src[0], src[1]);
971bf215546Sopenharmony_ci   case nir_op_ufind_msb:
972bf215546Sopenharmony_ci      return lower_ufind_msb64(b, src[0]);
973bf215546Sopenharmony_ci   case nir_op_bit_count:
974bf215546Sopenharmony_ci      return lower_bit_count64(b, src[0]);
975bf215546Sopenharmony_ci   case nir_op_i2f64:
976bf215546Sopenharmony_ci   case nir_op_i2f32:
977bf215546Sopenharmony_ci   case nir_op_i2f16:
978bf215546Sopenharmony_ci      return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), true);
979bf215546Sopenharmony_ci   case nir_op_u2f64:
980bf215546Sopenharmony_ci   case nir_op_u2f32:
981bf215546Sopenharmony_ci   case nir_op_u2f16:
982bf215546Sopenharmony_ci      return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), false);
983bf215546Sopenharmony_ci   case nir_op_f2i64:
984bf215546Sopenharmony_ci   case nir_op_f2u64:
985bf215546Sopenharmony_ci      /* We don't support f64toi64 (yet?). */
986bf215546Sopenharmony_ci      if (src[0]->bit_size > 32)
987bf215546Sopenharmony_ci         return false;
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci      return lower_f2(b, src[0], alu->op == nir_op_f2i64);
990bf215546Sopenharmony_ci   default:
991bf215546Sopenharmony_ci      unreachable("Invalid ALU opcode to lower");
992bf215546Sopenharmony_ci   }
993bf215546Sopenharmony_ci}
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_cistatic bool
996bf215546Sopenharmony_cishould_lower_int64_alu_instr(const nir_alu_instr *alu,
997bf215546Sopenharmony_ci                             const nir_shader_compiler_options *options)
998bf215546Sopenharmony_ci{
999bf215546Sopenharmony_ci   switch (alu->op) {
1000bf215546Sopenharmony_ci   case nir_op_i2b1:
1001bf215546Sopenharmony_ci   case nir_op_i2i8:
1002bf215546Sopenharmony_ci   case nir_op_i2i16:
1003bf215546Sopenharmony_ci   case nir_op_i2i32:
1004bf215546Sopenharmony_ci   case nir_op_u2u8:
1005bf215546Sopenharmony_ci   case nir_op_u2u16:
1006bf215546Sopenharmony_ci   case nir_op_u2u32:
1007bf215546Sopenharmony_ci      assert(alu->src[0].src.is_ssa);
1008bf215546Sopenharmony_ci      if (alu->src[0].src.ssa->bit_size != 64)
1009bf215546Sopenharmony_ci         return false;
1010bf215546Sopenharmony_ci      break;
1011bf215546Sopenharmony_ci   case nir_op_bcsel:
1012bf215546Sopenharmony_ci      assert(alu->src[1].src.is_ssa);
1013bf215546Sopenharmony_ci      assert(alu->src[2].src.is_ssa);
1014bf215546Sopenharmony_ci      assert(alu->src[1].src.ssa->bit_size ==
1015bf215546Sopenharmony_ci             alu->src[2].src.ssa->bit_size);
1016bf215546Sopenharmony_ci      if (alu->src[1].src.ssa->bit_size != 64)
1017bf215546Sopenharmony_ci         return false;
1018bf215546Sopenharmony_ci      break;
1019bf215546Sopenharmony_ci   case nir_op_ieq:
1020bf215546Sopenharmony_ci   case nir_op_ine:
1021bf215546Sopenharmony_ci   case nir_op_ult:
1022bf215546Sopenharmony_ci   case nir_op_ilt:
1023bf215546Sopenharmony_ci   case nir_op_uge:
1024bf215546Sopenharmony_ci   case nir_op_ige:
1025bf215546Sopenharmony_ci      assert(alu->src[0].src.is_ssa);
1026bf215546Sopenharmony_ci      assert(alu->src[1].src.is_ssa);
1027bf215546Sopenharmony_ci      assert(alu->src[0].src.ssa->bit_size ==
1028bf215546Sopenharmony_ci             alu->src[1].src.ssa->bit_size);
1029bf215546Sopenharmony_ci      if (alu->src[0].src.ssa->bit_size != 64)
1030bf215546Sopenharmony_ci         return false;
1031bf215546Sopenharmony_ci      break;
1032bf215546Sopenharmony_ci   case nir_op_ufind_msb:
1033bf215546Sopenharmony_ci   case nir_op_bit_count:
1034bf215546Sopenharmony_ci      assert(alu->src[0].src.is_ssa);
1035bf215546Sopenharmony_ci      if (alu->src[0].src.ssa->bit_size != 64)
1036bf215546Sopenharmony_ci         return false;
1037bf215546Sopenharmony_ci      break;
1038bf215546Sopenharmony_ci   case nir_op_amul:
1039bf215546Sopenharmony_ci      assert(alu->dest.dest.is_ssa);
1040bf215546Sopenharmony_ci      if (options->has_imul24)
1041bf215546Sopenharmony_ci         return false;
1042bf215546Sopenharmony_ci      if (alu->dest.dest.ssa.bit_size != 64)
1043bf215546Sopenharmony_ci         return false;
1044bf215546Sopenharmony_ci      break;
1045bf215546Sopenharmony_ci   case nir_op_i2f64:
1046bf215546Sopenharmony_ci   case nir_op_u2f64:
1047bf215546Sopenharmony_ci   case nir_op_i2f32:
1048bf215546Sopenharmony_ci   case nir_op_u2f32:
1049bf215546Sopenharmony_ci   case nir_op_i2f16:
1050bf215546Sopenharmony_ci   case nir_op_u2f16:
1051bf215546Sopenharmony_ci      assert(alu->src[0].src.is_ssa);
1052bf215546Sopenharmony_ci      if (alu->src[0].src.ssa->bit_size != 64)
1053bf215546Sopenharmony_ci         return false;
1054bf215546Sopenharmony_ci      break;
1055bf215546Sopenharmony_ci   case nir_op_f2u64:
1056bf215546Sopenharmony_ci   case nir_op_f2i64:
1057bf215546Sopenharmony_ci      FALLTHROUGH;
1058bf215546Sopenharmony_ci   default:
1059bf215546Sopenharmony_ci      assert(alu->dest.dest.is_ssa);
1060bf215546Sopenharmony_ci      if (alu->dest.dest.ssa.bit_size != 64)
1061bf215546Sopenharmony_ci         return false;
1062bf215546Sopenharmony_ci      break;
1063bf215546Sopenharmony_ci   }
1064bf215546Sopenharmony_ci
1065bf215546Sopenharmony_ci   unsigned mask = nir_lower_int64_op_to_options_mask(alu->op);
1066bf215546Sopenharmony_ci   return (options->lower_int64_options & mask) != 0;
1067bf215546Sopenharmony_ci}
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_cistatic nir_ssa_def *
1070bf215546Sopenharmony_cisplit_64bit_subgroup_op(nir_builder *b, const nir_intrinsic_instr *intrin)
1071bf215546Sopenharmony_ci{
1072bf215546Sopenharmony_ci   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci   /* This works on subgroup ops with a single 64-bit source which can be
1075bf215546Sopenharmony_ci    * trivially lowered by doing the exact same op on both halves.
1076bf215546Sopenharmony_ci    */
1077bf215546Sopenharmony_ci   assert(intrin->src[0].is_ssa && intrin->src[0].ssa->bit_size == 64);
1078bf215546Sopenharmony_ci   nir_ssa_def *split_src0[2] = {
1079bf215546Sopenharmony_ci      nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
1080bf215546Sopenharmony_ci      nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa),
1081bf215546Sopenharmony_ci   };
1082bf215546Sopenharmony_ci
1083bf215546Sopenharmony_ci   assert(info->has_dest && intrin->dest.is_ssa &&
1084bf215546Sopenharmony_ci          intrin->dest.ssa.bit_size == 64);
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci   nir_ssa_def *res[2];
1087bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++) {
1088bf215546Sopenharmony_ci      nir_intrinsic_instr *split =
1089bf215546Sopenharmony_ci         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
1090bf215546Sopenharmony_ci      split->num_components = intrin->num_components;
1091bf215546Sopenharmony_ci      split->src[0] = nir_src_for_ssa(split_src0[i]);
1092bf215546Sopenharmony_ci
1093bf215546Sopenharmony_ci      /* Other sources must be less than 64 bits and get copied directly */
1094bf215546Sopenharmony_ci      for (unsigned j = 1; j < info->num_srcs; j++) {
1095bf215546Sopenharmony_ci         assert(intrin->src[j].is_ssa && intrin->src[j].ssa->bit_size < 64);
1096bf215546Sopenharmony_ci         split->src[j] = nir_src_for_ssa(intrin->src[j].ssa);
1097bf215546Sopenharmony_ci      }
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_ci      /* Copy const indices, if any */
1100bf215546Sopenharmony_ci      memcpy(split->const_index, intrin->const_index,
1101bf215546Sopenharmony_ci             sizeof(intrin->const_index));
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci      nir_ssa_dest_init(&split->instr, &split->dest,
1104bf215546Sopenharmony_ci                        intrin->dest.ssa.num_components, 32, NULL);
1105bf215546Sopenharmony_ci      nir_builder_instr_insert(b, &split->instr);
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci      res[i] = &split->dest.ssa;
1108bf215546Sopenharmony_ci   }
1109bf215546Sopenharmony_ci
1110bf215546Sopenharmony_ci   return nir_pack_64_2x32_split(b, res[0], res[1]);
1111bf215546Sopenharmony_ci}
1112bf215546Sopenharmony_ci
1113bf215546Sopenharmony_cistatic nir_ssa_def *
1114bf215546Sopenharmony_cibuild_vote_ieq(nir_builder *b, nir_ssa_def *x)
1115bf215546Sopenharmony_ci{
1116bf215546Sopenharmony_ci   nir_intrinsic_instr *vote =
1117bf215546Sopenharmony_ci      nir_intrinsic_instr_create(b->shader, nir_intrinsic_vote_ieq);
1118bf215546Sopenharmony_ci   vote->src[0] = nir_src_for_ssa(x);
1119bf215546Sopenharmony_ci   vote->num_components = x->num_components;
1120bf215546Sopenharmony_ci   nir_ssa_dest_init(&vote->instr, &vote->dest, 1, 1, NULL);
1121bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &vote->instr);
1122bf215546Sopenharmony_ci   return &vote->dest.ssa;
1123bf215546Sopenharmony_ci}
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_cistatic nir_ssa_def *
1126bf215546Sopenharmony_cilower_vote_ieq(nir_builder *b, nir_ssa_def *x)
1127bf215546Sopenharmony_ci{
1128bf215546Sopenharmony_ci   return nir_iand(b, build_vote_ieq(b, nir_unpack_64_2x32_split_x(b, x)),
1129bf215546Sopenharmony_ci                      build_vote_ieq(b, nir_unpack_64_2x32_split_y(b, x)));
1130bf215546Sopenharmony_ci}
1131bf215546Sopenharmony_ci
1132bf215546Sopenharmony_cistatic nir_ssa_def *
1133bf215546Sopenharmony_cibuild_scan_intrinsic(nir_builder *b, nir_intrinsic_op scan_op,
1134bf215546Sopenharmony_ci                     nir_op reduction_op, unsigned cluster_size,
1135bf215546Sopenharmony_ci                     nir_ssa_def *val)
1136bf215546Sopenharmony_ci{
1137bf215546Sopenharmony_ci   nir_intrinsic_instr *scan =
1138bf215546Sopenharmony_ci      nir_intrinsic_instr_create(b->shader, scan_op);
1139bf215546Sopenharmony_ci   scan->num_components = val->num_components;
1140bf215546Sopenharmony_ci   scan->src[0] = nir_src_for_ssa(val);
1141bf215546Sopenharmony_ci   nir_intrinsic_set_reduction_op(scan, reduction_op);
1142bf215546Sopenharmony_ci   if (scan_op == nir_intrinsic_reduce)
1143bf215546Sopenharmony_ci      nir_intrinsic_set_cluster_size(scan, cluster_size);
1144bf215546Sopenharmony_ci   nir_ssa_dest_init(&scan->instr, &scan->dest,
1145bf215546Sopenharmony_ci                     val->num_components, val->bit_size, NULL);
1146bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &scan->instr);
1147bf215546Sopenharmony_ci   return &scan->dest.ssa;
1148bf215546Sopenharmony_ci}
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_cistatic nir_ssa_def *
1151bf215546Sopenharmony_cilower_scan_iadd64(nir_builder *b, const nir_intrinsic_instr *intrin)
1152bf215546Sopenharmony_ci{
1153bf215546Sopenharmony_ci   unsigned cluster_size =
1154bf215546Sopenharmony_ci      intrin->intrinsic == nir_intrinsic_reduce ?
1155bf215546Sopenharmony_ci      nir_intrinsic_cluster_size(intrin) : 0;
1156bf215546Sopenharmony_ci
1157bf215546Sopenharmony_ci   /* Split it into three chunks of no more than 24 bits each.  With 8 bits
1158bf215546Sopenharmony_ci    * of headroom, we're guaranteed that there will never be overflow in the
1159bf215546Sopenharmony_ci    * individual subgroup operations.  (Assuming, of course, a subgroup size
1160bf215546Sopenharmony_ci    * no larger than 256 which seems reasonable.)  We can then scan on each of
1161bf215546Sopenharmony_ci    * the chunks and add them back together at the end.
1162bf215546Sopenharmony_ci    */
1163bf215546Sopenharmony_ci   assert(intrin->src[0].is_ssa);
1164bf215546Sopenharmony_ci   nir_ssa_def *x = intrin->src[0].ssa;
1165bf215546Sopenharmony_ci   nir_ssa_def *x_low =
1166bf215546Sopenharmony_ci      nir_u2u32(b, nir_iand_imm(b, x, 0xffffff));
1167bf215546Sopenharmony_ci   nir_ssa_def *x_mid =
1168bf215546Sopenharmony_ci      nir_u2u32(b, nir_iand_imm(b, nir_ushr(b, x, nir_imm_int(b, 24)),
1169bf215546Sopenharmony_ci                                   0xffffff));
1170bf215546Sopenharmony_ci   nir_ssa_def *x_hi =
1171bf215546Sopenharmony_ci      nir_u2u32(b, nir_ushr(b, x, nir_imm_int(b, 48)));
1172bf215546Sopenharmony_ci
1173bf215546Sopenharmony_ci   nir_ssa_def *scan_low =
1174bf215546Sopenharmony_ci      build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd,
1175bf215546Sopenharmony_ci                              cluster_size, x_low);
1176bf215546Sopenharmony_ci   nir_ssa_def *scan_mid =
1177bf215546Sopenharmony_ci      build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd,
1178bf215546Sopenharmony_ci                              cluster_size, x_mid);
1179bf215546Sopenharmony_ci   nir_ssa_def *scan_hi =
1180bf215546Sopenharmony_ci      build_scan_intrinsic(b, intrin->intrinsic, nir_op_iadd,
1181bf215546Sopenharmony_ci                              cluster_size, x_hi);
1182bf215546Sopenharmony_ci
1183bf215546Sopenharmony_ci   scan_low = nir_u2u64(b, scan_low);
1184bf215546Sopenharmony_ci   scan_mid = nir_ishl(b, nir_u2u64(b, scan_mid), nir_imm_int(b, 24));
1185bf215546Sopenharmony_ci   scan_hi = nir_ishl(b, nir_u2u64(b, scan_hi), nir_imm_int(b, 48));
1186bf215546Sopenharmony_ci
1187bf215546Sopenharmony_ci   return nir_iadd(b, scan_hi, nir_iadd(b, scan_mid, scan_low));
1188bf215546Sopenharmony_ci}
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_cistatic bool
1191bf215546Sopenharmony_cishould_lower_int64_intrinsic(const nir_intrinsic_instr *intrin,
1192bf215546Sopenharmony_ci                             const nir_shader_compiler_options *options)
1193bf215546Sopenharmony_ci{
1194bf215546Sopenharmony_ci   switch (intrin->intrinsic) {
1195bf215546Sopenharmony_ci   case nir_intrinsic_read_invocation:
1196bf215546Sopenharmony_ci   case nir_intrinsic_read_first_invocation:
1197bf215546Sopenharmony_ci   case nir_intrinsic_shuffle:
1198bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_xor:
1199bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_up:
1200bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_down:
1201bf215546Sopenharmony_ci   case nir_intrinsic_quad_broadcast:
1202bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_horizontal:
1203bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_vertical:
1204bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_diagonal:
1205bf215546Sopenharmony_ci      assert(intrin->dest.is_ssa);
1206bf215546Sopenharmony_ci      return intrin->dest.ssa.bit_size == 64 &&
1207bf215546Sopenharmony_ci             (options->lower_int64_options & nir_lower_subgroup_shuffle64);
1208bf215546Sopenharmony_ci
1209bf215546Sopenharmony_ci   case nir_intrinsic_vote_ieq:
1210bf215546Sopenharmony_ci      assert(intrin->src[0].is_ssa);
1211bf215546Sopenharmony_ci      return intrin->src[0].ssa->bit_size == 64 &&
1212bf215546Sopenharmony_ci             (options->lower_int64_options & nir_lower_vote_ieq64);
1213bf215546Sopenharmony_ci
1214bf215546Sopenharmony_ci   case nir_intrinsic_reduce:
1215bf215546Sopenharmony_ci   case nir_intrinsic_inclusive_scan:
1216bf215546Sopenharmony_ci   case nir_intrinsic_exclusive_scan:
1217bf215546Sopenharmony_ci      assert(intrin->dest.is_ssa);
1218bf215546Sopenharmony_ci      if (intrin->dest.ssa.bit_size != 64)
1219bf215546Sopenharmony_ci         return false;
1220bf215546Sopenharmony_ci
1221bf215546Sopenharmony_ci      switch (nir_intrinsic_reduction_op(intrin)) {
1222bf215546Sopenharmony_ci      case nir_op_iadd:
1223bf215546Sopenharmony_ci         return options->lower_int64_options & nir_lower_scan_reduce_iadd64;
1224bf215546Sopenharmony_ci      case nir_op_iand:
1225bf215546Sopenharmony_ci      case nir_op_ior:
1226bf215546Sopenharmony_ci      case nir_op_ixor:
1227bf215546Sopenharmony_ci         return options->lower_int64_options & nir_lower_scan_reduce_bitwise64;
1228bf215546Sopenharmony_ci      default:
1229bf215546Sopenharmony_ci         return false;
1230bf215546Sopenharmony_ci      }
1231bf215546Sopenharmony_ci      break;
1232bf215546Sopenharmony_ci
1233bf215546Sopenharmony_ci   default:
1234bf215546Sopenharmony_ci      return false;
1235bf215546Sopenharmony_ci   }
1236bf215546Sopenharmony_ci}
1237bf215546Sopenharmony_ci
1238bf215546Sopenharmony_cistatic nir_ssa_def *
1239bf215546Sopenharmony_cilower_int64_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
1240bf215546Sopenharmony_ci{
1241bf215546Sopenharmony_ci   switch (intrin->intrinsic) {
1242bf215546Sopenharmony_ci   case nir_intrinsic_read_invocation:
1243bf215546Sopenharmony_ci   case nir_intrinsic_read_first_invocation:
1244bf215546Sopenharmony_ci   case nir_intrinsic_shuffle:
1245bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_xor:
1246bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_up:
1247bf215546Sopenharmony_ci   case nir_intrinsic_shuffle_down:
1248bf215546Sopenharmony_ci   case nir_intrinsic_quad_broadcast:
1249bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_horizontal:
1250bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_vertical:
1251bf215546Sopenharmony_ci   case nir_intrinsic_quad_swap_diagonal:
1252bf215546Sopenharmony_ci      return split_64bit_subgroup_op(b, intrin);
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci   case nir_intrinsic_vote_ieq:
1255bf215546Sopenharmony_ci      assert(intrin->src[0].is_ssa);
1256bf215546Sopenharmony_ci      return lower_vote_ieq(b, intrin->src[0].ssa);
1257bf215546Sopenharmony_ci
1258bf215546Sopenharmony_ci   case nir_intrinsic_reduce:
1259bf215546Sopenharmony_ci   case nir_intrinsic_inclusive_scan:
1260bf215546Sopenharmony_ci   case nir_intrinsic_exclusive_scan:
1261bf215546Sopenharmony_ci      switch (nir_intrinsic_reduction_op(intrin)) {
1262bf215546Sopenharmony_ci      case nir_op_iadd:
1263bf215546Sopenharmony_ci         return lower_scan_iadd64(b, intrin);
1264bf215546Sopenharmony_ci      case nir_op_iand:
1265bf215546Sopenharmony_ci      case nir_op_ior:
1266bf215546Sopenharmony_ci      case nir_op_ixor:
1267bf215546Sopenharmony_ci         return split_64bit_subgroup_op(b, intrin);
1268bf215546Sopenharmony_ci      default:
1269bf215546Sopenharmony_ci         unreachable("Unsupported subgroup scan/reduce op");
1270bf215546Sopenharmony_ci      }
1271bf215546Sopenharmony_ci      break;
1272bf215546Sopenharmony_ci
1273bf215546Sopenharmony_ci   default:
1274bf215546Sopenharmony_ci      unreachable("Unsupported intrinsic");
1275bf215546Sopenharmony_ci   }
1276bf215546Sopenharmony_ci}
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_cistatic bool
1279bf215546Sopenharmony_cishould_lower_int64_instr(const nir_instr *instr, const void *_options)
1280bf215546Sopenharmony_ci{
1281bf215546Sopenharmony_ci   switch (instr->type) {
1282bf215546Sopenharmony_ci   case nir_instr_type_alu:
1283bf215546Sopenharmony_ci      return should_lower_int64_alu_instr(nir_instr_as_alu(instr), _options);
1284bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
1285bf215546Sopenharmony_ci      return should_lower_int64_intrinsic(nir_instr_as_intrinsic(instr),
1286bf215546Sopenharmony_ci                                          _options);
1287bf215546Sopenharmony_ci   default:
1288bf215546Sopenharmony_ci      return false;
1289bf215546Sopenharmony_ci   }
1290bf215546Sopenharmony_ci}
1291bf215546Sopenharmony_ci
1292bf215546Sopenharmony_cistatic nir_ssa_def *
1293bf215546Sopenharmony_cilower_int64_instr(nir_builder *b, nir_instr *instr, void *_options)
1294bf215546Sopenharmony_ci{
1295bf215546Sopenharmony_ci   switch (instr->type) {
1296bf215546Sopenharmony_ci   case nir_instr_type_alu:
1297bf215546Sopenharmony_ci      return lower_int64_alu_instr(b, nir_instr_as_alu(instr));
1298bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
1299bf215546Sopenharmony_ci      return lower_int64_intrinsic(b, nir_instr_as_intrinsic(instr));
1300bf215546Sopenharmony_ci   default:
1301bf215546Sopenharmony_ci      return NULL;
1302bf215546Sopenharmony_ci   }
1303bf215546Sopenharmony_ci}
1304bf215546Sopenharmony_ci
1305bf215546Sopenharmony_cibool
1306bf215546Sopenharmony_cinir_lower_int64(nir_shader *shader)
1307bf215546Sopenharmony_ci{
1308bf215546Sopenharmony_ci   return nir_shader_lower_instructions(shader, should_lower_int64_instr,
1309bf215546Sopenharmony_ci                                        lower_int64_instr,
1310bf215546Sopenharmony_ci                                        (void *)shader->options);
1311bf215546Sopenharmony_ci}
1312