1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd.
4bf215546Sopenharmony_ci * Copyright © 2016 Broadcom
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
9bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
11bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
12bf215546Sopenharmony_ci *
13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
15bf215546Sopenharmony_ci * Software.
16bf215546Sopenharmony_ci *
17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23bf215546Sopenharmony_ci * SOFTWARE.
24bf215546Sopenharmony_ci */
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "main/glheader.h"
27bf215546Sopenharmony_ci#include "compiler/nir_types.h"
28bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h"
29bf215546Sopenharmony_ci#include "util/u_debug.h"
30bf215546Sopenharmony_ci#include "util/fast_idiv_by_const.h"
31bf215546Sopenharmony_ci#include "agx_compile.h"
32bf215546Sopenharmony_ci#include "agx_compiler.h"
33bf215546Sopenharmony_ci#include "agx_builder.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_cistatic const struct debug_named_value agx_debug_options[] = {
36bf215546Sopenharmony_ci   {"msgs",      AGX_DBG_MSGS,		"Print debug messages"},
37bf215546Sopenharmony_ci   {"shaders",   AGX_DBG_SHADERS,	"Dump shaders in NIR and AIR"},
38bf215546Sopenharmony_ci   {"shaderdb",  AGX_DBG_SHADERDB,	"Print statistics"},
39bf215546Sopenharmony_ci   {"verbose",   AGX_DBG_VERBOSE,	"Disassemble verbosely"},
40bf215546Sopenharmony_ci   {"internal",  AGX_DBG_INTERNAL,	"Dump even internal shaders"},
41bf215546Sopenharmony_ci   {"novalidate",AGX_DBG_NOVALIDATE,"Skip IR validation in debug builds"},
42bf215546Sopenharmony_ci   DEBUG_NAMED_VALUE_END
43bf215546Sopenharmony_ci};
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ciDEBUG_GET_ONCE_FLAGS_OPTION(agx_debug, "AGX_MESA_DEBUG", agx_debug_options, 0)
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ciint agx_debug = 0;
48bf215546Sopenharmony_ci
/* Prints a printf-style message to stderr, prefixed with the calling function
 * name and line number, but only if AGX_DBG_MSGS is set in agx_debug. */
#define DBG(fmt, ...)                                              \
   do {                                                            \
      if (agx_debug & AGX_DBG_MSGS) {                              \
         fprintf(stderr, "%s:%d: " fmt, __func__, __LINE__,        \
                 ##__VA_ARGS__);                                   \
      }                                                            \
   } while (0)
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci/* Builds a 64-bit hash table key for an index */
55bf215546Sopenharmony_cistatic uint64_t
56bf215546Sopenharmony_ciagx_index_to_key(agx_index idx)
57bf215546Sopenharmony_ci{
58bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(idx) <= sizeof(uint64_t));
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci   uint64_t key = 0;
61bf215546Sopenharmony_ci   memcpy(&key, &idx, sizeof(idx));
62bf215546Sopenharmony_ci   return key;
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci/*
66bf215546Sopenharmony_ci * Extract a single channel out of a vector source. We split vectors with
67bf215546Sopenharmony_ci * p_split so we can use the split components directly, without emitting a
68bf215546Sopenharmony_ci * machine instruction. This has advantages of RA, as the split can usually be
69bf215546Sopenharmony_ci * optimized away.
70bf215546Sopenharmony_ci */
71bf215546Sopenharmony_cistatic agx_index
72bf215546Sopenharmony_ciagx_emit_extract(agx_builder *b, agx_index vec, unsigned channel)
73bf215546Sopenharmony_ci{
74bf215546Sopenharmony_ci   agx_index *components = _mesa_hash_table_u64_search(b->shader->allocated_vec,
75bf215546Sopenharmony_ci                                                       agx_index_to_key(vec));
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   assert(components != NULL && "missing agx_emit_combine_to");
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   return components[channel];
80bf215546Sopenharmony_ci}
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_cistatic void
83bf215546Sopenharmony_ciagx_cache_combine(agx_builder *b, agx_index dst,
84bf215546Sopenharmony_ci                  agx_index s0, agx_index s1, agx_index s2, agx_index s3)
85bf215546Sopenharmony_ci{
86bf215546Sopenharmony_ci   /* Lifetime of a hash table entry has to be at least as long as the table */
87bf215546Sopenharmony_ci   agx_index *channels = ralloc_array(b->shader, agx_index, 4);
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   channels[0] = s0;
90bf215546Sopenharmony_ci   channels[1] = s1;
91bf215546Sopenharmony_ci   channels[2] = s2;
92bf215546Sopenharmony_ci   channels[3] = s3;
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   _mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst),
95bf215546Sopenharmony_ci                               channels);
96bf215546Sopenharmony_ci}
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci/*
99bf215546Sopenharmony_ci * Combine multiple scalars into a vector destination. This corresponds to
100bf215546Sopenharmony_ci * p_combine, lowered to moves (a shuffle in general) after register allocation.
101bf215546Sopenharmony_ci *
102bf215546Sopenharmony_ci * To optimize vector extractions, we record the individual channels
103bf215546Sopenharmony_ci */
104bf215546Sopenharmony_cistatic agx_instr *
105bf215546Sopenharmony_ciagx_emit_combine_to(agx_builder *b, agx_index dst,
106bf215546Sopenharmony_ci                    agx_index s0, agx_index s1, agx_index s2, agx_index s3)
107bf215546Sopenharmony_ci{
108bf215546Sopenharmony_ci   agx_cache_combine(b, dst, s0, s1, s2, s3);
109bf215546Sopenharmony_ci   return agx_p_combine_to(b, dst, s0, s1, s2, s3);
110bf215546Sopenharmony_ci}
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_cistatic void
113bf215546Sopenharmony_ciagx_block_add_successor(agx_block *block, agx_block *successor)
114bf215546Sopenharmony_ci{
115bf215546Sopenharmony_ci   assert(block != NULL && successor != NULL);
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   /* Cull impossible edges */
118bf215546Sopenharmony_ci   if (block->unconditional_jumps)
119bf215546Sopenharmony_ci      return;
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
122bf215546Sopenharmony_ci      if (block->successors[i]) {
123bf215546Sopenharmony_ci         if (block->successors[i] == successor)
124bf215546Sopenharmony_ci            return;
125bf215546Sopenharmony_ci         else
126bf215546Sopenharmony_ci            continue;
127bf215546Sopenharmony_ci      }
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci      block->successors[i] = successor;
130bf215546Sopenharmony_ci      util_dynarray_append(&successor->predecessors, agx_block *, block);
131bf215546Sopenharmony_ci      return;
132bf215546Sopenharmony_ci   }
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   unreachable("Too many successors");
135bf215546Sopenharmony_ci}
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci/*
138bf215546Sopenharmony_ci * Splits an n-component vector (vec) into n scalar destinations (dests) using a
139bf215546Sopenharmony_ci * split pseudo-instruction.
140bf215546Sopenharmony_ci *
141bf215546Sopenharmony_ci * Pre-condition: dests is filled with agx_null().
142bf215546Sopenharmony_ci */
143bf215546Sopenharmony_cistatic void
144bf215546Sopenharmony_ciagx_emit_split(agx_builder *b, agx_index *dests, agx_index vec, unsigned n)
145bf215546Sopenharmony_ci{
146bf215546Sopenharmony_ci   /* Setup the destinations */
147bf215546Sopenharmony_ci   for (unsigned i = 0; i < n; ++i) {
148bf215546Sopenharmony_ci      dests[i] = agx_temp(b->shader, vec.size);
149bf215546Sopenharmony_ci   }
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci   /* Emit the split */
152bf215546Sopenharmony_ci   agx_p_split_to(b, dests[0], dests[1], dests[2], dests[3], vec);
153bf215546Sopenharmony_ci}
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_cistatic void
156bf215546Sopenharmony_ciagx_emit_cached_split(agx_builder *b, agx_index vec, unsigned n)
157bf215546Sopenharmony_ci{
158bf215546Sopenharmony_ci   agx_index dests[4] = { agx_null(), agx_null(), agx_null(), agx_null() };
159bf215546Sopenharmony_ci   agx_emit_split(b, dests, vec, n);
160bf215546Sopenharmony_ci   agx_cache_combine(b, vec, dests[0], dests[1], dests[2], dests[3]);
161bf215546Sopenharmony_ci}
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_cistatic void
164bf215546Sopenharmony_ciagx_emit_load_const(agx_builder *b, nir_load_const_instr *instr)
165bf215546Sopenharmony_ci{
166bf215546Sopenharmony_ci   /* Ensure we've been scalarized and bit size lowered */
167bf215546Sopenharmony_ci   unsigned bit_size = instr->def.bit_size;
168bf215546Sopenharmony_ci   assert(instr->def.num_components == 1);
169bf215546Sopenharmony_ci   assert(bit_size == 1 || bit_size == 16 || bit_size == 32);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   /* Emit move, later passes can inline/push if useful */
172bf215546Sopenharmony_ci   agx_mov_imm_to(b,
173bf215546Sopenharmony_ci                  agx_get_index(instr->def.index, agx_size_for_bits(bit_size)),
174bf215546Sopenharmony_ci                  nir_const_value_as_uint(instr->value[0], bit_size));
175bf215546Sopenharmony_ci}
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci/* Emit code dividing P by Q */
178bf215546Sopenharmony_cistatic agx_index
179bf215546Sopenharmony_ciagx_udiv_const(agx_builder *b, agx_index P, uint32_t Q)
180bf215546Sopenharmony_ci{
181bf215546Sopenharmony_ci   /* P / 1 = P */
182bf215546Sopenharmony_ci   if (Q == 1) {
183bf215546Sopenharmony_ci      return P;
184bf215546Sopenharmony_ci   }
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   /* P / UINT32_MAX = 0, unless P = UINT32_MAX when it's one */
187bf215546Sopenharmony_ci   if (Q == UINT32_MAX) {
188bf215546Sopenharmony_ci      agx_index max = agx_mov_imm(b, 32, UINT32_MAX);
189bf215546Sopenharmony_ci      agx_index one = agx_mov_imm(b, 32, 1);
190bf215546Sopenharmony_ci      return agx_icmpsel(b, P, max, one, agx_zero(), AGX_ICOND_UEQ);
191bf215546Sopenharmony_ci   }
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci   /* P / 2^N = P >> N */
194bf215546Sopenharmony_ci   if (util_is_power_of_two_or_zero(Q)) {
195bf215546Sopenharmony_ci      return agx_ushr(b, P, agx_mov_imm(b, 32, util_logbase2(Q)));
196bf215546Sopenharmony_ci   }
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci   /* Fall back on multiplication by a magic number */
199bf215546Sopenharmony_ci   struct util_fast_udiv_info info = util_compute_fast_udiv_info(Q, 32, 32);
200bf215546Sopenharmony_ci   agx_index preshift = agx_mov_imm(b, 32, info.pre_shift);
201bf215546Sopenharmony_ci   agx_index increment = agx_mov_imm(b, 32, info.increment);
202bf215546Sopenharmony_ci   agx_index postshift = agx_mov_imm(b, 32, info.post_shift);
203bf215546Sopenharmony_ci   agx_index multiplier = agx_mov_imm(b, 32, info.multiplier);
204bf215546Sopenharmony_ci   agx_index multiplied = agx_temp(b->shader, AGX_SIZE_64);
205bf215546Sopenharmony_ci   agx_index n = P;
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci   if (info.pre_shift != 0) n = agx_ushr(b, n, preshift);
208bf215546Sopenharmony_ci   if (info.increment != 0) n = agx_iadd(b, n, increment, 0);
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   /* 64-bit multiplication, zero extending 32-bit x 32-bit, get the top word */
211bf215546Sopenharmony_ci   agx_imad_to(b, multiplied, agx_abs(n), agx_abs(multiplier), agx_zero(), 0);
212bf215546Sopenharmony_ci   n = agx_temp(b->shader, AGX_SIZE_32);
213bf215546Sopenharmony_ci   agx_p_extract_to(b, n, multiplied, 1);
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   if (info.post_shift != 0) n = agx_ushr(b, n, postshift);
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   return n;
218bf215546Sopenharmony_ci}
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci/* AGX appears to lack support for vertex attributes. Lower to global loads. */
221bf215546Sopenharmony_cistatic void
222bf215546Sopenharmony_ciagx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
223bf215546Sopenharmony_ci{
224bf215546Sopenharmony_ci   nir_src *offset_src = nir_get_io_offset_src(instr);
225bf215546Sopenharmony_ci   assert(nir_src_is_const(*offset_src) && "no attribute indirects");
226bf215546Sopenharmony_ci   unsigned index = nir_intrinsic_base(instr) +
227bf215546Sopenharmony_ci                    nir_src_as_uint(*offset_src);
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci   struct agx_shader_key *key = b->shader->key;
230bf215546Sopenharmony_ci   struct agx_attribute attrib = key->vs.attributes[index];
231bf215546Sopenharmony_ci
232bf215546Sopenharmony_ci   /* address = base + (stride * vertex_id) + src_offset */
233bf215546Sopenharmony_ci   unsigned buf = attrib.buf;
234bf215546Sopenharmony_ci   unsigned stride = key->vs.vbuf_strides[buf];
235bf215546Sopenharmony_ci   unsigned shift = agx_format_shift(attrib.format);
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   agx_index shifted_stride = agx_mov_imm(b, 32, stride >> shift);
238bf215546Sopenharmony_ci   agx_index src_offset = agx_mov_imm(b, 32, attrib.src_offset);
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   agx_index vertex_id = agx_register(10, AGX_SIZE_32);
241bf215546Sopenharmony_ci   agx_index instance_id = agx_register(12, AGX_SIZE_32);
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   /* A nonzero divisor requires dividing the instance ID. A zero divisor
244bf215546Sopenharmony_ci    * specifies per-instance data. */
245bf215546Sopenharmony_ci   agx_index element_id = (attrib.divisor == 0) ? vertex_id :
246bf215546Sopenharmony_ci                          agx_udiv_const(b, instance_id, attrib.divisor);
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci   agx_index offset = agx_imad(b, element_id, shifted_stride, src_offset, 0);
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   /* Each VBO has a 64-bit = 4 x 16-bit address, lookup the base address as a sysval */
251bf215546Sopenharmony_ci   unsigned num_vbos = key->vs.num_vbufs;
252bf215546Sopenharmony_ci   unsigned base_length = (num_vbos * 4);
253bf215546Sopenharmony_ci   agx_index base = agx_indexed_sysval(b->shader,
254bf215546Sopenharmony_ci                                       AGX_PUSH_VBO_BASES, AGX_SIZE_64, buf * 4, base_length);
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci   /* Load the data */
257bf215546Sopenharmony_ci   assert(instr->num_components <= 4);
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   unsigned actual_comps = (attrib.nr_comps_minus_1 + 1);
260bf215546Sopenharmony_ci   agx_index vec = agx_vec_for_dest(b->shader, &instr->dest);
261bf215546Sopenharmony_ci   agx_device_load_to(b, vec, base, offset, attrib.format,
262bf215546Sopenharmony_ci                      BITFIELD_MASK(attrib.nr_comps_minus_1 + 1), 0);
263bf215546Sopenharmony_ci   agx_wait(b, 0);
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   agx_emit_split(b, dests, vec, actual_comps);
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci   agx_index one = agx_mov_imm(b, 32, fui(1.0));
268bf215546Sopenharmony_ci   agx_index zero = agx_mov_imm(b, 32, 0);
269bf215546Sopenharmony_ci   agx_index default_value[4] = { zero, zero, zero, one };
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   for (unsigned i = actual_comps; i < instr->num_components; ++i)
272bf215546Sopenharmony_ci      dests[i] = default_value[i];
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_cistatic void
276bf215546Sopenharmony_ciagx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
277bf215546Sopenharmony_ci{
278bf215546Sopenharmony_ci   unsigned components = instr->num_components;
279bf215546Sopenharmony_ci   assert(components >= 1 && components <= 4);
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   nir_src *offset = nir_get_io_offset_src(instr);
282bf215546Sopenharmony_ci   assert(nir_src_is_const(*offset) && "no indirects");
283bf215546Sopenharmony_ci   unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
284bf215546Sopenharmony_ci   imm_index += nir_src_as_uint(*offset);
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci   assert(nir_dest_bit_size(instr->dest) == 32 && "no 16-bit flat shading");
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   for (unsigned i = 0; i < components; ++i) {
289bf215546Sopenharmony_ci      /* vec3 for each vertex, unknown what first 2 channels are for */
290bf215546Sopenharmony_ci      agx_index values = agx_ld_vary_flat(b, agx_immediate(imm_index + i), 1);
291bf215546Sopenharmony_ci      dests[i] = agx_p_extract(b, values, 2);
292bf215546Sopenharmony_ci   }
293bf215546Sopenharmony_ci}
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_cistatic void
296bf215546Sopenharmony_ciagx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
297bf215546Sopenharmony_ci{
298bf215546Sopenharmony_ci   ASSERTED unsigned components = instr->num_components;
299bf215546Sopenharmony_ci   ASSERTED nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci   assert(components >= 1 && components <= 4);
302bf215546Sopenharmony_ci   assert(parent);
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci   /* TODO: Interpolation modes */
305bf215546Sopenharmony_ci   assert(parent->intrinsic == nir_intrinsic_load_barycentric_pixel);
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   nir_src *offset = nir_get_io_offset_src(instr);
308bf215546Sopenharmony_ci   assert(nir_src_is_const(*offset) && "no indirects");
309bf215546Sopenharmony_ci   unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
310bf215546Sopenharmony_ci   imm_index += nir_src_as_uint(*offset) * 4;
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   agx_index vec = agx_vec_for_intr(b->shader, instr);
313bf215546Sopenharmony_ci   agx_ld_vary_to(b, vec, agx_immediate(imm_index), components, true);
314bf215546Sopenharmony_ci   agx_emit_split(b, dests, vec, components);
315bf215546Sopenharmony_ci}
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_cistatic agx_instr *
318bf215546Sopenharmony_ciagx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr)
319bf215546Sopenharmony_ci{
320bf215546Sopenharmony_ci   nir_src *offset = nir_get_io_offset_src(instr);
321bf215546Sopenharmony_ci   assert(nir_src_is_const(*offset) && "todo: indirects");
322bf215546Sopenharmony_ci   unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)];
323bf215546Sopenharmony_ci   imm_index += nir_intrinsic_component(instr);
324bf215546Sopenharmony_ci   imm_index += nir_src_as_uint(*offset);
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci   /* nir_lower_io_to_scalar */
327bf215546Sopenharmony_ci   assert(nir_intrinsic_write_mask(instr) == 0x1);
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci   return agx_st_vary(b,
330bf215546Sopenharmony_ci               agx_immediate(imm_index),
331bf215546Sopenharmony_ci               agx_src_index(&instr->src[0]));
332bf215546Sopenharmony_ci}
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_cistatic agx_instr *
335bf215546Sopenharmony_ciagx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr)
336bf215546Sopenharmony_ci{
337bf215546Sopenharmony_ci   const nir_variable *var =
338bf215546Sopenharmony_ci      nir_find_variable_with_driver_location(b->shader->nir,
339bf215546Sopenharmony_ci            nir_var_shader_out, nir_intrinsic_base(instr));
340bf215546Sopenharmony_ci   assert(var);
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci   unsigned loc = var->data.location;
343bf215546Sopenharmony_ci   assert(var->data.index == 0 && "todo: dual-source blending");
344bf215546Sopenharmony_ci   assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
345bf215546Sopenharmony_ci   unsigned rt = (loc - FRAG_RESULT_DATA0);
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci   /* TODO: Reverse-engineer interactions with MRT */
348bf215546Sopenharmony_ci   if (b->shader->nir->info.internal) {
349bf215546Sopenharmony_ci      /* clear */
350bf215546Sopenharmony_ci   } else if (b->shader->did_writeout) {
351bf215546Sopenharmony_ci	   agx_writeout(b, 0x0004);
352bf215546Sopenharmony_ci   } else {
353bf215546Sopenharmony_ci	   agx_writeout(b, 0xC200);
354bf215546Sopenharmony_ci	   agx_writeout(b, 0x000C);
355bf215546Sopenharmony_ci   }
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   if (b->shader->nir->info.fs.uses_discard) {
358bf215546Sopenharmony_ci      /* If the shader uses discard, the sample mask must be written by the
359bf215546Sopenharmony_ci       * shader on all exeuction paths. If we've reached the end of the shader,
360bf215546Sopenharmony_ci       * we are therefore still active and need to write a full sample mask.
361bf215546Sopenharmony_ci       * TODO: interactions with MSAA and gl_SampleMask writes
362bf215546Sopenharmony_ci       */
363bf215546Sopenharmony_ci      agx_sample_mask(b, agx_immediate(1));
364bf215546Sopenharmony_ci   }
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci   b->shader->did_writeout = true;
367bf215546Sopenharmony_ci   return agx_st_tile(b, agx_src_index(&instr->src[0]),
368bf215546Sopenharmony_ci             b->shader->key->fs.tib_formats[rt]);
369bf215546Sopenharmony_ci}
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_cistatic void
372bf215546Sopenharmony_ciagx_emit_load_tile(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
373bf215546Sopenharmony_ci{
374bf215546Sopenharmony_ci   const nir_variable *var =
375bf215546Sopenharmony_ci      nir_find_variable_with_driver_location(b->shader->nir,
376bf215546Sopenharmony_ci            nir_var_shader_out, nir_intrinsic_base(instr));
377bf215546Sopenharmony_ci   assert(var);
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   unsigned loc = var->data.location;
380bf215546Sopenharmony_ci   assert(var->data.index == 0 && "todo: dual-source blending");
381bf215546Sopenharmony_ci   assert(loc == FRAG_RESULT_DATA0 && "todo: MRT");
382bf215546Sopenharmony_ci   unsigned rt = (loc - FRAG_RESULT_DATA0);
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci   /* TODO: Reverse-engineer interactions with MRT */
385bf215546Sopenharmony_ci   agx_writeout(b, 0xC200);
386bf215546Sopenharmony_ci   agx_writeout(b, 0x0008);
387bf215546Sopenharmony_ci   b->shader->did_writeout = true;
388bf215546Sopenharmony_ci   b->shader->out->reads_tib = true;
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   agx_index vec = agx_vec_for_dest(b->shader, &instr->dest);
391bf215546Sopenharmony_ci   agx_ld_tile_to(b, vec, b->shader->key->fs.tib_formats[rt]);
392bf215546Sopenharmony_ci   agx_emit_split(b, dests, vec, 4);
393bf215546Sopenharmony_ci}
394bf215546Sopenharmony_ci
395bf215546Sopenharmony_cistatic enum agx_format
396bf215546Sopenharmony_ciagx_format_for_bits(unsigned bits)
397bf215546Sopenharmony_ci{
398bf215546Sopenharmony_ci   switch (bits) {
399bf215546Sopenharmony_ci   case 8: return AGX_FORMAT_I8;
400bf215546Sopenharmony_ci   case 16: return AGX_FORMAT_I16;
401bf215546Sopenharmony_ci   case 32: return AGX_FORMAT_I32;
402bf215546Sopenharmony_ci   default: unreachable("Invalid bit size for load/store");
403bf215546Sopenharmony_ci   }
404bf215546Sopenharmony_ci}
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_cistatic agx_instr *
407bf215546Sopenharmony_ciagx_emit_load_ubo(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr)
408bf215546Sopenharmony_ci{
409bf215546Sopenharmony_ci   bool kernel_input = (instr->intrinsic == nir_intrinsic_load_kernel_input);
410bf215546Sopenharmony_ci   nir_src *offset = nir_get_io_offset_src(instr);
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   if (!kernel_input && !nir_src_is_const(instr->src[0]))
413bf215546Sopenharmony_ci      unreachable("todo: indirect UBO access");
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   /* UBO blocks are specified (kernel inputs are always 0) */
416bf215546Sopenharmony_ci   uint32_t block = kernel_input ? 0 : nir_src_as_uint(instr->src[0]);
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci   /* Each UBO has a 64-bit = 4 x 16-bit address */
419bf215546Sopenharmony_ci   unsigned num_ubos = b->shader->nir->info.num_ubos;
420bf215546Sopenharmony_ci   unsigned base_length = (num_ubos * 4);
421bf215546Sopenharmony_ci   unsigned index = block * 4; /* 16 bit units */
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   /* Lookup the base address (TODO: indirection) */
424bf215546Sopenharmony_ci   agx_index base = agx_indexed_sysval(b->shader,
425bf215546Sopenharmony_ci                                       AGX_PUSH_UBO_BASES, AGX_SIZE_64,
426bf215546Sopenharmony_ci                                       index, base_length);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci   /* Load the data */
429bf215546Sopenharmony_ci   assert(instr->num_components <= 4);
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci   agx_device_load_to(b, dst, base, agx_src_index(offset),
432bf215546Sopenharmony_ci                      agx_format_for_bits(nir_dest_bit_size(instr->dest)),
433bf215546Sopenharmony_ci                      BITFIELD_MASK(instr->num_components), 0);
434bf215546Sopenharmony_ci   agx_wait(b, 0);
435bf215546Sopenharmony_ci   agx_emit_cached_split(b, dst, instr->num_components);
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   return NULL;
438bf215546Sopenharmony_ci}
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_cistatic void
441bf215546Sopenharmony_ciagx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
442bf215546Sopenharmony_ci{
443bf215546Sopenharmony_ci   /* xy */
444bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; ++i) {
445bf215546Sopenharmony_ci      dests[i] = agx_fadd(b, agx_convert(b, agx_immediate(AGX_CONVERT_U32_TO_F),
446bf215546Sopenharmony_ci               agx_get_sr(b, 32, AGX_SR_THREAD_POSITION_IN_GRID_X + i),
447bf215546Sopenharmony_ci               AGX_ROUND_RTE), agx_immediate_f(0.5f));
448bf215546Sopenharmony_ci   }
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   dests[2] = agx_ld_vary(b, agx_immediate(1), 1, false); /* z */
451bf215546Sopenharmony_ci   dests[3] = agx_ld_vary(b, agx_immediate(0), 1, false); /* w */
452bf215546Sopenharmony_ci}
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_cistatic agx_instr *
455bf215546Sopenharmony_ciagx_blend_const(agx_builder *b, agx_index dst, unsigned comp)
456bf215546Sopenharmony_ci{
457bf215546Sopenharmony_ci     agx_index val = agx_indexed_sysval(b->shader,
458bf215546Sopenharmony_ci           AGX_PUSH_BLEND_CONST, AGX_SIZE_32, comp * 2, 4 * 2);
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci     return agx_mov_to(b, dst, val);
461bf215546Sopenharmony_ci}
462bf215546Sopenharmony_ci
463bf215546Sopenharmony_ci/*
464bf215546Sopenharmony_ci * Demoting a helper invocation is logically equivalent to zeroing the sample
465bf215546Sopenharmony_ci * mask. Metal implement discard as such.
466bf215546Sopenharmony_ci *
467bf215546Sopenharmony_ci * XXX: Actually, Metal's "discard" is a demote, and what is implemented here
468bf215546Sopenharmony_ci * is a demote. There might be a better way to implement this to get correct
469bf215546Sopenharmony_ci * helper invocation semantics. For now, I'm kicking the can down the road.
470bf215546Sopenharmony_ci */
471bf215546Sopenharmony_cistatic agx_instr *
472bf215546Sopenharmony_ciagx_emit_discard(agx_builder *b, nir_intrinsic_instr *instr)
473bf215546Sopenharmony_ci{
474bf215546Sopenharmony_ci   agx_writeout(b, 0xC200);
475bf215546Sopenharmony_ci   agx_writeout(b, 0x0001);
476bf215546Sopenharmony_ci   b->shader->did_writeout = true;
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   b->shader->out->writes_sample_mask = true;
479bf215546Sopenharmony_ci   return agx_sample_mask(b, agx_immediate(0));
480bf215546Sopenharmony_ci}
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_cistatic agx_instr *
483bf215546Sopenharmony_ciagx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
484bf215546Sopenharmony_ci{
485bf215546Sopenharmony_ci  agx_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest ?
486bf215546Sopenharmony_ci     agx_dest_index(&instr->dest) : agx_null();
487bf215546Sopenharmony_ci  gl_shader_stage stage = b->shader->stage;
488bf215546Sopenharmony_ci  agx_index dests[4] = { agx_null() };
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci  switch (instr->intrinsic) {
491bf215546Sopenharmony_ci  case nir_intrinsic_load_barycentric_pixel:
492bf215546Sopenharmony_ci  case nir_intrinsic_load_barycentric_centroid:
493bf215546Sopenharmony_ci  case nir_intrinsic_load_barycentric_sample:
494bf215546Sopenharmony_ci  case nir_intrinsic_load_barycentric_at_sample:
495bf215546Sopenharmony_ci  case nir_intrinsic_load_barycentric_at_offset:
496bf215546Sopenharmony_ci     /* handled later via load_vary */
497bf215546Sopenharmony_ci     return NULL;
498bf215546Sopenharmony_ci  case nir_intrinsic_load_interpolated_input:
499bf215546Sopenharmony_ci     assert(stage == MESA_SHADER_FRAGMENT);
500bf215546Sopenharmony_ci     agx_emit_load_vary(b, dests, instr);
501bf215546Sopenharmony_ci     break;
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci  case nir_intrinsic_load_input:
504bf215546Sopenharmony_ci     if (stage == MESA_SHADER_FRAGMENT)
505bf215546Sopenharmony_ci        agx_emit_load_vary_flat(b, dests, instr);
506bf215546Sopenharmony_ci     else if (stage == MESA_SHADER_VERTEX)
507bf215546Sopenharmony_ci        agx_emit_load_attr(b, dests, instr);
508bf215546Sopenharmony_ci     else
509bf215546Sopenharmony_ci        unreachable("Unsupported shader stage");
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci     break;
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci  case nir_intrinsic_store_output:
514bf215546Sopenharmony_ci     if (stage == MESA_SHADER_FRAGMENT)
515bf215546Sopenharmony_ci        return agx_emit_fragment_out(b, instr);
516bf215546Sopenharmony_ci     else if (stage == MESA_SHADER_VERTEX)
517bf215546Sopenharmony_ci        return agx_emit_store_vary(b, instr);
518bf215546Sopenharmony_ci     else
519bf215546Sopenharmony_ci        unreachable("Unsupported shader stage");
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci  case nir_intrinsic_load_output:
522bf215546Sopenharmony_ci     assert(stage == MESA_SHADER_FRAGMENT);
523bf215546Sopenharmony_ci     agx_emit_load_tile(b, dests, instr);
524bf215546Sopenharmony_ci     break;
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci  case nir_intrinsic_load_ubo:
527bf215546Sopenharmony_ci  case nir_intrinsic_load_kernel_input:
528bf215546Sopenharmony_ci     return agx_emit_load_ubo(b, dst, instr);
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci  case nir_intrinsic_load_frag_coord:
531bf215546Sopenharmony_ci     agx_emit_load_frag_coord(b, dests, instr);
532bf215546Sopenharmony_ci     break;
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci  case nir_intrinsic_discard:
535bf215546Sopenharmony_ci     return agx_emit_discard(b, instr);
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci  case nir_intrinsic_load_back_face_agx:
538bf215546Sopenharmony_ci     return agx_get_sr_to(b, dst, AGX_SR_BACKFACING);
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci  case nir_intrinsic_load_vertex_id:
541bf215546Sopenharmony_ci     return agx_mov_to(b, dst, agx_abs(agx_register(10, AGX_SIZE_32)));
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci  case nir_intrinsic_load_instance_id:
544bf215546Sopenharmony_ci     return agx_mov_to(b, dst, agx_abs(agx_register(12, AGX_SIZE_32)));
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_ci  case nir_intrinsic_load_blend_const_color_r_float: return agx_blend_const(b, dst, 0);
547bf215546Sopenharmony_ci  case nir_intrinsic_load_blend_const_color_g_float: return agx_blend_const(b, dst, 1);
548bf215546Sopenharmony_ci  case nir_intrinsic_load_blend_const_color_b_float: return agx_blend_const(b, dst, 2);
549bf215546Sopenharmony_ci  case nir_intrinsic_load_blend_const_color_a_float: return agx_blend_const(b, dst, 3);
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci  default:
552bf215546Sopenharmony_ci       fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name);
553bf215546Sopenharmony_ci       unreachable("Unhandled intrinsic");
554bf215546Sopenharmony_ci  }
555bf215546Sopenharmony_ci
556bf215546Sopenharmony_ci  /* If we got here, there is a vector destination for the intrinsic composed
557bf215546Sopenharmony_ci   * of separate scalars. Its components are specified separately in the dests
558bf215546Sopenharmony_ci   * array. We need to combine them so the vector destination itself is valid.
559bf215546Sopenharmony_ci   * If only individual components are accessed, this combine will be dead code
560bf215546Sopenharmony_ci   * eliminated.
561bf215546Sopenharmony_ci   */
562bf215546Sopenharmony_ci  return agx_emit_combine_to(b, dst, dests[0], dests[1], dests[2], dests[3]);
563bf215546Sopenharmony_ci}
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_cistatic agx_index
566bf215546Sopenharmony_ciagx_alu_src_index(agx_builder *b, nir_alu_src src)
567bf215546Sopenharmony_ci{
568bf215546Sopenharmony_ci   /* Check well-formedness of the input NIR */
569bf215546Sopenharmony_ci   ASSERTED unsigned bitsize = nir_src_bit_size(src.src);
570bf215546Sopenharmony_ci   unsigned comps = nir_src_num_components(src.src);
571bf215546Sopenharmony_ci   unsigned channel = src.swizzle[0];
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci   assert(bitsize == 1 || bitsize == 16 || bitsize == 32 || bitsize == 64);
574bf215546Sopenharmony_ci   assert(!(src.negate || src.abs));
575bf215546Sopenharmony_ci   assert(channel < comps);
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci   agx_index idx = agx_src_index(&src.src);
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci   /* We only deal with scalars, extract a single scalar if needed */
580bf215546Sopenharmony_ci   if (comps > 1)
581bf215546Sopenharmony_ci      return agx_emit_extract(b, idx, channel);
582bf215546Sopenharmony_ci   else
583bf215546Sopenharmony_ci      return idx;
584bf215546Sopenharmony_ci}
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_cistatic agx_instr *
587bf215546Sopenharmony_ciagx_emit_alu_bool(agx_builder *b, nir_op op,
588bf215546Sopenharmony_ci      agx_index dst, agx_index s0, agx_index s1, agx_index s2)
589bf215546Sopenharmony_ci{
590bf215546Sopenharmony_ci   /* Handle 1-bit bools as zero/nonzero rather than specifically 0/1 or 0/~0.
591bf215546Sopenharmony_ci    * This will give the optimizer flexibility. */
592bf215546Sopenharmony_ci   agx_index f = agx_immediate(0);
593bf215546Sopenharmony_ci   agx_index t = agx_immediate(0x1);
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci   switch (op) {
596bf215546Sopenharmony_ci   case nir_op_feq: return agx_fcmpsel_to(b, dst, s0, s1, t, f, AGX_FCOND_EQ);
597bf215546Sopenharmony_ci   case nir_op_flt: return agx_fcmpsel_to(b, dst, s0, s1, t, f, AGX_FCOND_LT);
598bf215546Sopenharmony_ci   case nir_op_fge: return agx_fcmpsel_to(b, dst, s0, s1, t, f, AGX_FCOND_GE);
599bf215546Sopenharmony_ci   case nir_op_fneu: return agx_fcmpsel_to(b, dst, s0, s1, f, t, AGX_FCOND_EQ);
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   case nir_op_ieq: return agx_icmpsel_to(b, dst, s0, s1, t, f, AGX_ICOND_UEQ);
602bf215546Sopenharmony_ci   case nir_op_ine: return agx_icmpsel_to(b, dst, s0, s1, f, t, AGX_ICOND_UEQ);
603bf215546Sopenharmony_ci   case nir_op_ilt: return agx_icmpsel_to(b, dst, s0, s1, t, f, AGX_ICOND_SLT);
604bf215546Sopenharmony_ci   case nir_op_ige: return agx_icmpsel_to(b, dst, s0, s1, f, t, AGX_ICOND_SLT);
605bf215546Sopenharmony_ci   case nir_op_ult: return agx_icmpsel_to(b, dst, s0, s1, t, f, AGX_ICOND_ULT);
606bf215546Sopenharmony_ci   case nir_op_uge: return agx_icmpsel_to(b, dst, s0, s1, f, t, AGX_ICOND_ULT);
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   case nir_op_mov: return agx_mov_to(b, dst, s0);
609bf215546Sopenharmony_ci   case nir_op_iand: return agx_and_to(b, dst, s0, s1);
610bf215546Sopenharmony_ci   case nir_op_ior: return agx_or_to(b, dst, s0, s1);
611bf215546Sopenharmony_ci   case nir_op_ixor: return agx_xor_to(b, dst, s0, s1);
612bf215546Sopenharmony_ci   case nir_op_inot: return agx_xor_to(b, dst, s0, t);
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   case nir_op_f2b1: return agx_fcmpsel_to(b, dst, s0, f, f, t, AGX_FCOND_EQ);
615bf215546Sopenharmony_ci   case nir_op_i2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ);
616bf215546Sopenharmony_ci   case nir_op_b2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ);
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci   case nir_op_bcsel:
619bf215546Sopenharmony_ci      return agx_icmpsel_to(b, dst, s0, f, s2, s1, AGX_ICOND_UEQ);
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci   default:
622bf215546Sopenharmony_ci      fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[op].name);
623bf215546Sopenharmony_ci      unreachable("Unhandled boolean ALU instruction");
624bf215546Sopenharmony_ci   }
625bf215546Sopenharmony_ci}
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_cistatic agx_instr *
628bf215546Sopenharmony_ciagx_emit_alu(agx_builder *b, nir_alu_instr *instr)
629bf215546Sopenharmony_ci{
630bf215546Sopenharmony_ci   unsigned srcs = nir_op_infos[instr->op].num_inputs;
631bf215546Sopenharmony_ci   unsigned sz = nir_dest_bit_size(instr->dest.dest);
632bf215546Sopenharmony_ci   unsigned src_sz = srcs ? nir_src_bit_size(instr->src[0].src) : 0;
633bf215546Sopenharmony_ci   ASSERTED unsigned comps = nir_dest_num_components(instr->dest.dest);
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci   assert(comps == 1 || nir_op_is_vec(instr->op));
636bf215546Sopenharmony_ci   assert(sz == 1 || sz == 16 || sz == 32 || sz == 64);
637bf215546Sopenharmony_ci
638bf215546Sopenharmony_ci   agx_index dst = agx_dest_index(&instr->dest.dest);
639bf215546Sopenharmony_ci   agx_index s0 = srcs > 0 ? agx_alu_src_index(b, instr->src[0]) : agx_null();
640bf215546Sopenharmony_ci   agx_index s1 = srcs > 1 ? agx_alu_src_index(b, instr->src[1]) : agx_null();
641bf215546Sopenharmony_ci   agx_index s2 = srcs > 2 ? agx_alu_src_index(b, instr->src[2]) : agx_null();
642bf215546Sopenharmony_ci   agx_index s3 = srcs > 3 ? agx_alu_src_index(b, instr->src[3]) : agx_null();
643bf215546Sopenharmony_ci
644bf215546Sopenharmony_ci   /* 1-bit bools are a bit special, only handle with select ops */
645bf215546Sopenharmony_ci   if (sz == 1)
646bf215546Sopenharmony_ci      return agx_emit_alu_bool(b, instr->op, dst, s0, s1, s2);
647bf215546Sopenharmony_ci
648bf215546Sopenharmony_ci#define UNOP(nop, aop) \
649bf215546Sopenharmony_ci   case nir_op_ ## nop: return agx_ ## aop ## _to(b, dst, s0);
650bf215546Sopenharmony_ci#define BINOP(nop, aop) \
651bf215546Sopenharmony_ci   case nir_op_ ## nop: return agx_ ## aop ## _to(b, dst, s0, s1);
652bf215546Sopenharmony_ci#define TRIOP(nop, aop) \
653bf215546Sopenharmony_ci   case nir_op_ ## nop: return agx_ ## aop ## _to(b, dst, s0, s1, s2);
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_ci   switch (instr->op) {
656bf215546Sopenharmony_ci   BINOP(fadd, fadd);
657bf215546Sopenharmony_ci   BINOP(fmul, fmul);
658bf215546Sopenharmony_ci   TRIOP(ffma, fma);
659bf215546Sopenharmony_ci
660bf215546Sopenharmony_ci   UNOP(f2f16, fmov);
661bf215546Sopenharmony_ci   UNOP(f2f32, fmov);
662bf215546Sopenharmony_ci   UNOP(fround_even, roundeven);
663bf215546Sopenharmony_ci   UNOP(ftrunc, trunc);
664bf215546Sopenharmony_ci   UNOP(ffloor, floor);
665bf215546Sopenharmony_ci   UNOP(fceil, ceil);
666bf215546Sopenharmony_ci   UNOP(frcp, rcp);
667bf215546Sopenharmony_ci   UNOP(frsq, rsqrt);
668bf215546Sopenharmony_ci   UNOP(flog2, log2);
669bf215546Sopenharmony_ci   UNOP(fexp2, exp2);
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_ci   UNOP(fddx, dfdx);
672bf215546Sopenharmony_ci   UNOP(fddx_coarse, dfdx);
673bf215546Sopenharmony_ci   UNOP(fddx_fine, dfdx);
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_ci   UNOP(fddy, dfdy);
676bf215546Sopenharmony_ci   UNOP(fddy_coarse, dfdy);
677bf215546Sopenharmony_ci   UNOP(fddy_fine, dfdy);
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci   UNOP(mov, mov);
680bf215546Sopenharmony_ci   UNOP(u2u16, mov);
681bf215546Sopenharmony_ci   UNOP(u2u32, mov);
682bf215546Sopenharmony_ci   UNOP(inot, not);
683bf215546Sopenharmony_ci   BINOP(iand, and);
684bf215546Sopenharmony_ci   BINOP(ior, or);
685bf215546Sopenharmony_ci   BINOP(ixor, xor);
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci   case nir_op_fsqrt: return agx_fmul_to(b, dst, s0, agx_srsqrt(b, s0));
688bf215546Sopenharmony_ci   case nir_op_fsub: return agx_fadd_to(b, dst, s0, agx_neg(s1));
689bf215546Sopenharmony_ci   case nir_op_fabs: return agx_fmov_to(b, dst, agx_abs(s0));
690bf215546Sopenharmony_ci   case nir_op_fneg: return agx_fmov_to(b, dst, agx_neg(s0));
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci   case nir_op_fmin: return agx_fcmpsel_to(b, dst, s0, s1, s0, s1, AGX_FCOND_LTN);
693bf215546Sopenharmony_ci   case nir_op_fmax: return agx_fcmpsel_to(b, dst, s0, s1, s0, s1, AGX_FCOND_GTN);
694bf215546Sopenharmony_ci   case nir_op_imin: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_SLT);
695bf215546Sopenharmony_ci   case nir_op_imax: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_SGT);
696bf215546Sopenharmony_ci   case nir_op_umin: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_ULT);
697bf215546Sopenharmony_ci   case nir_op_umax: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_UGT);
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci   case nir_op_iadd: return agx_iadd_to(b, dst, s0, s1, 0);
700bf215546Sopenharmony_ci   case nir_op_isub: return agx_iadd_to(b, dst, s0, agx_neg(s1), 0);
701bf215546Sopenharmony_ci   case nir_op_ineg: return agx_iadd_to(b, dst, agx_zero(), agx_neg(s0), 0);
702bf215546Sopenharmony_ci   case nir_op_imul: return agx_imad_to(b, dst, s0, s1, agx_zero(), 0);
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_ci   case nir_op_ishl: return agx_bfi_to(b, dst, agx_zero(), s0, s1, 0);
705bf215546Sopenharmony_ci   case nir_op_ushr: return agx_ushr_to(b, dst, s0, s1);
706bf215546Sopenharmony_ci   case nir_op_ishr: return agx_asr_to(b, dst, s0, s1);
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci   case nir_op_bcsel:
709bf215546Sopenharmony_ci      return agx_icmpsel_to(b, dst, s0, agx_zero(), s2, s1, AGX_ICOND_UEQ);
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci   case nir_op_b2i32:
712bf215546Sopenharmony_ci   case nir_op_b2i16:
713bf215546Sopenharmony_ci      return agx_icmpsel_to(b, dst, s0, agx_zero(), agx_zero(), agx_immediate(1), AGX_ICOND_UEQ);
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci   case nir_op_b2f16:
716bf215546Sopenharmony_ci   case nir_op_b2f32:
717bf215546Sopenharmony_ci   {
718bf215546Sopenharmony_ci      /* At this point, boolean is just zero/nonzero, so compare with zero */
719bf215546Sopenharmony_ci      agx_index one = (sz == 16) ?
720bf215546Sopenharmony_ci         agx_mov_imm(b, 16, _mesa_float_to_half(1.0)) :
721bf215546Sopenharmony_ci         agx_mov_imm(b, 32, fui(1.0));
722bf215546Sopenharmony_ci
723bf215546Sopenharmony_ci      agx_index zero = agx_zero();
724bf215546Sopenharmony_ci
725bf215546Sopenharmony_ci      return agx_fcmpsel_to(b, dst, s0, zero, zero, one, AGX_FCOND_EQ);
726bf215546Sopenharmony_ci   }
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci   case nir_op_i2i32:
729bf215546Sopenharmony_ci   {
730bf215546Sopenharmony_ci      if (s0.size != AGX_SIZE_16)
731bf215546Sopenharmony_ci         unreachable("todo: more conversions");
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci      return agx_iadd_to(b, dst, s0, agx_zero(), 0);
734bf215546Sopenharmony_ci   }
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci   case nir_op_i2i16:
737bf215546Sopenharmony_ci   {
738bf215546Sopenharmony_ci      if (s0.size != AGX_SIZE_32)
739bf215546Sopenharmony_ci         unreachable("todo: more conversions");
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci      return agx_iadd_to(b, dst, s0, agx_zero(), 0);
742bf215546Sopenharmony_ci   }
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci   case nir_op_iadd_sat:
745bf215546Sopenharmony_ci   {
746bf215546Sopenharmony_ci      agx_instr *I = agx_iadd_to(b, dst, s0, s1, 0);
747bf215546Sopenharmony_ci      I->saturate = true;
748bf215546Sopenharmony_ci      return I;
749bf215546Sopenharmony_ci   }
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci   case nir_op_isub_sat:
752bf215546Sopenharmony_ci   {
753bf215546Sopenharmony_ci      agx_instr *I = agx_iadd_to(b, dst, s0, agx_neg(s1), 0);
754bf215546Sopenharmony_ci      I->saturate = true;
755bf215546Sopenharmony_ci      return I;
756bf215546Sopenharmony_ci   }
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   case nir_op_uadd_sat:
759bf215546Sopenharmony_ci   {
760bf215546Sopenharmony_ci      agx_instr *I = agx_iadd_to(b, dst, agx_abs(s0), agx_abs(s1), 0);
761bf215546Sopenharmony_ci      I->saturate = true;
762bf215546Sopenharmony_ci      return I;
763bf215546Sopenharmony_ci   }
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci   case nir_op_usub_sat:
766bf215546Sopenharmony_ci   {
767bf215546Sopenharmony_ci      agx_instr *I = agx_iadd_to(b, dst, agx_abs(s0), agx_neg(agx_abs(s1)), 0);
768bf215546Sopenharmony_ci      I->saturate = true;
769bf215546Sopenharmony_ci      return I;
770bf215546Sopenharmony_ci   }
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci   case nir_op_fsat:
773bf215546Sopenharmony_ci   {
774bf215546Sopenharmony_ci      agx_instr *I = agx_fadd_to(b, dst, s0, agx_negzero());
775bf215546Sopenharmony_ci      I->saturate = true;
776bf215546Sopenharmony_ci      return I;
777bf215546Sopenharmony_ci   }
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_ci   case nir_op_fsin_agx:
780bf215546Sopenharmony_ci   {
781bf215546Sopenharmony_ci      agx_index fixup = agx_sin_pt_1(b, s0);
782bf215546Sopenharmony_ci      agx_index sinc = agx_sin_pt_2(b, fixup);
783bf215546Sopenharmony_ci      return agx_fmul_to(b, dst, sinc, fixup);
784bf215546Sopenharmony_ci   }
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci   case nir_op_f2i16:
787bf215546Sopenharmony_ci      return agx_convert_to(b, dst,
788bf215546Sopenharmony_ci            agx_immediate(AGX_CONVERT_F_TO_S16), s0, AGX_ROUND_RTZ);
789bf215546Sopenharmony_ci
790bf215546Sopenharmony_ci   case nir_op_f2i32:
791bf215546Sopenharmony_ci      return agx_convert_to(b, dst,
792bf215546Sopenharmony_ci            agx_immediate(AGX_CONVERT_F_TO_S32), s0, AGX_ROUND_RTZ);
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   case nir_op_f2u16:
795bf215546Sopenharmony_ci      return agx_convert_to(b, dst,
796bf215546Sopenharmony_ci            agx_immediate(AGX_CONVERT_F_TO_U16), s0, AGX_ROUND_RTZ);
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   case nir_op_f2u32:
799bf215546Sopenharmony_ci      return agx_convert_to(b, dst,
800bf215546Sopenharmony_ci            agx_immediate(AGX_CONVERT_F_TO_U32), s0, AGX_ROUND_RTZ);
801bf215546Sopenharmony_ci
802bf215546Sopenharmony_ci   case nir_op_u2f16:
803bf215546Sopenharmony_ci   case nir_op_u2f32:
804bf215546Sopenharmony_ci   {
805bf215546Sopenharmony_ci      if (src_sz == 64)
806bf215546Sopenharmony_ci         unreachable("64-bit conversions unimplemented");
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_ci      enum agx_convert mode =
809bf215546Sopenharmony_ci         (src_sz == 32) ? AGX_CONVERT_U32_TO_F :
810bf215546Sopenharmony_ci         (src_sz == 16) ? AGX_CONVERT_U16_TO_F :
811bf215546Sopenharmony_ci                          AGX_CONVERT_U8_TO_F;
812bf215546Sopenharmony_ci
813bf215546Sopenharmony_ci      return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE);
814bf215546Sopenharmony_ci   }
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci   case nir_op_i2f16:
817bf215546Sopenharmony_ci   case nir_op_i2f32:
818bf215546Sopenharmony_ci   {
819bf215546Sopenharmony_ci      if (src_sz == 64)
820bf215546Sopenharmony_ci         unreachable("64-bit conversions unimplemented");
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci      enum agx_convert mode =
823bf215546Sopenharmony_ci         (src_sz == 32) ? AGX_CONVERT_S32_TO_F :
824bf215546Sopenharmony_ci         (src_sz == 16) ? AGX_CONVERT_S16_TO_F :
825bf215546Sopenharmony_ci                          AGX_CONVERT_S8_TO_F;
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci      return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE);
828bf215546Sopenharmony_ci   }
829bf215546Sopenharmony_ci
830bf215546Sopenharmony_ci   case nir_op_vec2:
831bf215546Sopenharmony_ci   case nir_op_vec3:
832bf215546Sopenharmony_ci   case nir_op_vec4:
833bf215546Sopenharmony_ci      return agx_emit_combine_to(b, dst, s0, s1, s2, s3);
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci   case nir_op_vec8:
836bf215546Sopenharmony_ci   case nir_op_vec16:
837bf215546Sopenharmony_ci      unreachable("should've been lowered");
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_ci   default:
840bf215546Sopenharmony_ci      fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
841bf215546Sopenharmony_ci      unreachable("Unhandled ALU instruction");
842bf215546Sopenharmony_ci   }
843bf215546Sopenharmony_ci}
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_cistatic enum agx_dim
846bf215546Sopenharmony_ciagx_tex_dim(enum glsl_sampler_dim dim, bool array)
847bf215546Sopenharmony_ci{
848bf215546Sopenharmony_ci   switch (dim) {
849bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_1D:
850bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_BUF:
851bf215546Sopenharmony_ci      return array ? AGX_DIM_TEX_1D_ARRAY : AGX_DIM_TEX_1D;
852bf215546Sopenharmony_ci
853bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_2D:
854bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_RECT:
855bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_EXTERNAL:
856bf215546Sopenharmony_ci      return array ? AGX_DIM_TEX_2D_ARRAY : AGX_DIM_TEX_2D;
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_MS:
859bf215546Sopenharmony_ci      assert(!array && "multisampled arrays unsupported");
860bf215546Sopenharmony_ci      return AGX_DIM_TEX_2D_MS;
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_3D:
863bf215546Sopenharmony_ci      assert(!array && "3D arrays unsupported");
864bf215546Sopenharmony_ci      return AGX_DIM_TEX_3D;
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ci   case GLSL_SAMPLER_DIM_CUBE:
867bf215546Sopenharmony_ci      return array ? AGX_DIM_TEX_CUBE_ARRAY : AGX_DIM_TEX_CUBE;
868bf215546Sopenharmony_ci
869bf215546Sopenharmony_ci   default:
870bf215546Sopenharmony_ci      unreachable("Invalid sampler dim\n");
871bf215546Sopenharmony_ci   }
872bf215546Sopenharmony_ci}
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_cistatic enum agx_lod_mode
875bf215546Sopenharmony_ciagx_lod_mode_for_nir(nir_texop op)
876bf215546Sopenharmony_ci{
877bf215546Sopenharmony_ci   switch (op) {
878bf215546Sopenharmony_ci   case nir_texop_tex: return AGX_LOD_MODE_AUTO_LOD;
879bf215546Sopenharmony_ci   case nir_texop_txb: return AGX_LOD_MODE_AUTO_LOD_BIAS;
880bf215546Sopenharmony_ci   case nir_texop_txl: return AGX_LOD_MODE_LOD_MIN;
881bf215546Sopenharmony_ci   default: unreachable("Unhandled texture op");
882bf215546Sopenharmony_ci   }
883bf215546Sopenharmony_ci}
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_cistatic void
886bf215546Sopenharmony_ciagx_emit_tex(agx_builder *b, nir_tex_instr *instr)
887bf215546Sopenharmony_ci{
888bf215546Sopenharmony_ci   switch (instr->op) {
889bf215546Sopenharmony_ci   case nir_texop_tex:
890bf215546Sopenharmony_ci   case nir_texop_txl:
891bf215546Sopenharmony_ci   case nir_texop_txb:
892bf215546Sopenharmony_ci      break;
893bf215546Sopenharmony_ci   default:
894bf215546Sopenharmony_ci      unreachable("Unhandled texture op");
895bf215546Sopenharmony_ci   }
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci   agx_index coords = agx_null(),
898bf215546Sopenharmony_ci             texture = agx_immediate(instr->texture_index),
899bf215546Sopenharmony_ci             sampler = agx_immediate(instr->sampler_index),
900bf215546Sopenharmony_ci             lod = agx_immediate(0),
901bf215546Sopenharmony_ci             offset = agx_null();
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci   for (unsigned i = 0; i < instr->num_srcs; ++i) {
904bf215546Sopenharmony_ci      agx_index index = agx_src_index(&instr->src[i].src);
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci      switch (instr->src[i].src_type) {
907bf215546Sopenharmony_ci      case nir_tex_src_coord:
908bf215546Sopenharmony_ci         coords = index;
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_ci         /* Array textures are indexed by a floating-point in NIR, but by an
911bf215546Sopenharmony_ci          * integer in AGX. Convert the array index from float-to-int for array
912bf215546Sopenharmony_ci          * textures. The array index is the last source in NIR. The conversion
913bf215546Sopenharmony_ci          * is according to the rule from 8.9 ("Texture Functions") of the GLSL
914bf215546Sopenharmony_ci          * ES 3.20 specification:
915bf215546Sopenharmony_ci          *
916bf215546Sopenharmony_ci          *     max(0, min(d - 1, floor(layer + 0.5))) =
917bf215546Sopenharmony_ci          *     max(0, min(d - 1, f32_to_u32(layer + 0.5))) =
918bf215546Sopenharmony_ci          *     min(d - 1, f32_to_u32(layer + 0.5))
919bf215546Sopenharmony_ci          */
920bf215546Sopenharmony_ci         if (instr->is_array) {
921bf215546Sopenharmony_ci            unsigned nr = nir_src_num_components(instr->src[i].src);
922bf215546Sopenharmony_ci            agx_index channels[4] = {};
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci            for (unsigned i = 0; i < nr; ++i)
925bf215546Sopenharmony_ci               channels[i] = agx_emit_extract(b, index, i);
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci            agx_index layer = agx_fadd(b, channels[nr - 1],
928bf215546Sopenharmony_ci                                          agx_immediate_f(0.5f));
929bf215546Sopenharmony_ci
930bf215546Sopenharmony_ci            agx_index d1 = agx_indexed_sysval(b->shader,
931bf215546Sopenharmony_ci                  AGX_PUSH_ARRAY_SIZE_MINUS_1, AGX_SIZE_16,
932bf215546Sopenharmony_ci                  instr->texture_index, 1);
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci            layer = agx_convert(b, agx_immediate(AGX_CONVERT_F_TO_U32), layer,
935bf215546Sopenharmony_ci                                   AGX_ROUND_RTZ);
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ci            agx_index layer16 = agx_temp(b->shader, AGX_SIZE_16);
938bf215546Sopenharmony_ci            agx_mov_to(b, layer16, layer);
939bf215546Sopenharmony_ci
940bf215546Sopenharmony_ci            layer = agx_icmpsel(b, layer16, d1, layer16, d1, AGX_ICOND_ULT);
941bf215546Sopenharmony_ci
942bf215546Sopenharmony_ci            agx_index layer32 = agx_temp(b->shader, AGX_SIZE_32);
943bf215546Sopenharmony_ci            agx_mov_to(b, layer32, layer);
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci            channels[nr - 1] = layer32;
946bf215546Sopenharmony_ci            coords = agx_p_combine(b, channels[0], channels[1], channels[2], channels[3]);
947bf215546Sopenharmony_ci         } else {
948bf215546Sopenharmony_ci            coords = index;
949bf215546Sopenharmony_ci         }
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_ci         break;
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci      case nir_tex_src_lod:
954bf215546Sopenharmony_ci      case nir_tex_src_bias:
955bf215546Sopenharmony_ci         lod = index;
956bf215546Sopenharmony_ci         break;
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci      case nir_tex_src_ms_index:
959bf215546Sopenharmony_ci      case nir_tex_src_offset:
960bf215546Sopenharmony_ci      case nir_tex_src_comparator:
961bf215546Sopenharmony_ci      case nir_tex_src_texture_offset:
962bf215546Sopenharmony_ci      case nir_tex_src_sampler_offset:
963bf215546Sopenharmony_ci      default:
964bf215546Sopenharmony_ci         unreachable("todo");
965bf215546Sopenharmony_ci      }
966bf215546Sopenharmony_ci   }
967bf215546Sopenharmony_ci
968bf215546Sopenharmony_ci   agx_index dst = agx_dest_index(&instr->dest);
969bf215546Sopenharmony_ci   agx_texture_sample_to(b, dst, coords, lod, texture, sampler, offset,
970bf215546Sopenharmony_ci         agx_tex_dim(instr->sampler_dim, instr->is_array),
971bf215546Sopenharmony_ci         agx_lod_mode_for_nir(instr->op),
972bf215546Sopenharmony_ci         0xF, /* TODO: wrmask */
973bf215546Sopenharmony_ci         0);
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci   agx_wait(b, 0);
976bf215546Sopenharmony_ci   agx_emit_cached_split(b, dst, 4);
977bf215546Sopenharmony_ci}
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci/*
980bf215546Sopenharmony_ci * Mark the logical end of the current block by emitting a p_logical_end marker.
981bf215546Sopenharmony_ci * Note if an unconditional jump is emitted (for instance, to break out of a
982bf215546Sopenharmony_ci * loop from inside an if), the block has already reached its logical end so we
983bf215546Sopenharmony_ci * don't re-emit p_logical_end. The validator checks this, and correct register
984bf215546Sopenharmony_ci * allocation depends on it.
985bf215546Sopenharmony_ci */
986bf215546Sopenharmony_cistatic void
987bf215546Sopenharmony_ciagx_emit_logical_end(agx_builder *b)
988bf215546Sopenharmony_ci{
989bf215546Sopenharmony_ci   if (!b->shader->current_block->unconditional_jumps)
990bf215546Sopenharmony_ci      agx_p_logical_end(b);
991bf215546Sopenharmony_ci}
992bf215546Sopenharmony_ci
993bf215546Sopenharmony_ci/* NIR loops are treated as a pair of AGX loops:
994bf215546Sopenharmony_ci *
995bf215546Sopenharmony_ci *    do {
996bf215546Sopenharmony_ci *       do {
997bf215546Sopenharmony_ci *          ...
998bf215546Sopenharmony_ci *       } while (0);
999bf215546Sopenharmony_ci *    } while (cond);
1000bf215546Sopenharmony_ci *
1001bf215546Sopenharmony_ci * By manipulating the nesting counter (r0l), we may break out of nested loops,
1002bf215546Sopenharmony_ci * so under the model, both break and continue may be implemented as breaks,
1003bf215546Sopenharmony_ci * where break breaks out of the outer loop (2 layers) and continue breaks out
1004bf215546Sopenharmony_ci * of the inner loop (1 layer).
1005bf215546Sopenharmony_ci *
1006bf215546Sopenharmony_ci * After manipulating the nesting counter directly, pop_exec #0 must be used to
1007bf215546Sopenharmony_ci * flush the update to the execution mask.
1008bf215546Sopenharmony_ci */
1009bf215546Sopenharmony_ci
1010bf215546Sopenharmony_cistatic void
1011bf215546Sopenharmony_ciagx_emit_jump(agx_builder *b, nir_jump_instr *instr)
1012bf215546Sopenharmony_ci{
1013bf215546Sopenharmony_ci   agx_context *ctx = b->shader;
1014bf215546Sopenharmony_ci   assert (instr->type == nir_jump_break || instr->type == nir_jump_continue);
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_ci   /* Break out of either one or two loops */
1017bf215546Sopenharmony_ci   unsigned nestings = b->shader->loop_nesting;
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci   if (instr->type == nir_jump_continue) {
1020bf215546Sopenharmony_ci      nestings += 1;
1021bf215546Sopenharmony_ci      agx_block_add_successor(ctx->current_block, ctx->continue_block);
1022bf215546Sopenharmony_ci   } else if (instr->type == nir_jump_break) {
1023bf215546Sopenharmony_ci      nestings += 2;
1024bf215546Sopenharmony_ci      agx_block_add_successor(ctx->current_block, ctx->break_block);
1025bf215546Sopenharmony_ci   }
1026bf215546Sopenharmony_ci
1027bf215546Sopenharmony_ci   /* Update the counter and flush */
1028bf215546Sopenharmony_ci   agx_index r0l = agx_register(0, false);
1029bf215546Sopenharmony_ci   agx_mov_to(b, r0l, agx_immediate(nestings));
1030bf215546Sopenharmony_ci
1031bf215546Sopenharmony_ci   /* Jumps must come at the end of a block */
1032bf215546Sopenharmony_ci   agx_emit_logical_end(b);
1033bf215546Sopenharmony_ci   agx_pop_exec(b, 0);
1034bf215546Sopenharmony_ci
1035bf215546Sopenharmony_ci   ctx->current_block->unconditional_jumps = true;
1036bf215546Sopenharmony_ci}
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_cistatic void
1039bf215546Sopenharmony_ciagx_emit_phi(agx_builder *b, nir_phi_instr *instr)
1040bf215546Sopenharmony_ci{
1041bf215546Sopenharmony_ci   agx_instr *I = agx_phi_to(b, agx_dest_index(&instr->dest));
1042bf215546Sopenharmony_ci
1043bf215546Sopenharmony_ci   /* Deferred */
1044bf215546Sopenharmony_ci   I->phi = instr;
1045bf215546Sopenharmony_ci}
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci/* Look up the AGX block corresponding to a given NIR block. Used when
1048bf215546Sopenharmony_ci * translating phi nodes after emitting all blocks.
1049bf215546Sopenharmony_ci */
1050bf215546Sopenharmony_cistatic agx_block *
1051bf215546Sopenharmony_ciagx_from_nir_block(agx_context *ctx, nir_block *block)
1052bf215546Sopenharmony_ci{
1053bf215546Sopenharmony_ci   return ctx->indexed_nir_blocks[block->index];
1054bf215546Sopenharmony_ci}
1055bf215546Sopenharmony_ci
1056bf215546Sopenharmony_cistatic void
1057bf215546Sopenharmony_ciagx_emit_phi_deferred(agx_context *ctx, agx_block *block, agx_instr *I)
1058bf215546Sopenharmony_ci{
1059bf215546Sopenharmony_ci   nir_phi_instr *phi = I->phi;
1060bf215546Sopenharmony_ci
1061bf215546Sopenharmony_ci   /* Guaranteed by lower_phis_to_scalar */
1062bf215546Sopenharmony_ci   assert(phi->dest.ssa.num_components == 1);
1063bf215546Sopenharmony_ci
1064bf215546Sopenharmony_ci   I->nr_srcs = exec_list_length(&phi->srcs);
1065bf215546Sopenharmony_ci   I->src = rzalloc_array(I, agx_index, I->nr_srcs);
1066bf215546Sopenharmony_ci
1067bf215546Sopenharmony_ci   nir_foreach_phi_src(src, phi) {
1068bf215546Sopenharmony_ci      agx_block *pred = agx_from_nir_block(ctx, src->pred);
1069bf215546Sopenharmony_ci      unsigned i = agx_predecessor_index(block, pred);
1070bf215546Sopenharmony_ci      assert(i < I->nr_srcs);
1071bf215546Sopenharmony_ci
1072bf215546Sopenharmony_ci      I->src[i] = agx_src_index(&src->src);
1073bf215546Sopenharmony_ci   }
1074bf215546Sopenharmony_ci}
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_cistatic void
1077bf215546Sopenharmony_ciagx_emit_phis_deferred(agx_context *ctx)
1078bf215546Sopenharmony_ci{
1079bf215546Sopenharmony_ci   agx_foreach_block(ctx, block) {
1080bf215546Sopenharmony_ci      agx_foreach_instr_in_block(block, I) {
1081bf215546Sopenharmony_ci         if (I->op == AGX_OPCODE_PHI)
1082bf215546Sopenharmony_ci            agx_emit_phi_deferred(ctx, block, I);
1083bf215546Sopenharmony_ci      }
1084bf215546Sopenharmony_ci   }
1085bf215546Sopenharmony_ci}
1086bf215546Sopenharmony_ci
1087bf215546Sopenharmony_cistatic void
1088bf215546Sopenharmony_ciagx_emit_instr(agx_builder *b, struct nir_instr *instr)
1089bf215546Sopenharmony_ci{
1090bf215546Sopenharmony_ci   switch (instr->type) {
1091bf215546Sopenharmony_ci   case nir_instr_type_load_const:
1092bf215546Sopenharmony_ci      agx_emit_load_const(b, nir_instr_as_load_const(instr));
1093bf215546Sopenharmony_ci      break;
1094bf215546Sopenharmony_ci
1095bf215546Sopenharmony_ci   case nir_instr_type_intrinsic:
1096bf215546Sopenharmony_ci      agx_emit_intrinsic(b, nir_instr_as_intrinsic(instr));
1097bf215546Sopenharmony_ci      break;
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_ci   case nir_instr_type_alu:
1100bf215546Sopenharmony_ci      agx_emit_alu(b, nir_instr_as_alu(instr));
1101bf215546Sopenharmony_ci      break;
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci   case nir_instr_type_tex:
1104bf215546Sopenharmony_ci      agx_emit_tex(b, nir_instr_as_tex(instr));
1105bf215546Sopenharmony_ci      break;
1106bf215546Sopenharmony_ci
1107bf215546Sopenharmony_ci   case nir_instr_type_jump:
1108bf215546Sopenharmony_ci      agx_emit_jump(b, nir_instr_as_jump(instr));
1109bf215546Sopenharmony_ci      break;
1110bf215546Sopenharmony_ci
1111bf215546Sopenharmony_ci   case nir_instr_type_phi:
1112bf215546Sopenharmony_ci      agx_emit_phi(b, nir_instr_as_phi(instr));
1113bf215546Sopenharmony_ci      break;
1114bf215546Sopenharmony_ci
1115bf215546Sopenharmony_ci   default:
1116bf215546Sopenharmony_ci      unreachable("should've been lowered");
1117bf215546Sopenharmony_ci   }
1118bf215546Sopenharmony_ci}
1119bf215546Sopenharmony_ci
1120bf215546Sopenharmony_cistatic agx_block *
1121bf215546Sopenharmony_ciagx_create_block(agx_context *ctx)
1122bf215546Sopenharmony_ci{
1123bf215546Sopenharmony_ci   agx_block *blk = rzalloc(ctx, agx_block);
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_ci   util_dynarray_init(&blk->predecessors, blk);
1126bf215546Sopenharmony_ci
1127bf215546Sopenharmony_ci   return blk;
1128bf215546Sopenharmony_ci}
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_cistatic agx_block *
1131bf215546Sopenharmony_ciemit_block(agx_context *ctx, nir_block *block)
1132bf215546Sopenharmony_ci{
1133bf215546Sopenharmony_ci   if (ctx->after_block) {
1134bf215546Sopenharmony_ci      ctx->current_block = ctx->after_block;
1135bf215546Sopenharmony_ci      ctx->after_block = NULL;
1136bf215546Sopenharmony_ci   } else {
1137bf215546Sopenharmony_ci      ctx->current_block = agx_create_block(ctx);
1138bf215546Sopenharmony_ci   }
1139bf215546Sopenharmony_ci
1140bf215546Sopenharmony_ci   agx_block *blk = ctx->current_block;
1141bf215546Sopenharmony_ci   list_addtail(&blk->link, &ctx->blocks);
1142bf215546Sopenharmony_ci   list_inithead(&blk->instructions);
1143bf215546Sopenharmony_ci
1144bf215546Sopenharmony_ci   ctx->indexed_nir_blocks[block->index] = blk;
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci   agx_builder _b = agx_init_builder(ctx, agx_after_block(blk));
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci   nir_foreach_instr(instr, block) {
1149bf215546Sopenharmony_ci      agx_emit_instr(&_b, instr);
1150bf215546Sopenharmony_ci   }
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ci   return blk;
1153bf215546Sopenharmony_ci}
1154bf215546Sopenharmony_ci
1155bf215546Sopenharmony_cistatic agx_block *
1156bf215546Sopenharmony_ciemit_cf_list(agx_context *ctx, struct exec_list *list);
1157bf215546Sopenharmony_ci
1158bf215546Sopenharmony_ci/* Emit if-else as
1159bf215546Sopenharmony_ci *
1160bf215546Sopenharmony_ci *    if_icmp cond != 0
1161bf215546Sopenharmony_ci *       ...
1162bf215546Sopenharmony_ci *    else_icmp cond == 0
1163bf215546Sopenharmony_ci *       ...
1164bf215546Sopenharmony_ci *    pop_exec
1165bf215546Sopenharmony_ci *
1166bf215546Sopenharmony_ci * If the else is empty, we can omit the else_icmp. This happens elsewhere, as
1167bf215546Sopenharmony_ci * an empty else block can become nonempty after RA due to phi lowering. This is
1168bf215546Sopenharmony_ci * not usually optimal, but it's a start.
1169bf215546Sopenharmony_ci */
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_cistatic void
1172bf215546Sopenharmony_ciemit_if(agx_context *ctx, nir_if *nif)
1173bf215546Sopenharmony_ci{
1174bf215546Sopenharmony_ci   agx_block *first_block = ctx->current_block;
1175bf215546Sopenharmony_ci   agx_builder _b = agx_init_builder(ctx, agx_after_block(first_block));
1176bf215546Sopenharmony_ci   agx_index cond = agx_src_index(&nif->condition);
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_ci   agx_emit_logical_end(&_b);
1179bf215546Sopenharmony_ci   agx_if_icmp(&_b, cond, agx_zero(), 1, AGX_ICOND_UEQ, true);
1180bf215546Sopenharmony_ci   ctx->loop_nesting++;
1181bf215546Sopenharmony_ci
1182bf215546Sopenharmony_ci   /* Emit the two subblocks. */
1183bf215546Sopenharmony_ci   agx_block *if_block = emit_cf_list(ctx, &nif->then_list);
1184bf215546Sopenharmony_ci   agx_block *end_then = ctx->current_block;
1185bf215546Sopenharmony_ci
1186bf215546Sopenharmony_ci   _b.cursor = agx_after_block(ctx->current_block);
1187bf215546Sopenharmony_ci   agx_emit_logical_end(&_b);
1188bf215546Sopenharmony_ci   agx_else_icmp(&_b, cond, agx_zero(), 1, AGX_ICOND_UEQ, false);
1189bf215546Sopenharmony_ci
1190bf215546Sopenharmony_ci   agx_block *else_block = emit_cf_list(ctx, &nif->else_list);
1191bf215546Sopenharmony_ci   agx_block *end_else = ctx->current_block;
1192bf215546Sopenharmony_ci
1193bf215546Sopenharmony_ci   ctx->after_block = agx_create_block(ctx);
1194bf215546Sopenharmony_ci
1195bf215546Sopenharmony_ci   agx_block_add_successor(first_block, if_block);
1196bf215546Sopenharmony_ci   agx_block_add_successor(first_block, else_block);
1197bf215546Sopenharmony_ci   agx_block_add_successor(end_then, ctx->after_block);
1198bf215546Sopenharmony_ci   agx_block_add_successor(end_else, ctx->after_block);
1199bf215546Sopenharmony_ci
1200bf215546Sopenharmony_ci   _b.cursor = agx_after_block(ctx->current_block);
1201bf215546Sopenharmony_ci   agx_emit_logical_end(&_b);
1202bf215546Sopenharmony_ci   agx_pop_exec(&_b, 1);
1203bf215546Sopenharmony_ci   ctx->loop_nesting--;
1204bf215546Sopenharmony_ci}
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_cistatic void
1207bf215546Sopenharmony_ciemit_loop(agx_context *ctx, nir_loop *nloop)
1208bf215546Sopenharmony_ci{
1209bf215546Sopenharmony_ci   /* We only track nesting within the innermost loop, so push and reset */
1210bf215546Sopenharmony_ci   unsigned pushed_nesting = ctx->loop_nesting;
1211bf215546Sopenharmony_ci   ctx->loop_nesting = 0;
1212bf215546Sopenharmony_ci
1213bf215546Sopenharmony_ci   agx_block *popped_break = ctx->break_block;
1214bf215546Sopenharmony_ci   agx_block *popped_continue = ctx->continue_block;
1215bf215546Sopenharmony_ci
1216bf215546Sopenharmony_ci   ctx->break_block = agx_create_block(ctx);
1217bf215546Sopenharmony_ci   ctx->continue_block = agx_create_block(ctx);
1218bf215546Sopenharmony_ci
1219bf215546Sopenharmony_ci   /* Make room for break/continue nesting (TODO: skip if no divergent CF) */
1220bf215546Sopenharmony_ci   agx_builder _b = agx_init_builder(ctx, agx_after_block(ctx->current_block));
1221bf215546Sopenharmony_ci   agx_emit_logical_end(&_b);
1222bf215546Sopenharmony_ci   agx_push_exec(&_b, 2);
1223bf215546Sopenharmony_ci
1224bf215546Sopenharmony_ci   /* Fallthrough to body */
1225bf215546Sopenharmony_ci   agx_block_add_successor(ctx->current_block, ctx->continue_block);
1226bf215546Sopenharmony_ci
1227bf215546Sopenharmony_ci   /* Emit the body */
1228bf215546Sopenharmony_ci   ctx->after_block = ctx->continue_block;
1229bf215546Sopenharmony_ci   agx_block *start_block = emit_cf_list(ctx, &nloop->body);
1230bf215546Sopenharmony_ci
1231bf215546Sopenharmony_ci   /* Fix up the nesting counter via an always true while_icmp, and branch back
1232bf215546Sopenharmony_ci    * to start of loop if any lanes are active */
1233bf215546Sopenharmony_ci   _b.cursor = agx_after_block(ctx->current_block);
1234bf215546Sopenharmony_ci   agx_emit_logical_end(&_b);
1235bf215546Sopenharmony_ci   agx_while_icmp(&_b, agx_zero(), agx_zero(), 2, AGX_ICOND_UEQ, false);
1236bf215546Sopenharmony_ci   agx_jmp_exec_any(&_b, start_block);
1237bf215546Sopenharmony_ci   agx_pop_exec(&_b, 2);
1238bf215546Sopenharmony_ci   agx_block_add_successor(ctx->current_block, ctx->continue_block);
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_ci   /* Pop off */
1241bf215546Sopenharmony_ci   ctx->after_block = ctx->break_block;
1242bf215546Sopenharmony_ci   ctx->break_block = popped_break;
1243bf215546Sopenharmony_ci   ctx->continue_block = popped_continue;
1244bf215546Sopenharmony_ci
1245bf215546Sopenharmony_ci   /* Update shader-db stats */
1246bf215546Sopenharmony_ci   ++ctx->loop_count;
1247bf215546Sopenharmony_ci
1248bf215546Sopenharmony_ci   /* All nested control flow must have finished */
1249bf215546Sopenharmony_ci   assert(ctx->loop_nesting == 0);
1250bf215546Sopenharmony_ci
1251bf215546Sopenharmony_ci   /* Restore loop nesting (we might be inside an if inside an outer loop) */
1252bf215546Sopenharmony_ci   ctx->loop_nesting = pushed_nesting;
1253bf215546Sopenharmony_ci}
1254bf215546Sopenharmony_ci
1255bf215546Sopenharmony_ci/* Before the first control flow structure, the nesting counter (r0l) needs to
1256bf215546Sopenharmony_ci * be zeroed for correct operation. This only happens at most once, since by
1257bf215546Sopenharmony_ci * definition this occurs at the end of the first block, which dominates the
1258bf215546Sopenharmony_ci * rest of the program. */
1259bf215546Sopenharmony_ci
1260bf215546Sopenharmony_cistatic void
1261bf215546Sopenharmony_ciemit_first_cf(agx_context *ctx)
1262bf215546Sopenharmony_ci{
1263bf215546Sopenharmony_ci   if (ctx->any_cf)
1264bf215546Sopenharmony_ci      return;
1265bf215546Sopenharmony_ci
1266bf215546Sopenharmony_ci   agx_builder _b = agx_init_builder(ctx, agx_after_block(ctx->current_block));
1267bf215546Sopenharmony_ci   agx_index r0l = agx_register(0, false);
1268bf215546Sopenharmony_ci
1269bf215546Sopenharmony_ci   agx_mov_to(&_b, r0l, agx_immediate(0));
1270bf215546Sopenharmony_ci   ctx->any_cf = true;
1271bf215546Sopenharmony_ci}
1272bf215546Sopenharmony_ci
1273bf215546Sopenharmony_cistatic agx_block *
1274bf215546Sopenharmony_ciemit_cf_list(agx_context *ctx, struct exec_list *list)
1275bf215546Sopenharmony_ci{
1276bf215546Sopenharmony_ci   agx_block *start_block = NULL;
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_ci   foreach_list_typed(nir_cf_node, node, node, list) {
1279bf215546Sopenharmony_ci      switch (node->type) {
1280bf215546Sopenharmony_ci      case nir_cf_node_block: {
1281bf215546Sopenharmony_ci         agx_block *block = emit_block(ctx, nir_cf_node_as_block(node));
1282bf215546Sopenharmony_ci
1283bf215546Sopenharmony_ci         if (!start_block)
1284bf215546Sopenharmony_ci            start_block = block;
1285bf215546Sopenharmony_ci
1286bf215546Sopenharmony_ci         break;
1287bf215546Sopenharmony_ci      }
1288bf215546Sopenharmony_ci
1289bf215546Sopenharmony_ci      case nir_cf_node_if:
1290bf215546Sopenharmony_ci         emit_first_cf(ctx);
1291bf215546Sopenharmony_ci         emit_if(ctx, nir_cf_node_as_if(node));
1292bf215546Sopenharmony_ci         break;
1293bf215546Sopenharmony_ci
1294bf215546Sopenharmony_ci      case nir_cf_node_loop:
1295bf215546Sopenharmony_ci         emit_first_cf(ctx);
1296bf215546Sopenharmony_ci         emit_loop(ctx, nir_cf_node_as_loop(node));
1297bf215546Sopenharmony_ci         break;
1298bf215546Sopenharmony_ci
1299bf215546Sopenharmony_ci      default:
1300bf215546Sopenharmony_ci         unreachable("Unknown control flow");
1301bf215546Sopenharmony_ci      }
1302bf215546Sopenharmony_ci   }
1303bf215546Sopenharmony_ci
1304bf215546Sopenharmony_ci   return start_block;
1305bf215546Sopenharmony_ci}
1306bf215546Sopenharmony_ci
1307bf215546Sopenharmony_cistatic void
1308bf215546Sopenharmony_ciagx_set_st_vary_final(agx_context *ctx)
1309bf215546Sopenharmony_ci{
1310bf215546Sopenharmony_ci   agx_foreach_instr_global_rev(ctx, I) {
1311bf215546Sopenharmony_ci      if (I->op == AGX_OPCODE_ST_VARY) {
1312bf215546Sopenharmony_ci         I->last = true;
1313bf215546Sopenharmony_ci         return;
1314bf215546Sopenharmony_ci      }
1315bf215546Sopenharmony_ci   }
1316bf215546Sopenharmony_ci}
1317bf215546Sopenharmony_ci
1318bf215546Sopenharmony_cistatic void
1319bf215546Sopenharmony_ciagx_print_stats(agx_context *ctx, unsigned size, FILE *fp)
1320bf215546Sopenharmony_ci{
1321bf215546Sopenharmony_ci   unsigned nr_ins = 0, max_reg = 0;
1322bf215546Sopenharmony_ci
1323bf215546Sopenharmony_ci   agx_foreach_instr_global(ctx, I) {
1324bf215546Sopenharmony_ci      /* Count instructions */
1325bf215546Sopenharmony_ci      nr_ins++;
1326bf215546Sopenharmony_ci
1327bf215546Sopenharmony_ci      /* Count registers */
1328bf215546Sopenharmony_ci      agx_foreach_dest(I, d) {
1329bf215546Sopenharmony_ci         if (I->dest[d].type == AGX_INDEX_REGISTER) {
1330bf215546Sopenharmony_ci            max_reg = MAX2(max_reg,
1331bf215546Sopenharmony_ci                           I->dest[d].value + agx_write_registers(I, d) - 1);
1332bf215546Sopenharmony_ci         }
1333bf215546Sopenharmony_ci      }
1334bf215546Sopenharmony_ci   }
1335bf215546Sopenharmony_ci
1336bf215546Sopenharmony_ci   /* TODO: Pipe through occupancy */
1337bf215546Sopenharmony_ci   unsigned nr_threads = 1;
1338bf215546Sopenharmony_ci
1339bf215546Sopenharmony_ci   fprintf(stderr, "%s - %s shader: %u inst, %u bytes, %u halfregs, %u threads, "
1340bf215546Sopenharmony_ci           "%u loops, %u:%u spills:fills\n",
1341bf215546Sopenharmony_ci           ctx->nir->info.label ?: "",
1342bf215546Sopenharmony_ci           gl_shader_stage_name(ctx->stage),
1343bf215546Sopenharmony_ci           nr_ins, size, max_reg, nr_threads, ctx->loop_count,
1344bf215546Sopenharmony_ci           ctx->spills, ctx->fills);
1345bf215546Sopenharmony_ci}
1346bf215546Sopenharmony_ci
1347bf215546Sopenharmony_cistatic int
1348bf215546Sopenharmony_ciglsl_type_size(const struct glsl_type *type, bool bindless)
1349bf215546Sopenharmony_ci{
1350bf215546Sopenharmony_ci   return glsl_count_attribute_slots(type, false);
1351bf215546Sopenharmony_ci}
1352bf215546Sopenharmony_ci
1353bf215546Sopenharmony_cistatic bool
1354bf215546Sopenharmony_ciagx_lower_sincos_filter(const nir_instr *instr, UNUSED const void *_)
1355bf215546Sopenharmony_ci{
1356bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_alu)
1357bf215546Sopenharmony_ci      return false;
1358bf215546Sopenharmony_ci
1359bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(instr);
1360bf215546Sopenharmony_ci   return alu->op == nir_op_fsin || alu->op == nir_op_fcos;
1361bf215546Sopenharmony_ci}
1362bf215546Sopenharmony_ci
1363bf215546Sopenharmony_ci/* Sine and cosine are implemented via the sin_pt_1 and sin_pt_2 opcodes for
1364bf215546Sopenharmony_ci * heavy lifting. sin_pt_2 implements sinc in the first quadrant, expressed in
1365bf215546Sopenharmony_ci * turns (sin (tau x) / x), while sin_pt_1 implements a piecewise sign/offset
1366bf215546Sopenharmony_ci * fixup to transform a quadrant angle [0, 4] to [-1, 1]. The NIR opcode
1367bf215546Sopenharmony_ci * fsin_agx models the fixup, sinc, and multiply to obtain sine, so we just
1368bf215546Sopenharmony_ci * need to change units from radians to quadrants modulo turns. Cosine is
1369bf215546Sopenharmony_ci * implemented by shifting by one quadrant: cos(x) = sin(x + tau/4).
1370bf215546Sopenharmony_ci */
1371bf215546Sopenharmony_ci
1372bf215546Sopenharmony_cistatic nir_ssa_def *
1373bf215546Sopenharmony_ciagx_lower_sincos_impl(struct nir_builder *b, nir_instr *instr, UNUSED void *_)
1374bf215546Sopenharmony_ci{
1375bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(instr);
1376bf215546Sopenharmony_ci   nir_ssa_def *x = nir_mov_alu(b, alu->src[0], 1);
1377bf215546Sopenharmony_ci   nir_ssa_def *turns = nir_fmul_imm(b, x, M_1_PI * 0.5f);
1378bf215546Sopenharmony_ci
1379bf215546Sopenharmony_ci   if (alu->op == nir_op_fcos)
1380bf215546Sopenharmony_ci      turns = nir_fadd_imm(b, turns, 0.25f);
1381bf215546Sopenharmony_ci
1382bf215546Sopenharmony_ci   nir_ssa_def *quadrants = nir_fmul_imm(b, nir_ffract(b, turns), 4.0);
1383bf215546Sopenharmony_ci   return nir_fsin_agx(b, quadrants);
1384bf215546Sopenharmony_ci}
1385bf215546Sopenharmony_ci
1386bf215546Sopenharmony_cistatic bool
1387bf215546Sopenharmony_ciagx_lower_sincos(nir_shader *shader)
1388bf215546Sopenharmony_ci{
1389bf215546Sopenharmony_ci   return nir_shader_lower_instructions(shader,
1390bf215546Sopenharmony_ci         agx_lower_sincos_filter, agx_lower_sincos_impl, NULL);
1391bf215546Sopenharmony_ci}
1392bf215546Sopenharmony_ci
1393bf215546Sopenharmony_cistatic bool
1394bf215546Sopenharmony_ciagx_lower_front_face(struct nir_builder *b,
1395bf215546Sopenharmony_ci                     nir_instr *instr, UNUSED void *data)
1396bf215546Sopenharmony_ci{
1397bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1398bf215546Sopenharmony_ci      return false;
1399bf215546Sopenharmony_ci
1400bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1401bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_front_face)
1402bf215546Sopenharmony_ci      return false;
1403bf215546Sopenharmony_ci
1404bf215546Sopenharmony_ci   assert(intr->dest.is_ssa);
1405bf215546Sopenharmony_ci   nir_ssa_def *def = &intr->dest.ssa;
1406bf215546Sopenharmony_ci   assert(def->bit_size == 1);
1407bf215546Sopenharmony_ci
1408bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
1409bf215546Sopenharmony_ci   nir_ssa_def_rewrite_uses(def, nir_inot(b, nir_load_back_face_agx(b, 1)));
1410bf215546Sopenharmony_ci   return true;
1411bf215546Sopenharmony_ci}
1412bf215546Sopenharmony_ci
1413bf215546Sopenharmony_cistatic bool
1414bf215546Sopenharmony_ciagx_lower_aligned_offsets(struct nir_builder *b,
1415bf215546Sopenharmony_ci                          nir_instr *instr, UNUSED void *data)
1416bf215546Sopenharmony_ci{
1417bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_intrinsic)
1418bf215546Sopenharmony_ci      return false;
1419bf215546Sopenharmony_ci
1420bf215546Sopenharmony_ci   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1421bf215546Sopenharmony_ci   if (intr->intrinsic != nir_intrinsic_load_ubo)
1422bf215546Sopenharmony_ci      return false;
1423bf215546Sopenharmony_ci
1424bf215546Sopenharmony_ci   b->cursor = nir_before_instr(&intr->instr);
1425bf215546Sopenharmony_ci
1426bf215546Sopenharmony_ci   unsigned bytes = nir_dest_bit_size(intr->dest) / 8;
1427bf215546Sopenharmony_ci   assert(util_is_power_of_two_or_zero(bytes) && bytes != 0);
1428bf215546Sopenharmony_ci
1429bf215546Sopenharmony_ci   nir_src *offset = &intr->src[1];
1430bf215546Sopenharmony_ci
1431bf215546Sopenharmony_ci   unsigned shift = util_logbase2(bytes);
1432bf215546Sopenharmony_ci
1433bf215546Sopenharmony_ci   nir_ssa_def *old = nir_ssa_for_src(b, *offset, 1);
1434bf215546Sopenharmony_ci   nir_ssa_def *new = nir_ishr_imm(b, old, shift);
1435bf215546Sopenharmony_ci
1436bf215546Sopenharmony_ci   nir_instr_rewrite_src_ssa(instr, offset, new);
1437bf215546Sopenharmony_ci   return true;
1438bf215546Sopenharmony_ci}
1439bf215546Sopenharmony_ci
1440bf215546Sopenharmony_cistatic void
1441bf215546Sopenharmony_ciagx_optimize_nir(nir_shader *nir)
1442bf215546Sopenharmony_ci{
1443bf215546Sopenharmony_ci   bool progress;
1444bf215546Sopenharmony_ci
1445bf215546Sopenharmony_ci   nir_lower_idiv_options idiv_options = {
1446bf215546Sopenharmony_ci      .imprecise_32bit_lowering = true,
1447bf215546Sopenharmony_ci      .allow_fp16 = true,
1448bf215546Sopenharmony_ci   };
1449bf215546Sopenharmony_ci
1450bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
1451bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_int64);
1452bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_idiv, &idiv_options);
1453bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1454bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
1455bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false);
1456bf215546Sopenharmony_ci   NIR_PASS_V(nir, agx_lower_sincos);
1457bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_shader_instructions_pass,
1458bf215546Sopenharmony_ci         agx_lower_front_face,
1459bf215546Sopenharmony_ci         nir_metadata_block_index | nir_metadata_dominance, NULL);
1460bf215546Sopenharmony_ci
1461bf215546Sopenharmony_ci   do {
1462bf215546Sopenharmony_ci      progress = false;
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_lower_var_copies);
1465bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
1466bf215546Sopenharmony_ci
1467bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_copy_prop);
1468bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_remove_phis);
1469bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
1470bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_dce);
1471bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_dead_cf);
1472bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_cse);
1473bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
1474bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_algebraic);
1475bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_constant_folding);
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_undef);
1478bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_lower_undef_to_zero);
1479bf215546Sopenharmony_ci
1480bf215546Sopenharmony_ci      NIR_PASS(progress, nir, nir_opt_loop_unroll);
1481bf215546Sopenharmony_ci   } while (progress);
1482bf215546Sopenharmony_ci
1483bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_algebraic_late);
1484bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_constant_folding);
1485bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_copy_prop);
1486bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_dce);
1487bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_cse);
1488bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
1489bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
1490bf215546Sopenharmony_ci
1491bf215546Sopenharmony_ci   /* Cleanup optimizations */
1492bf215546Sopenharmony_ci   nir_move_options move_all =
1493bf215546Sopenharmony_ci      nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
1494bf215546Sopenharmony_ci      nir_move_comparisons | nir_move_copies | nir_move_load_ssbo;
1495bf215546Sopenharmony_ci
1496bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_sink, move_all);
1497bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_opt_move, move_all);
1498bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_phis_to_scalar, true);
1499bf215546Sopenharmony_ci}
1500bf215546Sopenharmony_ci
1501bf215546Sopenharmony_ci/* ABI: position first, then user, then psiz */
1502bf215546Sopenharmony_cistatic void
1503bf215546Sopenharmony_ciagx_remap_varyings_vs(nir_shader *nir, struct agx_varyings *varyings,
1504bf215546Sopenharmony_ci                      unsigned *remap)
1505bf215546Sopenharmony_ci{
1506bf215546Sopenharmony_ci   unsigned base = 0;
1507bf215546Sopenharmony_ci
1508bf215546Sopenharmony_ci   nir_variable *pos = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_POS);
1509bf215546Sopenharmony_ci   if (pos) {
1510bf215546Sopenharmony_ci      assert(pos->data.driver_location < AGX_MAX_VARYINGS);
1511bf215546Sopenharmony_ci      remap[pos->data.driver_location] = base;
1512bf215546Sopenharmony_ci      base += 4;
1513bf215546Sopenharmony_ci   }
1514bf215546Sopenharmony_ci
1515bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, nir) {
1516bf215546Sopenharmony_ci      unsigned loc = var->data.location;
1517bf215546Sopenharmony_ci
1518bf215546Sopenharmony_ci      if(loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ) {
1519bf215546Sopenharmony_ci         continue;
1520bf215546Sopenharmony_ci      }
1521bf215546Sopenharmony_ci
1522bf215546Sopenharmony_ci      assert(var->data.driver_location < AGX_MAX_VARYINGS);
1523bf215546Sopenharmony_ci      remap[var->data.driver_location] = base;
1524bf215546Sopenharmony_ci      base += 4;
1525bf215546Sopenharmony_ci   }
1526bf215546Sopenharmony_ci
1527bf215546Sopenharmony_ci   nir_variable *psiz = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_PSIZ);
1528bf215546Sopenharmony_ci   if (psiz) {
1529bf215546Sopenharmony_ci      assert(psiz->data.driver_location < AGX_MAX_VARYINGS);
1530bf215546Sopenharmony_ci      remap[psiz->data.driver_location] = base;
1531bf215546Sopenharmony_ci      base += 1;
1532bf215546Sopenharmony_ci   }
1533bf215546Sopenharmony_ci
1534bf215546Sopenharmony_ci   varyings->nr_slots = base;
1535bf215546Sopenharmony_ci}
1536bf215546Sopenharmony_ci
1537bf215546Sopenharmony_cistatic void
1538bf215546Sopenharmony_ciagx_remap_varyings_fs(nir_shader *nir, struct agx_varyings *varyings,
1539bf215546Sopenharmony_ci                      unsigned *remap)
1540bf215546Sopenharmony_ci{
1541bf215546Sopenharmony_ci   struct agx_varying_packed *packed = varyings->packed;
1542bf215546Sopenharmony_ci   unsigned base = 0;
1543bf215546Sopenharmony_ci
1544bf215546Sopenharmony_ci   agx_pack(packed, VARYING, cfg) {
1545bf215546Sopenharmony_ci      cfg.type = AGX_VARYING_TYPE_FRAGCOORD_W;
1546bf215546Sopenharmony_ci      cfg.components = 1;
1547bf215546Sopenharmony_ci      cfg.triangle_slot = cfg.point_slot = base;
1548bf215546Sopenharmony_ci   }
1549bf215546Sopenharmony_ci
1550bf215546Sopenharmony_ci   base++;
1551bf215546Sopenharmony_ci   packed++;
1552bf215546Sopenharmony_ci
1553bf215546Sopenharmony_ci   agx_pack(packed, VARYING, cfg) {
1554bf215546Sopenharmony_ci      cfg.type = AGX_VARYING_TYPE_FRAGCOORD_Z;
1555bf215546Sopenharmony_ci      cfg.components = 1;
1556bf215546Sopenharmony_ci      cfg.triangle_slot = cfg.point_slot = base;
1557bf215546Sopenharmony_ci   }
1558bf215546Sopenharmony_ci
1559bf215546Sopenharmony_ci   base++;
1560bf215546Sopenharmony_ci   packed++;
1561bf215546Sopenharmony_ci
1562bf215546Sopenharmony_ci   unsigned comps[MAX_VARYING] = { 0 };
1563bf215546Sopenharmony_ci
1564bf215546Sopenharmony_ci   nir_foreach_shader_in_variable(var, nir) {
1565bf215546Sopenharmony_ci     unsigned loc = var->data.driver_location;
1566bf215546Sopenharmony_ci     const struct glsl_type *column =
1567bf215546Sopenharmony_ci        glsl_without_array_or_matrix(var->type);
1568bf215546Sopenharmony_ci     unsigned chan = glsl_get_components(column);
1569bf215546Sopenharmony_ci
1570bf215546Sopenharmony_ci     /* If we have a fractional location added, we need to increase the size
1571bf215546Sopenharmony_ci      * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
1572bf215546Sopenharmony_ci      * We could do better but this is an edge case as it is, normally
1573bf215546Sopenharmony_ci      * packed varyings will be aligned.
1574bf215546Sopenharmony_ci      */
1575bf215546Sopenharmony_ci     chan += var->data.location_frac;
1576bf215546Sopenharmony_ci     comps[loc] = MAX2(comps[loc], chan);
1577bf215546Sopenharmony_ci   }
1578bf215546Sopenharmony_ci
1579bf215546Sopenharmony_ci   nir_foreach_shader_in_variable(var, nir) {
1580bf215546Sopenharmony_ci     unsigned loc = var->data.driver_location;
1581bf215546Sopenharmony_ci     unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
1582bf215546Sopenharmony_ci     unsigned channels = comps[loc];
1583bf215546Sopenharmony_ci
1584bf215546Sopenharmony_ci     assert(var->data.driver_location <= AGX_MAX_VARYINGS);
1585bf215546Sopenharmony_ci     remap[var->data.driver_location] = base;
1586bf215546Sopenharmony_ci
1587bf215546Sopenharmony_ci     for (int c = 0; c < sz; ++c) {
1588bf215546Sopenharmony_ci        agx_pack(packed, VARYING, cfg) {
1589bf215546Sopenharmony_ci           cfg.type = (var->data.location == VARYING_SLOT_PNTC) ?
1590bf215546Sopenharmony_ci              AGX_VARYING_TYPE_POINT_COORDINATES :
1591bf215546Sopenharmony_ci              (var->data.interpolation == INTERP_MODE_FLAT) ?
1592bf215546Sopenharmony_ci                 AGX_VARYING_TYPE_FLAT_LAST :
1593bf215546Sopenharmony_ci                 AGX_VARYING_TYPE_SMOOTH;
1594bf215546Sopenharmony_ci
1595bf215546Sopenharmony_ci           cfg.components = channels;
1596bf215546Sopenharmony_ci           cfg.triangle_slot = cfg.point_slot = base;
1597bf215546Sopenharmony_ci        }
1598bf215546Sopenharmony_ci
1599bf215546Sopenharmony_ci        base += channels;
1600bf215546Sopenharmony_ci        packed++;
1601bf215546Sopenharmony_ci     }
1602bf215546Sopenharmony_ci   }
1603bf215546Sopenharmony_ci
1604bf215546Sopenharmony_ci   varyings->nr_descs = (packed - varyings->packed);
1605bf215546Sopenharmony_ci   varyings->nr_slots = base;
1606bf215546Sopenharmony_ci}
1607bf215546Sopenharmony_ci
1608bf215546Sopenharmony_ci/*
1609bf215546Sopenharmony_ci * Build a bit mask of varyings (by location) that are flatshaded. This
1610bf215546Sopenharmony_ci * information is needed by lower_mediump_io.
1611bf215546Sopenharmony_ci */
1612bf215546Sopenharmony_cistatic uint64_t
1613bf215546Sopenharmony_ciagx_flat_varying_mask(nir_shader *nir)
1614bf215546Sopenharmony_ci{
1615bf215546Sopenharmony_ci   uint64_t mask = 0;
1616bf215546Sopenharmony_ci
1617bf215546Sopenharmony_ci   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
1618bf215546Sopenharmony_ci
1619bf215546Sopenharmony_ci   nir_foreach_shader_in_variable(var, nir) {
1620bf215546Sopenharmony_ci      if (var->data.interpolation == INTERP_MODE_FLAT)
1621bf215546Sopenharmony_ci         mask |= BITFIELD64_BIT(var->data.location);
1622bf215546Sopenharmony_ci   }
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_ci   return mask;
1625bf215546Sopenharmony_ci}
1626bf215546Sopenharmony_ci
1627bf215546Sopenharmony_civoid
1628bf215546Sopenharmony_ciagx_compile_shader_nir(nir_shader *nir,
1629bf215546Sopenharmony_ci      struct agx_shader_key *key,
1630bf215546Sopenharmony_ci      struct util_dynarray *binary,
1631bf215546Sopenharmony_ci      struct agx_shader_info *out)
1632bf215546Sopenharmony_ci{
1633bf215546Sopenharmony_ci   agx_debug = debug_get_option_agx_debug();
1634bf215546Sopenharmony_ci
1635bf215546Sopenharmony_ci   agx_context *ctx = rzalloc(NULL, agx_context);
1636bf215546Sopenharmony_ci   ctx->nir = nir;
1637bf215546Sopenharmony_ci   ctx->out = out;
1638bf215546Sopenharmony_ci   ctx->key = key;
1639bf215546Sopenharmony_ci   ctx->stage = nir->info.stage;
1640bf215546Sopenharmony_ci   list_inithead(&ctx->blocks);
1641bf215546Sopenharmony_ci
1642bf215546Sopenharmony_ci   if (ctx->stage == MESA_SHADER_VERTEX) {
1643bf215546Sopenharmony_ci      out->writes_psiz = nir->info.outputs_written &
1644bf215546Sopenharmony_ci         BITFIELD_BIT(VARYING_SLOT_PSIZ);
1645bf215546Sopenharmony_ci   }
1646bf215546Sopenharmony_ci
1647bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1648bf215546Sopenharmony_ci
1649bf215546Sopenharmony_ci   /* Lower large arrays to scratch and small arrays to csel */
1650bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16,
1651bf215546Sopenharmony_ci         glsl_get_natural_size_align_bytes);
1652bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci   if (ctx->stage == MESA_SHADER_VERTEX) {
1655bf215546Sopenharmony_ci      /* Lower from OpenGL [-1, 1] to [0, 1] if half-z is not set */
1656bf215546Sopenharmony_ci      if (!key->vs.clip_halfz)
1657bf215546Sopenharmony_ci         NIR_PASS_V(nir, nir_lower_clip_halfz);
1658bf215546Sopenharmony_ci   }
1659bf215546Sopenharmony_ci
1660bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_split_var_copies);
1661bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
1662bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_var_copies);
1663bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
1664bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
1665bf215546Sopenharmony_ci         glsl_type_size, 0);
1666bf215546Sopenharmony_ci   if (ctx->stage == MESA_SHADER_FRAGMENT) {
1667bf215546Sopenharmony_ci      /* Interpolate varyings at fp16 and write to the tilebuffer at fp16. As an
1668bf215546Sopenharmony_ci       * exception, interpolate flat shaded at fp32. This works around a
1669bf215546Sopenharmony_ci       * hardware limitation. The resulting code (with an extra f2f16 at the end
1670bf215546Sopenharmony_ci       * if needed) matches what Metal produces.
1671bf215546Sopenharmony_ci       */
1672bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_mediump_io,
1673bf215546Sopenharmony_ci            nir_var_shader_in | nir_var_shader_out,
1674bf215546Sopenharmony_ci            ~agx_flat_varying_mask(nir), false);
1675bf215546Sopenharmony_ci   }
1676bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_shader_instructions_pass,
1677bf215546Sopenharmony_ci         agx_lower_aligned_offsets,
1678bf215546Sopenharmony_ci         nir_metadata_block_index | nir_metadata_dominance, NULL);
1679bf215546Sopenharmony_ci
1680bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_ssbo);
1681bf215546Sopenharmony_ci
1682bf215546Sopenharmony_ci   /* Varying output is scalar, other I/O is vector */
1683bf215546Sopenharmony_ci   if (ctx->stage == MESA_SHADER_VERTEX) {
1684bf215546Sopenharmony_ci      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out);
1685bf215546Sopenharmony_ci   }
1686bf215546Sopenharmony_ci
1687bf215546Sopenharmony_ci   nir_lower_tex_options lower_tex_options = {
1688bf215546Sopenharmony_ci      .lower_txs_lod = true,
1689bf215546Sopenharmony_ci      .lower_txp = ~0,
1690bf215546Sopenharmony_ci      .lower_invalid_implicit_lod = true,
1691bf215546Sopenharmony_ci   };
1692bf215546Sopenharmony_ci
1693bf215546Sopenharmony_ci   nir_tex_src_type_constraints tex_constraints = {
1694bf215546Sopenharmony_ci      [nir_tex_src_lod] = { true, 16 },
1695bf215546Sopenharmony_ci      [nir_tex_src_bias] = { true, 16 },
1696bf215546Sopenharmony_ci   };
1697bf215546Sopenharmony_ci
1698bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
1699bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints);
1700bf215546Sopenharmony_ci
1701bf215546Sopenharmony_ci   agx_optimize_nir(nir);
1702bf215546Sopenharmony_ci
1703bf215546Sopenharmony_ci   /* Implement conditional discard with real control flow like Metal */
1704bf215546Sopenharmony_ci   NIR_PASS_V(nir, nir_lower_discard_if);
1705bf215546Sopenharmony_ci
1706bf215546Sopenharmony_ci   /* Must be last since NIR passes can remap driver_location freely */
1707bf215546Sopenharmony_ci   if (ctx->stage == MESA_SHADER_VERTEX) {
1708bf215546Sopenharmony_ci      agx_remap_varyings_vs(nir, &out->varyings, ctx->varyings);
1709bf215546Sopenharmony_ci   } else if (ctx->stage == MESA_SHADER_FRAGMENT) {
1710bf215546Sopenharmony_ci      agx_remap_varyings_fs(nir, &out->varyings, ctx->varyings);
1711bf215546Sopenharmony_ci   }
1712bf215546Sopenharmony_ci
1713bf215546Sopenharmony_ci   bool skip_internal = nir->info.internal;
1714bf215546Sopenharmony_ci   skip_internal &= !(agx_debug & AGX_DBG_INTERNAL);
1715bf215546Sopenharmony_ci
1716bf215546Sopenharmony_ci   if (agx_debug & AGX_DBG_SHADERS && !skip_internal) {
1717bf215546Sopenharmony_ci      nir_print_shader(nir, stdout);
1718bf215546Sopenharmony_ci   }
1719bf215546Sopenharmony_ci
1720bf215546Sopenharmony_ci   ctx->allocated_vec = _mesa_hash_table_u64_create(ctx);
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci   nir_foreach_function(func, nir) {
1723bf215546Sopenharmony_ci      if (!func->impl)
1724bf215546Sopenharmony_ci         continue;
1725bf215546Sopenharmony_ci
1726bf215546Sopenharmony_ci      nir_index_blocks(func->impl);
1727bf215546Sopenharmony_ci
1728bf215546Sopenharmony_ci      ctx->indexed_nir_blocks =
1729bf215546Sopenharmony_ci         rzalloc_array(ctx, agx_block *, func->impl->num_blocks);
1730bf215546Sopenharmony_ci
1731bf215546Sopenharmony_ci      ctx->alloc += func->impl->ssa_alloc;
1732bf215546Sopenharmony_ci      emit_cf_list(ctx, &func->impl->body);
1733bf215546Sopenharmony_ci      agx_emit_phis_deferred(ctx);
1734bf215546Sopenharmony_ci      break; /* TODO: Multi-function shaders */
1735bf215546Sopenharmony_ci   }
1736bf215546Sopenharmony_ci
1737bf215546Sopenharmony_ci   /* Terminate the shader after the exit block */
1738bf215546Sopenharmony_ci   agx_block *last_block = list_last_entry(&ctx->blocks, agx_block, link);
1739bf215546Sopenharmony_ci   agx_builder _b = agx_init_builder(ctx, agx_after_block(last_block));
1740bf215546Sopenharmony_ci   agx_stop(&_b);
1741bf215546Sopenharmony_ci
1742bf215546Sopenharmony_ci   /* Also add traps to match the blob, unsure what the function is */
1743bf215546Sopenharmony_ci   for (unsigned i = 0; i < 8; ++i)
1744bf215546Sopenharmony_ci      agx_trap(&_b);
1745bf215546Sopenharmony_ci
1746bf215546Sopenharmony_ci   /* Index blocks now that we're done emitting so the order is consistent */
1747bf215546Sopenharmony_ci   agx_foreach_block(ctx, block)
1748bf215546Sopenharmony_ci      block->index = ctx->num_blocks++;
1749bf215546Sopenharmony_ci
1750bf215546Sopenharmony_ci   agx_validate(ctx, "IR translation");
1751bf215546Sopenharmony_ci
1752bf215546Sopenharmony_ci   if (agx_debug & AGX_DBG_SHADERS && !skip_internal)
1753bf215546Sopenharmony_ci      agx_print_shader(ctx, stdout);
1754bf215546Sopenharmony_ci
1755bf215546Sopenharmony_ci   agx_optimizer(ctx);
1756bf215546Sopenharmony_ci   agx_dce(ctx);
1757bf215546Sopenharmony_ci   agx_validate(ctx, "Optimization");
1758bf215546Sopenharmony_ci
1759bf215546Sopenharmony_ci   if (agx_debug & AGX_DBG_SHADERS && !skip_internal)
1760bf215546Sopenharmony_ci      agx_print_shader(ctx, stdout);
1761bf215546Sopenharmony_ci
1762bf215546Sopenharmony_ci   agx_ra(ctx);
1763bf215546Sopenharmony_ci
1764bf215546Sopenharmony_ci   if (ctx->stage == MESA_SHADER_VERTEX)
1765bf215546Sopenharmony_ci      agx_set_st_vary_final(ctx);
1766bf215546Sopenharmony_ci
1767bf215546Sopenharmony_ci   if (agx_debug & AGX_DBG_SHADERS && !skip_internal)
1768bf215546Sopenharmony_ci      agx_print_shader(ctx, stdout);
1769bf215546Sopenharmony_ci
1770bf215546Sopenharmony_ci   agx_lower_pseudo(ctx);
1771bf215546Sopenharmony_ci
1772bf215546Sopenharmony_ci   agx_pack_binary(ctx, binary);
1773bf215546Sopenharmony_ci
1774bf215546Sopenharmony_ci   if ((agx_debug & AGX_DBG_SHADERDB) && !skip_internal)
1775bf215546Sopenharmony_ci      agx_print_stats(ctx, binary->size, stderr);
1776bf215546Sopenharmony_ci
1777bf215546Sopenharmony_ci   ralloc_free(ctx);
1778bf215546Sopenharmony_ci}
1779