1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "ir3_context.h"
28bf215546Sopenharmony_ci#include "ir3_compiler.h"
29bf215546Sopenharmony_ci#include "ir3_image.h"
30bf215546Sopenharmony_ci#include "ir3_nir.h"
31bf215546Sopenharmony_ci#include "ir3_shader.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_cistruct ir3_context *
34bf215546Sopenharmony_ciir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
35bf215546Sopenharmony_ci                 struct ir3_shader_variant *so)
36bf215546Sopenharmony_ci{
37bf215546Sopenharmony_ci   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci   if (compiler->gen == 4) {
40bf215546Sopenharmony_ci      if (so->type == MESA_SHADER_VERTEX) {
41bf215546Sopenharmony_ci         ctx->astc_srgb = so->key.vastc_srgb;
42bf215546Sopenharmony_ci         memcpy(ctx->sampler_swizzles, so->key.vsampler_swizzles, sizeof(ctx->sampler_swizzles));
43bf215546Sopenharmony_ci      } else if (so->type == MESA_SHADER_FRAGMENT ||
44bf215546Sopenharmony_ci            so->type == MESA_SHADER_COMPUTE) {
45bf215546Sopenharmony_ci         ctx->astc_srgb = so->key.fastc_srgb;
46bf215546Sopenharmony_ci         memcpy(ctx->sampler_swizzles, so->key.fsampler_swizzles, sizeof(ctx->sampler_swizzles));
47bf215546Sopenharmony_ci      }
48bf215546Sopenharmony_ci   } else if (compiler->gen == 3) {
49bf215546Sopenharmony_ci      if (so->type == MESA_SHADER_VERTEX) {
50bf215546Sopenharmony_ci         ctx->samples = so->key.vsamples;
51bf215546Sopenharmony_ci      } else if (so->type == MESA_SHADER_FRAGMENT) {
52bf215546Sopenharmony_ci         ctx->samples = so->key.fsamples;
53bf215546Sopenharmony_ci      }
54bf215546Sopenharmony_ci   }
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci   if (compiler->gen >= 6) {
57bf215546Sopenharmony_ci      ctx->funcs = &ir3_a6xx_funcs;
58bf215546Sopenharmony_ci   } else if (compiler->gen >= 4) {
59bf215546Sopenharmony_ci      ctx->funcs = &ir3_a4xx_funcs;
60bf215546Sopenharmony_ci   }
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci   ctx->compiler = compiler;
63bf215546Sopenharmony_ci   ctx->so = so;
64bf215546Sopenharmony_ci   ctx->def_ht =
65bf215546Sopenharmony_ci      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
66bf215546Sopenharmony_ci   ctx->block_ht =
67bf215546Sopenharmony_ci      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
68bf215546Sopenharmony_ci   ctx->continue_block_ht =
69bf215546Sopenharmony_ci      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
70bf215546Sopenharmony_ci   ctx->sel_cond_conversions =
71bf215546Sopenharmony_ci      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci   /* TODO: maybe generate some sort of bitmask of what key
74bf215546Sopenharmony_ci    * lowers vs what shader has (ie. no need to lower
75bf215546Sopenharmony_ci    * texture clamp lowering if no texture sample instrs)..
76bf215546Sopenharmony_ci    * although should be done further up the stack to avoid
77bf215546Sopenharmony_ci    * creating duplicate variants..
78bf215546Sopenharmony_ci    */
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   ctx->s = nir_shader_clone(ctx, shader->nir);
81bf215546Sopenharmony_ci   ir3_nir_lower_variant(so, ctx->s);
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   /* this needs to be the last pass run, so do this here instead of
84bf215546Sopenharmony_ci    * in ir3_optimize_nir():
85bf215546Sopenharmony_ci    */
86bf215546Sopenharmony_ci   bool progress = false;
87bf215546Sopenharmony_ci   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   /* we could need cleanup after lower_locals_to_regs */
90bf215546Sopenharmony_ci   while (progress) {
91bf215546Sopenharmony_ci      progress = false;
92bf215546Sopenharmony_ci      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
93bf215546Sopenharmony_ci      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
94bf215546Sopenharmony_ci   }
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci   /* We want to lower nir_op_imul as late as possible, to catch also
97bf215546Sopenharmony_ci    * those generated by earlier passes (e.g, nir_lower_locals_to_regs).
98bf215546Sopenharmony_ci    * However, we want a final swing of a few passes to have a chance
99bf215546Sopenharmony_ci    * at optimizing the result.
100bf215546Sopenharmony_ci    */
101bf215546Sopenharmony_ci   progress = false;
102bf215546Sopenharmony_ci   NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
103bf215546Sopenharmony_ci   while (progress) {
104bf215546Sopenharmony_ci      progress = false;
105bf215546Sopenharmony_ci      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
106bf215546Sopenharmony_ci      NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars);
107bf215546Sopenharmony_ci      NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
108bf215546Sopenharmony_ci      NIR_PASS(progress, ctx->s, nir_opt_dce);
109bf215546Sopenharmony_ci      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
110bf215546Sopenharmony_ci   }
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   /* Enable the texture pre-fetch feature only a4xx onwards.  But
113bf215546Sopenharmony_ci    * only enable it on generations that have been tested:
114bf215546Sopenharmony_ci    */
115bf215546Sopenharmony_ci   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6))
116bf215546Sopenharmony_ci      NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   /* Super crude heuristic to limit # of tex prefetch in small
121bf215546Sopenharmony_ci    * shaders.  This completely ignores loops.. but that's really
122bf215546Sopenharmony_ci    * not the worst of it's problems.  (A frag shader that has
123bf215546Sopenharmony_ci    * loops is probably going to be big enough to not trigger a
124bf215546Sopenharmony_ci    * lower threshold.)
125bf215546Sopenharmony_ci    *
126bf215546Sopenharmony_ci    *   1) probably want to do this in terms of ir3 instructions
127bf215546Sopenharmony_ci    *   2) probably really want to decide this after scheduling
128bf215546Sopenharmony_ci    *      (or at least pre-RA sched) so we have a rough idea about
129bf215546Sopenharmony_ci    *      nops, and don't count things that get cp'd away
130bf215546Sopenharmony_ci    *   3) blob seems to use higher thresholds with a mix of more
131bf215546Sopenharmony_ci    *      SFU instructions.  Which partly makes sense, more SFU
132bf215546Sopenharmony_ci    *      instructions probably means you want to get the real
133bf215546Sopenharmony_ci    *      shader started sooner, but that considers where in the
134bf215546Sopenharmony_ci    *      shader the SFU instructions are, which blob doesn't seem
135bf215546Sopenharmony_ci    *      to do.
136bf215546Sopenharmony_ci    *
137bf215546Sopenharmony_ci    * This uses more conservative thresholds assuming a more alu
138bf215546Sopenharmony_ci    * than sfu heavy instruction mix.
139bf215546Sopenharmony_ci    */
140bf215546Sopenharmony_ci   if (so->type == MESA_SHADER_FRAGMENT) {
141bf215546Sopenharmony_ci      nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci      unsigned instruction_count = 0;
144bf215546Sopenharmony_ci      nir_foreach_block (block, fxn) {
145bf215546Sopenharmony_ci         instruction_count += exec_list_length(&block->instr_list);
146bf215546Sopenharmony_ci      }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci      if (instruction_count < 50) {
149bf215546Sopenharmony_ci         ctx->prefetch_limit = 2;
150bf215546Sopenharmony_ci      } else if (instruction_count < 70) {
151bf215546Sopenharmony_ci         ctx->prefetch_limit = 3;
152bf215546Sopenharmony_ci      } else {
153bf215546Sopenharmony_ci         ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH;
154bf215546Sopenharmony_ci      }
155bf215546Sopenharmony_ci   }
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   if (shader_debug_enabled(so->type)) {
158bf215546Sopenharmony_ci      mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
159bf215546Sopenharmony_ci                so->name);
160bf215546Sopenharmony_ci      nir_log_shaderi(ctx->s);
161bf215546Sopenharmony_ci   }
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   return ctx;
166bf215546Sopenharmony_ci}
167bf215546Sopenharmony_ci
/* Release a context.  The only teardown performed here is a ralloc_free()
 * of the context allocation itself.
 */
void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci/*
175bf215546Sopenharmony_ci * Misc helpers
176bf215546Sopenharmony_ci */
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci/* allocate a n element value array (to be populated by caller) and
179bf215546Sopenharmony_ci * insert in def_ht
180bf215546Sopenharmony_ci */
181bf215546Sopenharmony_cistruct ir3_instruction **
182bf215546Sopenharmony_ciir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
183bf215546Sopenharmony_ci{
184bf215546Sopenharmony_ci   struct ir3_instruction **value =
185bf215546Sopenharmony_ci      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
186bf215546Sopenharmony_ci   _mesa_hash_table_insert(ctx->def_ht, dst, value);
187bf215546Sopenharmony_ci   return value;
188bf215546Sopenharmony_ci}
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_cistruct ir3_instruction **
191bf215546Sopenharmony_ciir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
192bf215546Sopenharmony_ci{
193bf215546Sopenharmony_ci   struct ir3_instruction **value;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   if (dst->is_ssa) {
196bf215546Sopenharmony_ci      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
197bf215546Sopenharmony_ci   } else {
198bf215546Sopenharmony_ci      value = ralloc_array(ctx, struct ir3_instruction *, n);
199bf215546Sopenharmony_ci   }
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci   /* NOTE: in non-ssa case, we don't really need to store last_dst
202bf215546Sopenharmony_ci    * but this helps us catch cases where put_dst() call is forgotten
203bf215546Sopenharmony_ci    */
204bf215546Sopenharmony_ci   compile_assert(ctx, !ctx->last_dst);
205bf215546Sopenharmony_ci   ctx->last_dst = value;
206bf215546Sopenharmony_ci   ctx->last_dst_n = n;
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   return value;
209bf215546Sopenharmony_ci}
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_cistruct ir3_instruction *const *
212bf215546Sopenharmony_ciir3_get_src(struct ir3_context *ctx, nir_src *src)
213bf215546Sopenharmony_ci{
214bf215546Sopenharmony_ci   if (src->is_ssa) {
215bf215546Sopenharmony_ci      struct hash_entry *entry;
216bf215546Sopenharmony_ci      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
217bf215546Sopenharmony_ci      compile_assert(ctx, entry);
218bf215546Sopenharmony_ci      return entry->data;
219bf215546Sopenharmony_ci   } else {
220bf215546Sopenharmony_ci      nir_register *reg = src->reg.reg;
221bf215546Sopenharmony_ci      struct ir3_array *arr = ir3_get_array(ctx, reg);
222bf215546Sopenharmony_ci      unsigned num_components = arr->r->num_components;
223bf215546Sopenharmony_ci      struct ir3_instruction *addr = NULL;
224bf215546Sopenharmony_ci      struct ir3_instruction **value =
225bf215546Sopenharmony_ci         ralloc_array(ctx, struct ir3_instruction *, num_components);
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci      if (src->reg.indirect)
228bf215546Sopenharmony_ci         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
229bf215546Sopenharmony_ci                              reg->num_components);
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_components; i++) {
232bf215546Sopenharmony_ci         unsigned n = src->reg.base_offset * reg->num_components + i;
233bf215546Sopenharmony_ci         compile_assert(ctx, n < arr->length);
234bf215546Sopenharmony_ci         value[i] = ir3_create_array_load(ctx, arr, n, addr);
235bf215546Sopenharmony_ci      }
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci      return value;
238bf215546Sopenharmony_ci   }
239bf215546Sopenharmony_ci}
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_civoid
242bf215546Sopenharmony_ciir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
243bf215546Sopenharmony_ci{
244bf215546Sopenharmony_ci   unsigned bit_size = ir3_bitsize(ctx, nir_dest_bit_size(*dst));
245bf215546Sopenharmony_ci
246bf215546Sopenharmony_ci   /* add extra mov if dst value is shared reg.. in some cases not all
247bf215546Sopenharmony_ci    * instructions can read from shared regs, in cases where they can
248bf215546Sopenharmony_ci    * ir3_cp will clean up the extra mov:
249bf215546Sopenharmony_ci    */
250bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
251bf215546Sopenharmony_ci      if (!ctx->last_dst[i])
252bf215546Sopenharmony_ci         continue;
253bf215546Sopenharmony_ci      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
254bf215546Sopenharmony_ci         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
255bf215546Sopenharmony_ci      }
256bf215546Sopenharmony_ci   }
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   if (bit_size <= 16) {
259bf215546Sopenharmony_ci      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
260bf215546Sopenharmony_ci         struct ir3_instruction *dst = ctx->last_dst[i];
261bf215546Sopenharmony_ci         ir3_set_dst_type(dst, true);
262bf215546Sopenharmony_ci         ir3_fixup_src_type(dst);
263bf215546Sopenharmony_ci         if (dst->opc == OPC_META_SPLIT) {
264bf215546Sopenharmony_ci            ir3_set_dst_type(ssa(dst->srcs[0]), true);
265bf215546Sopenharmony_ci            ir3_fixup_src_type(ssa(dst->srcs[0]));
266bf215546Sopenharmony_ci            dst->srcs[0]->flags |= IR3_REG_HALF;
267bf215546Sopenharmony_ci         }
268bf215546Sopenharmony_ci      }
269bf215546Sopenharmony_ci   }
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   if (!dst->is_ssa) {
272bf215546Sopenharmony_ci      nir_register *reg = dst->reg.reg;
273bf215546Sopenharmony_ci      struct ir3_array *arr = ir3_get_array(ctx, reg);
274bf215546Sopenharmony_ci      unsigned num_components = ctx->last_dst_n;
275bf215546Sopenharmony_ci      struct ir3_instruction *addr = NULL;
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci      if (dst->reg.indirect)
278bf215546Sopenharmony_ci         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
279bf215546Sopenharmony_ci                              reg->num_components);
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_components; i++) {
282bf215546Sopenharmony_ci         unsigned n = dst->reg.base_offset * reg->num_components + i;
283bf215546Sopenharmony_ci         compile_assert(ctx, n < arr->length);
284bf215546Sopenharmony_ci         if (!ctx->last_dst[i])
285bf215546Sopenharmony_ci            continue;
286bf215546Sopenharmony_ci         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
287bf215546Sopenharmony_ci      }
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci      ralloc_free(ctx->last_dst);
290bf215546Sopenharmony_ci   }
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci   ctx->last_dst = NULL;
293bf215546Sopenharmony_ci   ctx->last_dst_n = 0;
294bf215546Sopenharmony_ci}
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_cistatic unsigned
297bf215546Sopenharmony_cidest_flags(struct ir3_instruction *instr)
298bf215546Sopenharmony_ci{
299bf215546Sopenharmony_ci   return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
300bf215546Sopenharmony_ci}
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_cistruct ir3_instruction *
303bf215546Sopenharmony_ciir3_create_collect(struct ir3_block *block, struct ir3_instruction *const *arr,
304bf215546Sopenharmony_ci                   unsigned arrsz)
305bf215546Sopenharmony_ci{
306bf215546Sopenharmony_ci   struct ir3_instruction *collect;
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci   if (arrsz == 0)
309bf215546Sopenharmony_ci      return NULL;
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   unsigned flags = dest_flags(arr[0]);
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz);
314bf215546Sopenharmony_ci   __ssa_dst(collect)->flags |= flags;
315bf215546Sopenharmony_ci   for (unsigned i = 0; i < arrsz; i++) {
316bf215546Sopenharmony_ci      struct ir3_instruction *elem = arr[i];
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci      /* Since arrays are pre-colored in RA, we can't assume that
319bf215546Sopenharmony_ci       * things will end up in the right place.  (Ie. if a collect
320bf215546Sopenharmony_ci       * joins elements from two different arrays.)  So insert an
321bf215546Sopenharmony_ci       * extra mov.
322bf215546Sopenharmony_ci       *
323bf215546Sopenharmony_ci       * We could possibly skip this if all the collected elements
324bf215546Sopenharmony_ci       * are contiguous elements in a single array.. not sure how
325bf215546Sopenharmony_ci       * likely that is to happen.
326bf215546Sopenharmony_ci       *
327bf215546Sopenharmony_ci       * Fixes a problem with glamor shaders, that in effect do
328bf215546Sopenharmony_ci       * something like:
329bf215546Sopenharmony_ci       *
330bf215546Sopenharmony_ci       *   if (foo)
331bf215546Sopenharmony_ci       *     texcoord = ..
332bf215546Sopenharmony_ci       *   else
333bf215546Sopenharmony_ci       *     texcoord = ..
334bf215546Sopenharmony_ci       *   color = texture2D(tex, texcoord);
335bf215546Sopenharmony_ci       *
336bf215546Sopenharmony_ci       * In this case, texcoord will end up as nir registers (which
337bf215546Sopenharmony_ci       * translate to ir3 array's of length 1.  And we can't assume
338bf215546Sopenharmony_ci       * the two (or more) arrays will get allocated in consecutive
339bf215546Sopenharmony_ci       * scalar registers.
340bf215546Sopenharmony_ci       *
341bf215546Sopenharmony_ci       */
342bf215546Sopenharmony_ci      if (elem->dsts[0]->flags & IR3_REG_ARRAY) {
343bf215546Sopenharmony_ci         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
344bf215546Sopenharmony_ci         elem = ir3_MOV(block, elem, type);
345bf215546Sopenharmony_ci      }
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci      assert(dest_flags(elem) == flags);
348bf215546Sopenharmony_ci      __ssa_src(collect, elem, flags);
349bf215546Sopenharmony_ci   }
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci   collect->dsts[0]->wrmask = MASK(arrsz);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   return collect;
354bf215546Sopenharmony_ci}
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci/* helper for instructions that produce multiple consecutive scalar
357bf215546Sopenharmony_ci * outputs which need to have a split meta instruction inserted
358bf215546Sopenharmony_ci */
359bf215546Sopenharmony_civoid
360bf215546Sopenharmony_ciir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
361bf215546Sopenharmony_ci               struct ir3_instruction *src, unsigned base, unsigned n)
362bf215546Sopenharmony_ci{
363bf215546Sopenharmony_ci   if ((n == 1) && (src->dsts[0]->wrmask == 0x1) &&
364bf215546Sopenharmony_ci       /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
365bf215546Sopenharmony_ci       src->opc != OPC_META_INPUT) {
366bf215546Sopenharmony_ci      dst[0] = src;
367bf215546Sopenharmony_ci      return;
368bf215546Sopenharmony_ci   }
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci   if (src->opc == OPC_META_COLLECT) {
371bf215546Sopenharmony_ci      assert((base + n) <= src->srcs_count);
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci      for (int i = 0; i < n; i++) {
374bf215546Sopenharmony_ci         dst[i] = ssa(src->srcs[i + base]);
375bf215546Sopenharmony_ci      }
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci      return;
378bf215546Sopenharmony_ci   }
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci   unsigned flags = dest_flags(src);
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci   for (int i = 0, j = 0; i < n; i++) {
383bf215546Sopenharmony_ci      struct ir3_instruction *split =
384bf215546Sopenharmony_ci         ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
385bf215546Sopenharmony_ci      __ssa_dst(split)->flags |= flags;
386bf215546Sopenharmony_ci      __ssa_src(split, src, flags);
387bf215546Sopenharmony_ci      split->split.off = i + base;
388bf215546Sopenharmony_ci
389bf215546Sopenharmony_ci      if (src->dsts[0]->wrmask & (1 << (i + base)))
390bf215546Sopenharmony_ci         dst[j++] = split;
391bf215546Sopenharmony_ci   }
392bf215546Sopenharmony_ci}
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ciNORETURN void
395bf215546Sopenharmony_ciir3_context_error(struct ir3_context *ctx, const char *format, ...)
396bf215546Sopenharmony_ci{
397bf215546Sopenharmony_ci   struct hash_table *errors = NULL;
398bf215546Sopenharmony_ci   va_list ap;
399bf215546Sopenharmony_ci   va_start(ap, format);
400bf215546Sopenharmony_ci   if (ctx->cur_instr) {
401bf215546Sopenharmony_ci      errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
402bf215546Sopenharmony_ci                                       _mesa_key_pointer_equal);
403bf215546Sopenharmony_ci      char *msg = ralloc_vasprintf(errors, format, ap);
404bf215546Sopenharmony_ci      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
405bf215546Sopenharmony_ci   } else {
406bf215546Sopenharmony_ci      mesa_loge_v(format, ap);
407bf215546Sopenharmony_ci   }
408bf215546Sopenharmony_ci   va_end(ap);
409bf215546Sopenharmony_ci   nir_log_shader_annotated(ctx->s, errors);
410bf215546Sopenharmony_ci   ralloc_free(errors);
411bf215546Sopenharmony_ci   ctx->error = true;
412bf215546Sopenharmony_ci   unreachable("");
413bf215546Sopenharmony_ci}
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_cistatic struct ir3_instruction *
416bf215546Sopenharmony_cicreate_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
417bf215546Sopenharmony_ci{
418bf215546Sopenharmony_ci   struct ir3_instruction *instr, *immed;
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci   switch (align) {
423bf215546Sopenharmony_ci   case 1:
424bf215546Sopenharmony_ci      /* src *= 1: */
425bf215546Sopenharmony_ci      break;
426bf215546Sopenharmony_ci   case 2:
427bf215546Sopenharmony_ci      /* src *= 2	=> src <<= 1: */
428bf215546Sopenharmony_ci      immed = create_immed_typed(block, 1, TYPE_S16);
429bf215546Sopenharmony_ci      instr = ir3_SHL_B(block, instr, 0, immed, 0);
430bf215546Sopenharmony_ci      break;
431bf215546Sopenharmony_ci   case 3:
432bf215546Sopenharmony_ci      /* src *= 3: */
433bf215546Sopenharmony_ci      immed = create_immed_typed(block, 3, TYPE_S16);
434bf215546Sopenharmony_ci      instr = ir3_MULL_U(block, instr, 0, immed, 0);
435bf215546Sopenharmony_ci      break;
436bf215546Sopenharmony_ci   case 4:
437bf215546Sopenharmony_ci      /* src *= 4 => src <<= 2: */
438bf215546Sopenharmony_ci      immed = create_immed_typed(block, 2, TYPE_S16);
439bf215546Sopenharmony_ci      instr = ir3_SHL_B(block, instr, 0, immed, 0);
440bf215546Sopenharmony_ci      break;
441bf215546Sopenharmony_ci   default:
442bf215546Sopenharmony_ci      unreachable("bad align");
443bf215546Sopenharmony_ci      return NULL;
444bf215546Sopenharmony_ci   }
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   instr->dsts[0]->flags |= IR3_REG_HALF;
447bf215546Sopenharmony_ci
448bf215546Sopenharmony_ci   instr = ir3_MOV(block, instr, TYPE_S16);
449bf215546Sopenharmony_ci   instr->dsts[0]->num = regid(REG_A0, 0);
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   return instr;
452bf215546Sopenharmony_ci}
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_cistatic struct ir3_instruction *
455bf215546Sopenharmony_cicreate_addr1(struct ir3_block *block, unsigned const_val)
456bf215546Sopenharmony_ci{
457bf215546Sopenharmony_ci   struct ir3_instruction *immed =
458bf215546Sopenharmony_ci      create_immed_typed(block, const_val, TYPE_U16);
459bf215546Sopenharmony_ci   struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16);
460bf215546Sopenharmony_ci   instr->dsts[0]->num = regid(REG_A0, 1);
461bf215546Sopenharmony_ci   return instr;
462bf215546Sopenharmony_ci}
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci/* caches addr values to avoid generating multiple cov/shl/mova
465bf215546Sopenharmony_ci * sequences for each use of a given NIR level src as address
466bf215546Sopenharmony_ci */
467bf215546Sopenharmony_cistruct ir3_instruction *
468bf215546Sopenharmony_ciir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
469bf215546Sopenharmony_ci{
470bf215546Sopenharmony_ci   struct ir3_instruction *addr;
471bf215546Sopenharmony_ci   unsigned idx = align - 1;
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   if (!ctx->addr0_ht[idx]) {
476bf215546Sopenharmony_ci      ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
477bf215546Sopenharmony_ci                                                   _mesa_key_pointer_equal);
478bf215546Sopenharmony_ci   } else {
479bf215546Sopenharmony_ci      struct hash_entry *entry;
480bf215546Sopenharmony_ci      entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
481bf215546Sopenharmony_ci      if (entry)
482bf215546Sopenharmony_ci         return entry->data;
483bf215546Sopenharmony_ci   }
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci   addr = create_addr0(ctx->block, src, align);
486bf215546Sopenharmony_ci   _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   return addr;
489bf215546Sopenharmony_ci}
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci/* Similar to ir3_get_addr0, but for a1.x. */
492bf215546Sopenharmony_cistruct ir3_instruction *
493bf215546Sopenharmony_ciir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
494bf215546Sopenharmony_ci{
495bf215546Sopenharmony_ci   struct ir3_instruction *addr;
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci   if (!ctx->addr1_ht) {
498bf215546Sopenharmony_ci      ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
499bf215546Sopenharmony_ci   } else {
500bf215546Sopenharmony_ci      addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
501bf215546Sopenharmony_ci      if (addr)
502bf215546Sopenharmony_ci         return addr;
503bf215546Sopenharmony_ci   }
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci   addr = create_addr1(ctx->block, const_val);
506bf215546Sopenharmony_ci   _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci   return addr;
509bf215546Sopenharmony_ci}
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_cistruct ir3_instruction *
512bf215546Sopenharmony_ciir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
513bf215546Sopenharmony_ci{
514bf215546Sopenharmony_ci   struct ir3_block *b = ctx->block;
515bf215546Sopenharmony_ci   struct ir3_instruction *cond;
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci   /* NOTE: only cmps.*.* can write p0.x: */
518bf215546Sopenharmony_ci   struct ir3_instruction *zero =
519bf215546Sopenharmony_ci         create_immed_typed(b, 0, is_half(src) ? TYPE_U16 : TYPE_U32);
520bf215546Sopenharmony_ci   cond = ir3_CMPS_S(b, src, 0, zero, 0);
521bf215546Sopenharmony_ci   cond->cat2.condition = IR3_COND_NE;
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci   /* condition always goes in predicate register: */
524bf215546Sopenharmony_ci   cond->dsts[0]->num = regid(REG_P0, 0);
525bf215546Sopenharmony_ci   cond->dsts[0]->flags &= ~IR3_REG_SSA;
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci   return cond;
528bf215546Sopenharmony_ci}
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci/*
531bf215546Sopenharmony_ci * Array helpers
532bf215546Sopenharmony_ci */
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_civoid
535bf215546Sopenharmony_ciir3_declare_array(struct ir3_context *ctx, nir_register *reg)
536bf215546Sopenharmony_ci{
537bf215546Sopenharmony_ci   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
538bf215546Sopenharmony_ci   arr->id = ++ctx->num_arrays;
539bf215546Sopenharmony_ci   /* NOTE: sometimes we get non array regs, for example for arrays of
540bf215546Sopenharmony_ci    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
541bf215546Sopenharmony_ci    * treat a non-array as if it was an array of length 1.
542bf215546Sopenharmony_ci    *
543bf215546Sopenharmony_ci    * It would be nice if there was a nir pass to convert arrays of
544bf215546Sopenharmony_ci    * length 1 to ssa.
545bf215546Sopenharmony_ci    */
546bf215546Sopenharmony_ci   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
547bf215546Sopenharmony_ci   compile_assert(ctx, arr->length > 0);
548bf215546Sopenharmony_ci   arr->r = reg;
549bf215546Sopenharmony_ci   arr->half = ir3_bitsize(ctx, reg->bit_size) <= 16;
550bf215546Sopenharmony_ci   list_addtail(&arr->node, &ctx->ir->array_list);
551bf215546Sopenharmony_ci}
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_cistruct ir3_array *
554bf215546Sopenharmony_ciir3_get_array(struct ir3_context *ctx, nir_register *reg)
555bf215546Sopenharmony_ci{
556bf215546Sopenharmony_ci   foreach_array (arr, &ctx->ir->array_list) {
557bf215546Sopenharmony_ci      if (arr->r == reg)
558bf215546Sopenharmony_ci         return arr;
559bf215546Sopenharmony_ci   }
560bf215546Sopenharmony_ci   ir3_context_error(ctx, "bogus reg: r%d\n", reg->index);
561bf215546Sopenharmony_ci   return NULL;
562bf215546Sopenharmony_ci}
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci/* relative (indirect) if address!=NULL */
565bf215546Sopenharmony_cistruct ir3_instruction *
566bf215546Sopenharmony_ciir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
567bf215546Sopenharmony_ci                      struct ir3_instruction *address)
568bf215546Sopenharmony_ci{
569bf215546Sopenharmony_ci   struct ir3_block *block = ctx->block;
570bf215546Sopenharmony_ci   struct ir3_instruction *mov;
571bf215546Sopenharmony_ci   struct ir3_register *src;
572bf215546Sopenharmony_ci   unsigned flags = 0;
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
575bf215546Sopenharmony_ci   if (arr->half) {
576bf215546Sopenharmony_ci      mov->cat1.src_type = TYPE_U16;
577bf215546Sopenharmony_ci      mov->cat1.dst_type = TYPE_U16;
578bf215546Sopenharmony_ci      flags |= IR3_REG_HALF;
579bf215546Sopenharmony_ci   } else {
580bf215546Sopenharmony_ci      mov->cat1.src_type = TYPE_U32;
581bf215546Sopenharmony_ci      mov->cat1.dst_type = TYPE_U32;
582bf215546Sopenharmony_ci   }
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_ci   mov->barrier_class = IR3_BARRIER_ARRAY_R;
585bf215546Sopenharmony_ci   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
586bf215546Sopenharmony_ci   __ssa_dst(mov)->flags |= flags;
587bf215546Sopenharmony_ci   src = ir3_src_create(mov, 0,
588bf215546Sopenharmony_ci                        IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags);
589bf215546Sopenharmony_ci   src->def = (arr->last_write && arr->last_write->instr->block == block)
590bf215546Sopenharmony_ci                 ? arr->last_write
591bf215546Sopenharmony_ci                 : NULL;
592bf215546Sopenharmony_ci   src->size = arr->length;
593bf215546Sopenharmony_ci   src->array.id = arr->id;
594bf215546Sopenharmony_ci   src->array.offset = n;
595bf215546Sopenharmony_ci   src->array.base = INVALID_REG;
596bf215546Sopenharmony_ci
597bf215546Sopenharmony_ci   if (address)
598bf215546Sopenharmony_ci      ir3_instr_set_address(mov, address);
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   return mov;
601bf215546Sopenharmony_ci}
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci/* relative (indirect) if address!=NULL */
604bf215546Sopenharmony_civoid
605bf215546Sopenharmony_ciir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
606bf215546Sopenharmony_ci                       struct ir3_instruction *src,
607bf215546Sopenharmony_ci                       struct ir3_instruction *address)
608bf215546Sopenharmony_ci{
609bf215546Sopenharmony_ci   struct ir3_block *block = ctx->block;
610bf215546Sopenharmony_ci   struct ir3_instruction *mov;
611bf215546Sopenharmony_ci   struct ir3_register *dst;
612bf215546Sopenharmony_ci   unsigned flags = 0;
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   /* if not relative store, don't create an extra mov, since that
615bf215546Sopenharmony_ci    * ends up being difficult for cp to remove.
616bf215546Sopenharmony_ci    *
617bf215546Sopenharmony_ci    * Also, don't skip the mov if the src is meta (like fanout/split),
618bf215546Sopenharmony_ci    * since that creates a situation that RA can't really handle properly.
619bf215546Sopenharmony_ci    */
620bf215546Sopenharmony_ci   if (!address && !is_meta(src)) {
621bf215546Sopenharmony_ci      dst = src->dsts[0];
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci      src->barrier_class |= IR3_BARRIER_ARRAY_W;
624bf215546Sopenharmony_ci      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci      dst->flags |= IR3_REG_ARRAY;
627bf215546Sopenharmony_ci      dst->size = arr->length;
628bf215546Sopenharmony_ci      dst->array.id = arr->id;
629bf215546Sopenharmony_ci      dst->array.offset = n;
630bf215546Sopenharmony_ci      dst->array.base = INVALID_REG;
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_ci      if (arr->last_write && arr->last_write->instr->block == src->block)
633bf215546Sopenharmony_ci         ir3_reg_set_last_array(src, dst, arr->last_write);
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci      arr->last_write = dst;
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_ci      array_insert(block, block->keeps, src);
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci      return;
640bf215546Sopenharmony_ci   }
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
643bf215546Sopenharmony_ci   if (arr->half) {
644bf215546Sopenharmony_ci      mov->cat1.src_type = TYPE_U16;
645bf215546Sopenharmony_ci      mov->cat1.dst_type = TYPE_U16;
646bf215546Sopenharmony_ci      flags |= IR3_REG_HALF;
647bf215546Sopenharmony_ci   } else {
648bf215546Sopenharmony_ci      mov->cat1.src_type = TYPE_U32;
649bf215546Sopenharmony_ci      mov->cat1.dst_type = TYPE_U32;
650bf215546Sopenharmony_ci   }
651bf215546Sopenharmony_ci   mov->barrier_class = IR3_BARRIER_ARRAY_W;
652bf215546Sopenharmony_ci   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
653bf215546Sopenharmony_ci   dst = ir3_dst_create(
654bf215546Sopenharmony_ci      mov, 0,
655bf215546Sopenharmony_ci      IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV));
656bf215546Sopenharmony_ci   dst->instr = mov;
657bf215546Sopenharmony_ci   dst->size = arr->length;
658bf215546Sopenharmony_ci   dst->array.id = arr->id;
659bf215546Sopenharmony_ci   dst->array.offset = n;
660bf215546Sopenharmony_ci   dst->array.base = INVALID_REG;
661bf215546Sopenharmony_ci   ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0];
662bf215546Sopenharmony_ci
663bf215546Sopenharmony_ci   if (arr->last_write && arr->last_write->instr->block == block)
664bf215546Sopenharmony_ci      ir3_reg_set_last_array(mov, dst, arr->last_write);
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci   if (address)
667bf215546Sopenharmony_ci      ir3_instr_set_address(mov, address);
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci   arr->last_write = dst;
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_ci   /* the array store may only matter to something in an earlier
672bf215546Sopenharmony_ci    * block (ie. loops), but since arrays are not in SSA, depth
673bf215546Sopenharmony_ci    * pass won't know this.. so keep all array stores:
674bf215546Sopenharmony_ci    */
675bf215546Sopenharmony_ci   array_insert(block, block->keeps, mov);
676bf215546Sopenharmony_ci}
677