/*
 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "ir3_context.h"
#include "ir3_compiler.h"
#include "ir3_image.h"
#include "ir3_nir.h"
#include "ir3_shader.h"

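/* set up the per-variant compile context: clone the shader's NIR (so the
 * variant-specific lowering below doesn't modify the shared copy), run the
 * final lowering/optimization passes, and select per-generation emit
 * callbacks:
 */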
struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
                 struct ir3_shader_variant *so)
{
   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

   if (compiler->gen == 4) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->astc_srgb = so->key.vastc_srgb;
         memcpy(ctx->sampler_swizzles, so->key.vsampler_swizzles,
                sizeof(ctx->sampler_swizzles));
      } else if (so->type == MESA_SHADER_FRAGMENT ||
                 so->type == MESA_SHADER_COMPUTE) {
         ctx->astc_srgb = so->key.fastc_srgb;
         memcpy(ctx->sampler_swizzles, so->key.fsampler_swizzles,
                sizeof(ctx->sampler_swizzles));
      }
   } else if (compiler->gen == 3) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->samples = so->key.vsamples;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->samples = so->key.fsamples;
      }
   }

   if (compiler->gen >= 6) {
      ctx->funcs = &ir3_a6xx_funcs;
   } else if (compiler->gen >= 4) {
      ctx->funcs = &ir3_a4xx_funcs;
   }

   ctx->compiler = compiler;
   ctx->so = so;
   ctx->def_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->continue_block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->sel_cond_conversions =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);

   /* TODO: maybe generate some sort of bitmask of what the key
    * lowers vs what the shader has (i.e. no need to run texture
    * clamp lowering if there are no texture sample instrs)..
    * although this should be done further up the stack to avoid
    * creating duplicate variants..
    */

   ctx->s = nir_shader_clone(ctx, shader->nir);
   ir3_nir_lower_variant(so, ctx->s);

   /* this needs to be the last pass run, so do this here instead of
    * in ir3_optimize_nir():
    */
   bool progress = false;
   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);

   /* we may need cleanup after nir_lower_locals_to_regs: */
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* We want to lower nir_op_imul as late as possible, to also catch
    * those generated by earlier passes (e.g. nir_lower_locals_to_regs).
    * However, we want a final swing of a few passes to have a chance
    * at optimizing the result.
    */
   progress = false;
   NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dce);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* The texture pre-fetch feature only exists from a4xx onwards, but
    * only enable it on generations where it has been tested:
    */
   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6))
      NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);

   NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);

   /* Super crude heuristic to limit # of tex prefetch in small
    * shaders.  This completely ignores loops.. but that's really
    * not the worst of its problems.  (A frag shader that has
    * loops is probably going to be big enough to not trigger the
    * lower thresholds.)
    *
    *   1) probably want to do this in terms of ir3 instructions
    *   2) probably really want to decide this after scheduling
    *      (or at least pre-RA sched) so we have a rough idea about
    *      nops, and don't count things that get cp'd away
    *   3) blob seems to use higher thresholds with a mix of more
    *      SFU instructions.  Which partly makes sense: more SFU
    *      instructions probably means you want to get the real
    *      shader started sooner, but that depends on where in the
    *      shader the SFU instructions are, which blob doesn't seem
    *      to consider.
    *
    * This uses more conservative thresholds, assuming a more ALU-heavy
    * than SFU-heavy instruction mix.
    */
   if (so->type == MESA_SHADER_FRAGMENT) {
      nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);

      unsigned instruction_count = 0;
      nir_foreach_block (block, fxn) {
         instruction_count += exec_list_length(&block->instr_list);
      }

      if (instruction_count < 50) {
         ctx->prefetch_limit = 2;
      } else if (instruction_count < 70) {
         ctx->prefetch_limit = 3;
      } else {
         ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH;
      }
   }

   if (shader_debug_enabled(so->type)) {
      mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
                so->name);
      nir_log_shaderi(ctx->s);
   }

   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

   return ctx;
}

void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}

/*
 * Misc helpers
 */

/* allocate an n-element value array (to be populated by the caller) and
 * insert it in def_ht
 */
struct ir3_instruction **
ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
{
   struct ir3_instruction **value =
      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
   _mesa_hash_table_insert(ctx->def_ht, dst, value);
   return value;
}

struct ir3_instruction **
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
   struct ir3_instruction **value;

   if (dst->is_ssa) {
      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
   } else {
      value = ralloc_array(ctx, struct ir3_instruction *, n);
   }

   /* NOTE: in the non-ssa case we don't really need to store last_dst,
    * but it helps us catch cases where a put_dst() call is forgotten:
    */
   compile_assert(ctx, !ctx->last_dst);
   ctx->last_dst = value;
   ctx->last_dst_n = n;

   return value;
}

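/* get the value array for a nir_src.. for ssa srcs this returns the
 * array registered by ir3_get_dst_ssa() when the def was visited, and
 * for reg srcs it generates per-component array loads:
 */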
struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   if (src->is_ssa) {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
      compile_assert(ctx, entry);
      return entry->data;
   } else {
      nir_register *reg = src->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = arr->r->num_components;
      struct ir3_instruction *addr = NULL;
      struct ir3_instruction **value =
         ralloc_array(ctx, struct ir3_instruction *, num_components);

      if (src->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = src->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         value[i] = ir3_create_array_load(ctx, arr, n, addr);
      }

      return value;
   }
}

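/* finish up the dst value array obtained from ir3_get_dst(): fix up
 * half-precision types and, in the non-ssa case, generate the array
 * store(s) back to the nir_register:
 */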
void
ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
{
   unsigned bit_size = ir3_bitsize(ctx, nir_dest_bit_size(*dst));

   /* add an extra mov if the dst value is a shared reg.. in some cases
    * not all instructions can read from shared regs, and in cases where
    * they can, ir3_cp will clean up the extra mov:
    */
   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
      if (!ctx->last_dst[i])
         continue;
      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
      }
   }

   if (bit_size <= 16) {
      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
         struct ir3_instruction *dst = ctx->last_dst[i];
         ir3_set_dst_type(dst, true);
         ir3_fixup_src_type(dst);
         if (dst->opc == OPC_META_SPLIT) {
            ir3_set_dst_type(ssa(dst->srcs[0]), true);
            ir3_fixup_src_type(ssa(dst->srcs[0]));
            dst->srcs[0]->flags |= IR3_REG_HALF;
         }
      }
   }

   if (!dst->is_ssa) {
      nir_register *reg = dst->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = ctx->last_dst_n;
      struct ir3_instruction *addr = NULL;

      if (dst->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = dst->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         if (!ctx->last_dst[i])
            continue;
         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
      }

      ralloc_free(ctx->last_dst);
   }

   ctx->last_dst = NULL;
   ctx->last_dst_n = 0;
}

static unsigned
dest_flags(struct ir3_instruction *instr)
{
   return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
}

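/* create a collect (fanin) meta instruction that groups 'arrsz' scalar
 * values into a single contiguous vector value:
 */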
struct ir3_instruction *
ir3_create_collect(struct ir3_block *block, struct ir3_instruction *const *arr,
                   unsigned arrsz)
{
   struct ir3_instruction *collect;

   if (arrsz == 0)
      return NULL;

   unsigned flags = dest_flags(arr[0]);

   collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz);
   __ssa_dst(collect)->flags |= flags;
   for (unsigned i = 0; i < arrsz; i++) {
      struct ir3_instruction *elem = arr[i];

      /* Since arrays are pre-colored in RA, we can't assume that
       * things will end up in the right place.  (I.e. if a collect
       * joins elements from two different arrays.)  So insert an
       * extra mov.
       *
       * We could possibly skip this if all the collected elements
       * are contiguous elements in a single array.. not sure how
       * likely that is to happen.
       *
       * Fixes a problem with glamor shaders, which in effect do
       * something like:
       *
       *   if (foo)
       *     texcoord = ..
       *   else
       *     texcoord = ..
       *   color = texture2D(tex, texcoord);
       *
       * In this case, texcoord will end up as nir registers (which
       * translate to ir3 arrays of length 1), and we can't assume
       * the two (or more) arrays will get allocated in consecutive
       * scalar registers.
       */
      if (elem->dsts[0]->flags & IR3_REG_ARRAY) {
         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         elem = ir3_MOV(block, elem, type);
      }

      assert(dest_flags(elem) == flags);
      __ssa_src(collect, elem, flags);
   }

   collect->dsts[0]->wrmask = MASK(arrsz);

   return collect;
}

/* helper for instructions that produce multiple consecutive scalar
 * outputs which need to have a split meta instruction inserted
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
               struct ir3_instruction *src, unsigned base, unsigned n)
{
   if ((n == 1) && (src->dsts[0]->wrmask == 0x1) &&
       /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
       src->opc != OPC_META_INPUT) {
      dst[0] = src;
      return;
   }

   if (src->opc == OPC_META_COLLECT) {
      assert((base + n) <= src->srcs_count);

      for (int i = 0; i < n; i++) {
         dst[i] = ssa(src->srcs[i + base]);
      }

      return;
   }

   unsigned flags = dest_flags(src);

   for (int i = 0, j = 0; i < n; i++) {
      struct ir3_instruction *split =
         ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
      __ssa_dst(split)->flags |= flags;
      __ssa_src(split, src, flags);
      split->split.off = i + base;

      if (src->dsts[0]->wrmask & (1 << (i + base)))
         dst[j++] = split;
   }
}

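/* log a compile error and bail.  If we know which instruction was being
 * visited, annotate the logged NIR with the error message at that
 * instruction:
 */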
NORETURN void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
   struct hash_table *errors = NULL;
   va_list ap;
   va_start(ap, format);
   if (ctx->cur_instr) {
      errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                       _mesa_key_pointer_equal);
      char *msg = ralloc_vasprintf(errors, format, ap);
      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
   } else {
      mesa_loge_v(format, ap);
   }
   va_end(ap);
   nir_log_shader_annotated(ctx->s, errors);
   ralloc_free(errors);
   ctx->error = true;
   unreachable("");
}

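/* build the cov + shl/mull.u + mova sequence to get an index into a0.x,
 * scaling the src value by 'align' first:
 */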
static struct ir3_instruction *
create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *instr, *immed;

   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);

   switch (align) {
   case 1:
      /* src *= 1: */
      break;
   case 2:
      /* src *= 2 => src <<= 1: */
      immed = create_immed_typed(block, 1, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   case 3:
      /* src *= 3: */
      immed = create_immed_typed(block, 3, TYPE_S16);
      instr = ir3_MULL_U(block, instr, 0, immed, 0);
      break;
   case 4:
      /* src *= 4 => src <<= 2: */
      immed = create_immed_typed(block, 2, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   default:
      unreachable("bad align");
      return NULL;
   }

   instr->dsts[0]->flags |= IR3_REG_HALF;

   instr = ir3_MOV(block, instr, TYPE_S16);
   instr->dsts[0]->num = regid(REG_A0, 0);

   return instr;
}

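/* move a constant value into a1.x (no scaling needed, since the offset
 * is already known at compile time):
 */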
static struct ir3_instruction *
create_addr1(struct ir3_block *block, unsigned const_val)
{
   struct ir3_instruction *immed =
      create_immed_typed(block, const_val, TYPE_U16);
   struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16);
   instr->dsts[0]->num = regid(REG_A0, 1);
   return instr;
}

/* caches addr values to avoid generating multiple cov/shl/mova
 * sequences for each use of a given NIR-level src as an address
 */
struct ir3_instruction *
ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *addr;
   unsigned idx = align - 1;

   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));

   if (!ctx->addr0_ht[idx]) {
      ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
   } else {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
      if (entry)
         return entry->data;
   }

   addr = create_addr0(ctx->block, src, align);
   _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);

   return addr;
}

/* Similar to ir3_get_addr0, but for a1.x. */
struct ir3_instruction *
ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
{
   struct ir3_instruction *addr;

   if (!ctx->addr1_ht) {
      ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
   } else {
      addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
      if (addr)
         return addr;
   }

   addr = create_addr1(ctx->block, const_val);
   _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);

   return addr;
}

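/* get a predicate value (p0.x) from an ir3 src value, by comparing it
 * against zero:
 */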
struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *cond;

   /* NOTE: only cmps.*.* can write p0.x: */
   struct ir3_instruction *zero =
      create_immed_typed(b, 0, is_half(src) ? TYPE_U16 : TYPE_U32);
   cond = ir3_CMPS_S(b, src, 0, zero, 0);
   cond->cat2.condition = IR3_COND_NE;

   /* condition always goes in predicate register: */
   cond->dsts[0]->num = regid(REG_P0, 0);
   cond->dsts[0]->flags &= ~IR3_REG_SSA;

   return cond;
}

/*
 * Array helpers
 */

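/* create the ir3_array for a nir_register, which is how ir3 handles
 * non-ssa values (which potentially have relative access):
 */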
void
ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
{
   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
   arr->id = ++ctx->num_arrays;
   /* NOTE: sometimes we get non-array regs, for example for arrays of
    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
    * treat a non-array as if it was an array of length 1.
    *
    * It would be nice if there was a nir pass to convert arrays of
    * length 1 to ssa.
    */
   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
   compile_assert(ctx, arr->length > 0);
   arr->r = reg;
   arr->half = ir3_bitsize(ctx, reg->bit_size) <= 16;
   list_addtail(&arr->node, &ctx->ir->array_list);
}

struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
   foreach_array (arr, &ctx->ir->array_list) {
      if (arr->r == reg)
         return arr;
   }
   ir3_context_error(ctx, "bogus reg: r%d\n", reg->index);
   return NULL;
}

/* relative (indirect) if address!=NULL */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
                      struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *src;
   unsigned flags = 0;

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }

   mov->barrier_class = IR3_BARRIER_ARRAY_R;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
   __ssa_dst(mov)->flags |= flags;
   src = ir3_src_create(mov, 0,
                        IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags);
   src->def = (arr->last_write && arr->last_write->instr->block == block)
                 ? arr->last_write
                 : NULL;
   src->size = arr->length;
   src->array.id = arr->id;
   src->array.offset = n;
   src->array.base = INVALID_REG;

   if (address)
      ir3_instr_set_address(mov, address);

   return mov;
}

/* relative (indirect) if address!=NULL */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
                       struct ir3_instruction *src,
                       struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *dst;
   unsigned flags = 0;

   /* if this isn't a relative store, don't create an extra mov, since
    * that ends up being difficult for cp to remove.
    *
    * Also, don't skip the mov if the src is meta (like fanout/split),
    * since that creates a situation that RA can't really handle properly.
    */
   if (!address && !is_meta(src)) {
      dst = src->dsts[0];

      src->barrier_class |= IR3_BARRIER_ARRAY_W;
      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

      dst->flags |= IR3_REG_ARRAY;
      dst->size = arr->length;
      dst->array.id = arr->id;
      dst->array.offset = n;
      dst->array.base = INVALID_REG;

      if (arr->last_write && arr->last_write->instr->block == src->block)
         ir3_reg_set_last_array(src, dst, arr->last_write);

      arr->last_write = dst;

      array_insert(block, block->keeps, src);

      return;
   }

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }
   mov->barrier_class = IR3_BARRIER_ARRAY_W;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
   dst = ir3_dst_create(
      mov, 0,
      IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV));
   dst->instr = mov;
   dst->size = arr->length;
   dst->array.id = arr->id;
   dst->array.offset = n;
   dst->array.base = INVALID_REG;
   ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0];

   if (arr->last_write && arr->last_write->instr->block == block)
      ir3_reg_set_last_array(mov, dst, arr->last_write);

   if (address)
      ir3_instr_set_address(mov, address);

   arr->last_write = dst;

   /* the array store may only matter to something in an earlier
    * block (i.e. loops), but since arrays are not in SSA, the depth
    * pass won't know this.. so keep all array stores:
    */
   array_insert(block, block->keeps, mov);
}