/*
 * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "ir3_context.h"
#include "ir3_compiler.h"
#include "ir3_image.h"
#include "ir3_nir.h"
#include "ir3_shader.h"

struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
                 struct ir3_shader_variant *so)
{
   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

   if (compiler->gen == 4) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->astc_srgb = so->key.vastc_srgb;
         memcpy(ctx->sampler_swizzles, so->key.vsampler_swizzles,
                sizeof(ctx->sampler_swizzles));
      } else if (so->type == MESA_SHADER_FRAGMENT ||
                 so->type == MESA_SHADER_COMPUTE) {
         ctx->astc_srgb = so->key.fastc_srgb;
         memcpy(ctx->sampler_swizzles, so->key.fsampler_swizzles,
                sizeof(ctx->sampler_swizzles));
      }
   } else if (compiler->gen == 3) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->samples = so->key.vsamples;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->samples = so->key.fsamples;
      }
   }

   if (compiler->gen >= 6) {
      ctx->funcs = &ir3_a6xx_funcs;
   } else if (compiler->gen >= 4) {
      ctx->funcs = &ir3_a4xx_funcs;
   }

   ctx->compiler = compiler;
   ctx->so = so;
   ctx->def_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->continue_block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->sel_cond_conversions =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
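
   /* Note: def_ht maps a nir_ssa_def to the ralloc'd array of per-component
    * ir3_instruction pointers holding its value (see ir3_get_dst_ssa() and
    * ir3_get_src() below); the other tables serve analogous per-NIR-object
    * lookups during conversion.
    */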

   /* TODO: maybe generate some sort of bitmask of what key
    * lowers vs what shader has (ie. no need to run texture clamp
    * lowering if there are no texture sample instrs)..
    * although should be done further up the stack to avoid
    * creating duplicate variants..
    */

   ctx->s = nir_shader_clone(ctx, shader->nir);
   ir3_nir_lower_variant(so, ctx->s);

   /* this needs to be the last pass run, so do this here instead of
    * in ir3_optimize_nir():
    */
   bool progress = false;
   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);

   /* we may need cleanup after lower_locals_to_regs */
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* We want to lower nir_op_imul as late as possible, to also catch
    * those generated by earlier passes (e.g. nir_lower_locals_to_regs).
    * However, we want a final swing of a few passes to have a chance
    * at optimizing the result.
    */
   progress = false;
   NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dce);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* The texture pre-fetch feature is only available from a4xx onwards,
    * but only enable it on generations that have been tested:
    */
   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6))
      NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);

   NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);

   /* Super crude heuristic to limit # of tex prefetch in small
    * shaders.  This completely ignores loops.. but that's really
    * not the worst of its problems.  (A frag shader that has
    * loops is probably going to be big enough to not trigger a
    * lower threshold.)
    *
    *   1) probably want to do this in terms of ir3 instructions
    *   2) probably really want to decide this after scheduling
    *      (or at least pre-RA sched) so we have a rough idea about
    *      nops, and don't count things that get cp'd away
    *   3) blob seems to use higher thresholds with a mix of more
    *      SFU instructions.  Which partly makes sense: more SFU
    *      instructions probably means you want to get the real
    *      shader started sooner, but that considers where in the
    *      shader the SFU instructions are, which blob doesn't seem
    *      to do.
    *
    * This uses more conservative thresholds, assuming an instruction
    * mix that is more ALU- than SFU-heavy.
    */
   if (so->type == MESA_SHADER_FRAGMENT) {
      nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);

      unsigned instruction_count = 0;
      nir_foreach_block (block, fxn) {
         instruction_count += exec_list_length(&block->instr_list);
      }

      if (instruction_count < 50) {
         ctx->prefetch_limit = 2;
      } else if (instruction_count < 70) {
         ctx->prefetch_limit = 3;
      } else {
         ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH;
      }
   }

   if (shader_debug_enabled(so->type)) {
      mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
                so->name);
      nir_log_shaderi(ctx->s);
   }

   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

   return ctx;
}

void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}

/*
 * Misc helpers
 */

/* allocate an n-element value array (to be populated by caller) and
 * insert it in def_ht
 */
struct ir3_instruction **
ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
{
   struct ir3_instruction **value =
      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
   _mesa_hash_table_insert(ctx->def_ht, dst, value);
   return value;
}

struct ir3_instruction **
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
   struct ir3_instruction **value;

   if (dst->is_ssa) {
      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
   } else {
      value = ralloc_array(ctx, struct ir3_instruction *, n);
   }

   /* NOTE: in the non-ssa case, we don't really need to store last_dst,
    * but this helps us catch cases where a put_dst() call is forgotten
    */
   compile_assert(ctx, !ctx->last_dst);
   ctx->last_dst = value;
   ctx->last_dst_n = n;

   return value;
}

struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   if (src->is_ssa) {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
      compile_assert(ctx, entry);
      return entry->data;
   } else {
      nir_register *reg = src->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = arr->r->num_components;
      struct ir3_instruction *addr = NULL;
      struct ir3_instruction **value =
         ralloc_array(ctx, struct ir3_instruction *, num_components);

      if (src->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = src->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         value[i] = ir3_create_array_load(ctx, arr, n, addr);
      }

      return value;
   }
}
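
/* Illustrative usage sketch (hypothetical caller, e.g. translating an
 * intrinsic):
 *
 *    struct ir3_instruction **dst = ir3_get_dst(ctx, &intr->dest, n);
 *    struct ir3_instruction *const *src = ir3_get_src(ctx, &intr->src[0]);
 *    for (unsigned i = 0; i < n; i++)
 *       dst[i] = ...;   // emit the ir3 instruction producing component i
 *    ir3_put_dst(ctx, &intr->dest);
 *
 * ir3_get_dst() records last_dst/last_dst_n so that the matching
 * ir3_put_dst() call can apply half-precision fixups and, for non-ssa
 * dests, emit the array stores.
 */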

void
ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
{
   unsigned bit_size = ir3_bitsize(ctx, nir_dest_bit_size(*dst));

   /* add an extra mov if the dst value is a shared reg.. not all
    * instructions can read from shared regs; in cases where they can,
    * ir3_cp will clean up the extra mov:
    */
   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
      if (!ctx->last_dst[i])
         continue;
      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
      }
   }

   if (bit_size <= 16) {
      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
         struct ir3_instruction *dst = ctx->last_dst[i];
         ir3_set_dst_type(dst, true);
         ir3_fixup_src_type(dst);
         if (dst->opc == OPC_META_SPLIT) {
            ir3_set_dst_type(ssa(dst->srcs[0]), true);
            ir3_fixup_src_type(ssa(dst->srcs[0]));
            dst->srcs[0]->flags |= IR3_REG_HALF;
         }
      }
   }

   if (!dst->is_ssa) {
      nir_register *reg = dst->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = ctx->last_dst_n;
      struct ir3_instruction *addr = NULL;

      if (dst->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = dst->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         if (!ctx->last_dst[i])
            continue;
         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
      }

      ralloc_free(ctx->last_dst);
   }

   ctx->last_dst = NULL;
   ctx->last_dst_n = 0;
}

static unsigned
dest_flags(struct ir3_instruction *instr)
{
   return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
}
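
/* A collect gathers 'arrsz' scalar values into a single virtual vecN
 * value (wrmask covering arrsz components), for instructions that consume
 * their source from consecutive registers; ir3_split_dest() below is the
 * inverse.  Illustrative sketch (hypothetical values):
 *
 *    struct ir3_instruction *xy[] = { x, y };
 *    struct ir3_instruction *coord = ir3_create_collect(b, xy, 2);
 *    ...
 *    struct ir3_instruction *comp[4];
 *    ir3_split_dest(b, comp, result, 0, 4);
 */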

struct ir3_instruction *
ir3_create_collect(struct ir3_block *block, struct ir3_instruction *const *arr,
                   unsigned arrsz)
{
   struct ir3_instruction *collect;

   if (arrsz == 0)
      return NULL;

   unsigned flags = dest_flags(arr[0]);

   collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz);
   __ssa_dst(collect)->flags |= flags;
   for (unsigned i = 0; i < arrsz; i++) {
      struct ir3_instruction *elem = arr[i];

      /* Since arrays are pre-colored in RA, we can't assume that
       * things will end up in the right place.  (Ie. if a collect
       * joins elements from two different arrays.)  So insert an
       * extra mov.
       *
       * We could possibly skip this if all the collected elements
       * are contiguous elements in a single array.. not sure how
       * likely that is to happen.
       *
       * Fixes a problem with glamor shaders, that in effect do
       * something like:
       *
       *    if (foo)
       *       texcoord = ..
       *    else
       *       texcoord = ..
       *    color = texture2D(tex, texcoord);
       *
       * In this case, texcoord will end up as nir registers (which
       * translate to ir3 arrays of length 1), and we can't assume
       * the two (or more) arrays will get allocated in consecutive
       * scalar registers.
       */
      if (elem->dsts[0]->flags & IR3_REG_ARRAY) {
         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         elem = ir3_MOV(block, elem, type);
      }

      assert(dest_flags(elem) == flags);
      __ssa_src(collect, elem, flags);
   }

   collect->dsts[0]->wrmask = MASK(arrsz);

   return collect;
}

/* helper for instructions that produce multiple consecutive scalar
 * outputs which need to have a split meta instruction inserted
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
               struct ir3_instruction *src, unsigned base, unsigned n)
{
   if ((n == 1) && (src->dsts[0]->wrmask == 0x1) &&
       /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
       src->opc != OPC_META_INPUT) {
      dst[0] = src;
      return;
   }

   if (src->opc == OPC_META_COLLECT) {
      assert((base + n) <= src->srcs_count);

      for (int i = 0; i < n; i++) {
         dst[i] = ssa(src->srcs[i + base]);
      }

      return;
   }

   unsigned flags = dest_flags(src);

   for (int i = 0, j = 0; i < n; i++) {
      struct ir3_instruction *split =
         ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
      __ssa_dst(split)->flags |= flags;
      __ssa_src(split, src, flags);
      split->split.off = i + base;

      if (src->dsts[0]->wrmask & (1 << (i + base)))
         dst[j++] = split;
   }
}

NORETURN void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
   struct hash_table *errors = NULL;
   va_list ap;
   va_start(ap, format);
   if (ctx->cur_instr) {
      errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                       _mesa_key_pointer_equal);
      char *msg = ralloc_vasprintf(errors, format, ap);
      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
   } else {
      mesa_loge_v(format, ap);
   }
   va_end(ap);
   nir_log_shader_annotated(ctx->s, errors);
   ralloc_free(errors);
   ctx->error = true;
   unreachable("");
}

static struct ir3_instruction *
create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *instr, *immed;

   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);

   switch (align) {
   case 1:
      /* src *= 1: */
      break;
   case 2:
      /* src *= 2 => src <<= 1: */
      immed = create_immed_typed(block, 1, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   case 3:
      /* src *= 3: */
      immed = create_immed_typed(block, 3, TYPE_S16);
      instr = ir3_MULL_U(block, instr, 0, immed, 0);
      break;
   case 4:
      /* src *= 4 => src <<= 2: */
      immed = create_immed_typed(block, 2, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   default:
      unreachable("bad align");
      return NULL;
   }

   instr->dsts[0]->flags |= IR3_REG_HALF;

   instr = ir3_MOV(block, instr, TYPE_S16);
   instr->dsts[0]->num = regid(REG_A0, 0);

   return instr;
}

static struct ir3_instruction *
create_addr1(struct ir3_block *block, unsigned const_val)
{
   struct ir3_instruction *immed =
      create_immed_typed(block, const_val, TYPE_U16);
   struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16);
   instr->dsts[0]->num = regid(REG_A0, 1);
   return instr;
}
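
/* Illustrative sketch of how the address helpers get used for an indirect
 * array access (mirrors ir3_get_src()/ir3_put_dst() above; hypothetical
 * caller):
 *
 *    struct ir3_instruction *addr =
 *       ir3_get_addr0(ctx, ir3_get_src(ctx, indirect)[0], ncomp);
 *    ir3_create_array_load(ctx, arr, n, addr);
 *
 * 'align' is the number of components per array element, so a0.x ends up
 * holding element_index * align.
 */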

/* caches addr values to avoid generating multiple cov/shl/mova
 * sequences for each use of a given NIR-level src as an address
 */
struct ir3_instruction *
ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *addr;
   unsigned idx = align - 1;

   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));

   if (!ctx->addr0_ht[idx]) {
      ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
   } else {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
      if (entry)
         return entry->data;
   }

   addr = create_addr0(ctx->block, src, align);
   _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);

   return addr;
}

/* Similar to ir3_get_addr0, but for a1.x. */
struct ir3_instruction *
ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
{
   struct ir3_instruction *addr;

   if (!ctx->addr1_ht) {
      ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
   } else {
      addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
      if (addr)
         return addr;
   }

   addr = create_addr1(ctx->block, const_val);
   _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);

   return addr;
}

struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *cond;

   /* NOTE: only cmps.*.* can write p0.x: */
   struct ir3_instruction *zero =
      create_immed_typed(b, 0, is_half(src) ? TYPE_U16 : TYPE_U32);
   cond = ir3_CMPS_S(b, src, 0, zero, 0);
   cond->cat2.condition = IR3_COND_NE;

   /* condition always goes in predicate register: */
   cond->dsts[0]->num = regid(REG_P0, 0);
   cond->dsts[0]->flags &= ~IR3_REG_SSA;

   return cond;
}

/*
 * Array helpers
 */

void
ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
{
   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
   arr->id = ++ctx->num_arrays;
   /* NOTE: sometimes we get non-array regs, for example for arrays of
    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
    * treat a non-array as if it were an array of length 1.
    *
    * It would be nice if there were a nir pass to convert arrays of
    * length 1 to ssa.
    */
   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
   compile_assert(ctx, arr->length > 0);
   arr->r = reg;
   arr->half = ir3_bitsize(ctx, reg->bit_size) <= 16;
   list_addtail(&arr->node, &ctx->ir->array_list);
}

struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
   foreach_array (arr, &ctx->ir->array_list) {
      if (arr->r == reg)
         return arr;
   }
   ir3_context_error(ctx, "bogus reg: r%d\n", reg->index);
   return NULL;
}
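
/* Array accesses are emitted as array-flavored movs.  arr->last_write
 * tracks the most recent store to the array in the current block, so a
 * following load/store in the same block can record it as a dependency;
 * since arrays are not in SSA form, stores are also added to
 * block->keeps so they are never treated as dead.
 */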

/* relative (indirect) if address!=NULL */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
                      struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *src;
   unsigned flags = 0;

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }

   mov->barrier_class = IR3_BARRIER_ARRAY_R;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
   __ssa_dst(mov)->flags |= flags;
   src = ir3_src_create(mov, 0,
                        IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags);
   src->def = (arr->last_write && arr->last_write->instr->block == block)
                 ? arr->last_write
                 : NULL;
   src->size = arr->length;
   src->array.id = arr->id;
   src->array.offset = n;
   src->array.base = INVALID_REG;

   if (address)
      ir3_instr_set_address(mov, address);

   return mov;
}

/* relative (indirect) if address!=NULL */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
                       struct ir3_instruction *src,
                       struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *dst;
   unsigned flags = 0;

   /* if not a relative store, don't create an extra mov, since that
    * ends up being difficult for cp to remove.
    *
    * Also, don't skip the mov if the src is meta (like fanout/split),
    * since that creates a situation that RA can't really handle properly.
    */
   if (!address && !is_meta(src)) {
      dst = src->dsts[0];

      src->barrier_class |= IR3_BARRIER_ARRAY_W;
      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

      dst->flags |= IR3_REG_ARRAY;
      dst->size = arr->length;
      dst->array.id = arr->id;
      dst->array.offset = n;
      dst->array.base = INVALID_REG;

      if (arr->last_write && arr->last_write->instr->block == src->block)
         ir3_reg_set_last_array(src, dst, arr->last_write);

      arr->last_write = dst;

      array_insert(block, block->keeps, src);

      return;
   }

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }
   mov->barrier_class = IR3_BARRIER_ARRAY_W;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
   dst = ir3_dst_create(
      mov, 0,
      IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV));
   dst->instr = mov;
   dst->size = arr->length;
   dst->array.id = arr->id;
   dst->array.offset = n;
   dst->array.base = INVALID_REG;
   ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0];

   if (arr->last_write && arr->last_write->instr->block == block)
      ir3_reg_set_last_array(mov, dst, arr->last_write);

   if (address)
      ir3_instr_set_address(mov, address);

   arr->last_write = dst;

   /* the array store may only matter to something in an earlier
    * block (ie. loops), but since arrays are not in SSA, the depth
    * pass won't know this.. so keep all array stores:
    */
   array_insert(block, block->keeps, mov);
}