1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2015-2018 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "ir3_context.h" 28bf215546Sopenharmony_ci#include "ir3_compiler.h" 29bf215546Sopenharmony_ci#include "ir3_image.h" 30bf215546Sopenharmony_ci#include "ir3_nir.h" 31bf215546Sopenharmony_ci#include "ir3_shader.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cistruct ir3_context * 34bf215546Sopenharmony_ciir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, 35bf215546Sopenharmony_ci struct ir3_shader_variant *so) 36bf215546Sopenharmony_ci{ 37bf215546Sopenharmony_ci struct ir3_context *ctx = rzalloc(NULL, struct ir3_context); 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci if (compiler->gen == 4) { 40bf215546Sopenharmony_ci if (so->type == MESA_SHADER_VERTEX) { 41bf215546Sopenharmony_ci ctx->astc_srgb = so->key.vastc_srgb; 42bf215546Sopenharmony_ci memcpy(ctx->sampler_swizzles, so->key.vsampler_swizzles, sizeof(ctx->sampler_swizzles)); 43bf215546Sopenharmony_ci } else if (so->type == MESA_SHADER_FRAGMENT || 44bf215546Sopenharmony_ci so->type == MESA_SHADER_COMPUTE) { 45bf215546Sopenharmony_ci ctx->astc_srgb = so->key.fastc_srgb; 46bf215546Sopenharmony_ci memcpy(ctx->sampler_swizzles, so->key.fsampler_swizzles, sizeof(ctx->sampler_swizzles)); 47bf215546Sopenharmony_ci } 48bf215546Sopenharmony_ci } else if (compiler->gen == 3) { 49bf215546Sopenharmony_ci if (so->type == MESA_SHADER_VERTEX) { 50bf215546Sopenharmony_ci ctx->samples = so->key.vsamples; 51bf215546Sopenharmony_ci } else if (so->type == MESA_SHADER_FRAGMENT) { 52bf215546Sopenharmony_ci ctx->samples = so->key.fsamples; 53bf215546Sopenharmony_ci } 54bf215546Sopenharmony_ci } 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci if (compiler->gen >= 6) { 57bf215546Sopenharmony_ci ctx->funcs = &ir3_a6xx_funcs; 58bf215546Sopenharmony_ci } else if (compiler->gen >= 4) { 59bf215546Sopenharmony_ci ctx->funcs = &ir3_a4xx_funcs; 60bf215546Sopenharmony_ci } 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci ctx->compiler = compiler; 63bf215546Sopenharmony_ci ctx->so = so; 64bf215546Sopenharmony_ci ctx->def_ht = 65bf215546Sopenharmony_ci _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); 66bf215546Sopenharmony_ci ctx->block_ht = 67bf215546Sopenharmony_ci _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); 68bf215546Sopenharmony_ci ctx->continue_block_ht = 69bf215546Sopenharmony_ci _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); 70bf215546Sopenharmony_ci ctx->sel_cond_conversions = 71bf215546Sopenharmony_ci _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci /* TODO: maybe generate some sort of bitmask of what key 74bf215546Sopenharmony_ci * lowers vs what shader has (ie. no need to lower 75bf215546Sopenharmony_ci * texture clamp lowering if no texture sample instrs).. 76bf215546Sopenharmony_ci * although should be done further up the stack to avoid 77bf215546Sopenharmony_ci * creating duplicate variants.. 78bf215546Sopenharmony_ci */ 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci ctx->s = nir_shader_clone(ctx, shader->nir); 81bf215546Sopenharmony_ci ir3_nir_lower_variant(so, ctx->s); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci /* this needs to be the last pass run, so do this here instead of 84bf215546Sopenharmony_ci * in ir3_optimize_nir(): 85bf215546Sopenharmony_ci */ 86bf215546Sopenharmony_ci bool progress = false; 87bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci /* we could need cleanup after lower_locals_to_regs */ 90bf215546Sopenharmony_ci while (progress) { 91bf215546Sopenharmony_ci progress = false; 92bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_opt_algebraic); 93bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_opt_constant_folding); 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci /* We want to lower nir_op_imul as late as possible, to catch also 97bf215546Sopenharmony_ci * those generated by earlier passes (e.g, nir_lower_locals_to_regs). 98bf215546Sopenharmony_ci * However, we want a final swing of a few passes to have a chance 99bf215546Sopenharmony_ci * at optimizing the result. 100bf215546Sopenharmony_ci */ 101bf215546Sopenharmony_ci progress = false; 102bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, ir3_nir_lower_imul); 103bf215546Sopenharmony_ci while (progress) { 104bf215546Sopenharmony_ci progress = false; 105bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_opt_algebraic); 106bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars); 107bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars); 108bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_opt_dce); 109bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_opt_constant_folding); 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci /* Enable the texture pre-fetch feature only a4xx onwards. But 113bf215546Sopenharmony_ci * only enable it on generations that have been tested: 114bf215546Sopenharmony_ci */ 115bf215546Sopenharmony_ci if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gen >= 6)) 116bf215546Sopenharmony_ci NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true); 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci /* Super crude heuristic to limit # of tex prefetch in small 121bf215546Sopenharmony_ci * shaders. This completely ignores loops.. but that's really 122bf215546Sopenharmony_ci * not the worst of it's problems. (A frag shader that has 123bf215546Sopenharmony_ci * loops is probably going to be big enough to not trigger a 124bf215546Sopenharmony_ci * lower threshold.) 125bf215546Sopenharmony_ci * 126bf215546Sopenharmony_ci * 1) probably want to do this in terms of ir3 instructions 127bf215546Sopenharmony_ci * 2) probably really want to decide this after scheduling 128bf215546Sopenharmony_ci * (or at least pre-RA sched) so we have a rough idea about 129bf215546Sopenharmony_ci * nops, and don't count things that get cp'd away 130bf215546Sopenharmony_ci * 3) blob seems to use higher thresholds with a mix of more 131bf215546Sopenharmony_ci * SFU instructions. Which partly makes sense, more SFU 132bf215546Sopenharmony_ci * instructions probably means you want to get the real 133bf215546Sopenharmony_ci * shader started sooner, but that considers where in the 134bf215546Sopenharmony_ci * shader the SFU instructions are, which blob doesn't seem 135bf215546Sopenharmony_ci * to do. 136bf215546Sopenharmony_ci * 137bf215546Sopenharmony_ci * This uses more conservative thresholds assuming a more alu 138bf215546Sopenharmony_ci * than sfu heavy instruction mix. 139bf215546Sopenharmony_ci */ 140bf215546Sopenharmony_ci if (so->type == MESA_SHADER_FRAGMENT) { 141bf215546Sopenharmony_ci nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci unsigned instruction_count = 0; 144bf215546Sopenharmony_ci nir_foreach_block (block, fxn) { 145bf215546Sopenharmony_ci instruction_count += exec_list_length(&block->instr_list); 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (instruction_count < 50) { 149bf215546Sopenharmony_ci ctx->prefetch_limit = 2; 150bf215546Sopenharmony_ci } else if (instruction_count < 70) { 151bf215546Sopenharmony_ci ctx->prefetch_limit = 3; 152bf215546Sopenharmony_ci } else { 153bf215546Sopenharmony_ci ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH; 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci } 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci if (shader_debug_enabled(so->type)) { 158bf215546Sopenharmony_ci mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so), 159bf215546Sopenharmony_ci so->name); 160bf215546Sopenharmony_ci nir_log_shaderi(ctx->s); 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci return ctx; 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_civoid 169bf215546Sopenharmony_ciir3_context_free(struct ir3_context *ctx) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci ralloc_free(ctx); 172bf215546Sopenharmony_ci} 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci/* 175bf215546Sopenharmony_ci * Misc helpers 176bf215546Sopenharmony_ci */ 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci/* allocate a n element value array (to be populated by caller) and 179bf215546Sopenharmony_ci * insert in def_ht 180bf215546Sopenharmony_ci */ 181bf215546Sopenharmony_cistruct ir3_instruction ** 182bf215546Sopenharmony_ciir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n) 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci struct ir3_instruction **value = 185bf215546Sopenharmony_ci ralloc_array(ctx->def_ht, struct ir3_instruction *, n); 186bf215546Sopenharmony_ci _mesa_hash_table_insert(ctx->def_ht, dst, value); 187bf215546Sopenharmony_ci return value; 188bf215546Sopenharmony_ci} 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_cistruct ir3_instruction ** 191bf215546Sopenharmony_ciir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci struct ir3_instruction **value; 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci if (dst->is_ssa) { 196bf215546Sopenharmony_ci value = ir3_get_dst_ssa(ctx, &dst->ssa, n); 197bf215546Sopenharmony_ci } else { 198bf215546Sopenharmony_ci value = ralloc_array(ctx, struct ir3_instruction *, n); 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci /* NOTE: in non-ssa case, we don't really need to store last_dst 202bf215546Sopenharmony_ci * but this helps us catch cases where put_dst() call is forgotten 203bf215546Sopenharmony_ci */ 204bf215546Sopenharmony_ci compile_assert(ctx, !ctx->last_dst); 205bf215546Sopenharmony_ci ctx->last_dst = value; 206bf215546Sopenharmony_ci ctx->last_dst_n = n; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci return value; 209bf215546Sopenharmony_ci} 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_cistruct ir3_instruction *const * 212bf215546Sopenharmony_ciir3_get_src(struct ir3_context *ctx, nir_src *src) 213bf215546Sopenharmony_ci{ 214bf215546Sopenharmony_ci if (src->is_ssa) { 215bf215546Sopenharmony_ci struct hash_entry *entry; 216bf215546Sopenharmony_ci entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); 217bf215546Sopenharmony_ci compile_assert(ctx, entry); 218bf215546Sopenharmony_ci return entry->data; 219bf215546Sopenharmony_ci } else { 220bf215546Sopenharmony_ci nir_register *reg = src->reg.reg; 221bf215546Sopenharmony_ci struct ir3_array *arr = ir3_get_array(ctx, reg); 222bf215546Sopenharmony_ci unsigned num_components = arr->r->num_components; 223bf215546Sopenharmony_ci struct ir3_instruction *addr = NULL; 224bf215546Sopenharmony_ci struct ir3_instruction **value = 225bf215546Sopenharmony_ci ralloc_array(ctx, struct ir3_instruction *, num_components); 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci if (src->reg.indirect) 228bf215546Sopenharmony_ci addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0], 229bf215546Sopenharmony_ci reg->num_components); 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) { 232bf215546Sopenharmony_ci unsigned n = src->reg.base_offset * reg->num_components + i; 233bf215546Sopenharmony_ci compile_assert(ctx, n < arr->length); 234bf215546Sopenharmony_ci value[i] = ir3_create_array_load(ctx, arr, n, addr); 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci return value; 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci} 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_civoid 242bf215546Sopenharmony_ciir3_put_dst(struct ir3_context *ctx, nir_dest *dst) 243bf215546Sopenharmony_ci{ 244bf215546Sopenharmony_ci unsigned bit_size = ir3_bitsize(ctx, nir_dest_bit_size(*dst)); 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci /* add extra mov if dst value is shared reg.. in some cases not all 247bf215546Sopenharmony_ci * instructions can read from shared regs, in cases where they can 248bf215546Sopenharmony_ci * ir3_cp will clean up the extra mov: 249bf215546Sopenharmony_ci */ 250bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->last_dst_n; i++) { 251bf215546Sopenharmony_ci if (!ctx->last_dst[i]) 252bf215546Sopenharmony_ci continue; 253bf215546Sopenharmony_ci if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) { 254bf215546Sopenharmony_ci ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32); 255bf215546Sopenharmony_ci } 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci if (bit_size <= 16) { 259bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->last_dst_n; i++) { 260bf215546Sopenharmony_ci struct ir3_instruction *dst = ctx->last_dst[i]; 261bf215546Sopenharmony_ci ir3_set_dst_type(dst, true); 262bf215546Sopenharmony_ci ir3_fixup_src_type(dst); 263bf215546Sopenharmony_ci if (dst->opc == OPC_META_SPLIT) { 264bf215546Sopenharmony_ci ir3_set_dst_type(ssa(dst->srcs[0]), true); 265bf215546Sopenharmony_ci ir3_fixup_src_type(ssa(dst->srcs[0])); 266bf215546Sopenharmony_ci dst->srcs[0]->flags |= IR3_REG_HALF; 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci } 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci if (!dst->is_ssa) { 272bf215546Sopenharmony_ci nir_register *reg = dst->reg.reg; 273bf215546Sopenharmony_ci struct ir3_array *arr = ir3_get_array(ctx, reg); 274bf215546Sopenharmony_ci unsigned num_components = ctx->last_dst_n; 275bf215546Sopenharmony_ci struct ir3_instruction *addr = NULL; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci if (dst->reg.indirect) 278bf215546Sopenharmony_ci addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0], 279bf215546Sopenharmony_ci reg->num_components); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i++) { 282bf215546Sopenharmony_ci unsigned n = dst->reg.base_offset * reg->num_components + i; 283bf215546Sopenharmony_ci compile_assert(ctx, n < arr->length); 284bf215546Sopenharmony_ci if (!ctx->last_dst[i]) 285bf215546Sopenharmony_ci continue; 286bf215546Sopenharmony_ci ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr); 287bf215546Sopenharmony_ci } 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci ralloc_free(ctx->last_dst); 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci ctx->last_dst = NULL; 293bf215546Sopenharmony_ci ctx->last_dst_n = 0; 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_cistatic unsigned 297bf215546Sopenharmony_cidest_flags(struct ir3_instruction *instr) 298bf215546Sopenharmony_ci{ 299bf215546Sopenharmony_ci return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED); 300bf215546Sopenharmony_ci} 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_cistruct ir3_instruction * 303bf215546Sopenharmony_ciir3_create_collect(struct ir3_block *block, struct ir3_instruction *const *arr, 304bf215546Sopenharmony_ci unsigned arrsz) 305bf215546Sopenharmony_ci{ 306bf215546Sopenharmony_ci struct ir3_instruction *collect; 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci if (arrsz == 0) 309bf215546Sopenharmony_ci return NULL; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci unsigned flags = dest_flags(arr[0]); 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz); 314bf215546Sopenharmony_ci __ssa_dst(collect)->flags |= flags; 315bf215546Sopenharmony_ci for (unsigned i = 0; i < arrsz; i++) { 316bf215546Sopenharmony_ci struct ir3_instruction *elem = arr[i]; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci /* Since arrays are pre-colored in RA, we can't assume that 319bf215546Sopenharmony_ci * things will end up in the right place. (Ie. if a collect 320bf215546Sopenharmony_ci * joins elements from two different arrays.) So insert an 321bf215546Sopenharmony_ci * extra mov. 322bf215546Sopenharmony_ci * 323bf215546Sopenharmony_ci * We could possibly skip this if all the collected elements 324bf215546Sopenharmony_ci * are contiguous elements in a single array.. not sure how 325bf215546Sopenharmony_ci * likely that is to happen. 326bf215546Sopenharmony_ci * 327bf215546Sopenharmony_ci * Fixes a problem with glamor shaders, that in effect do 328bf215546Sopenharmony_ci * something like: 329bf215546Sopenharmony_ci * 330bf215546Sopenharmony_ci * if (foo) 331bf215546Sopenharmony_ci * texcoord = .. 332bf215546Sopenharmony_ci * else 333bf215546Sopenharmony_ci * texcoord = .. 334bf215546Sopenharmony_ci * color = texture2D(tex, texcoord); 335bf215546Sopenharmony_ci * 336bf215546Sopenharmony_ci * In this case, texcoord will end up as nir registers (which 337bf215546Sopenharmony_ci * translate to ir3 array's of length 1. And we can't assume 338bf215546Sopenharmony_ci * the two (or more) arrays will get allocated in consecutive 339bf215546Sopenharmony_ci * scalar registers. 340bf215546Sopenharmony_ci * 341bf215546Sopenharmony_ci */ 342bf215546Sopenharmony_ci if (elem->dsts[0]->flags & IR3_REG_ARRAY) { 343bf215546Sopenharmony_ci type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32; 344bf215546Sopenharmony_ci elem = ir3_MOV(block, elem, type); 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci assert(dest_flags(elem) == flags); 348bf215546Sopenharmony_ci __ssa_src(collect, elem, flags); 349bf215546Sopenharmony_ci } 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci collect->dsts[0]->wrmask = MASK(arrsz); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci return collect; 354bf215546Sopenharmony_ci} 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci/* helper for instructions that produce multiple consecutive scalar 357bf215546Sopenharmony_ci * outputs which need to have a split meta instruction inserted 358bf215546Sopenharmony_ci */ 359bf215546Sopenharmony_civoid 360bf215546Sopenharmony_ciir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, 361bf215546Sopenharmony_ci struct ir3_instruction *src, unsigned base, unsigned n) 362bf215546Sopenharmony_ci{ 363bf215546Sopenharmony_ci if ((n == 1) && (src->dsts[0]->wrmask == 0x1) && 364bf215546Sopenharmony_ci /* setup_input needs ir3_split_dest to generate a SPLIT instruction */ 365bf215546Sopenharmony_ci src->opc != OPC_META_INPUT) { 366bf215546Sopenharmony_ci dst[0] = src; 367bf215546Sopenharmony_ci return; 368bf215546Sopenharmony_ci } 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci if (src->opc == OPC_META_COLLECT) { 371bf215546Sopenharmony_ci assert((base + n) <= src->srcs_count); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci for (int i = 0; i < n; i++) { 374bf215546Sopenharmony_ci dst[i] = ssa(src->srcs[i + base]); 375bf215546Sopenharmony_ci } 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci return; 378bf215546Sopenharmony_ci } 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci unsigned flags = dest_flags(src); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci for (int i = 0, j = 0; i < n; i++) { 383bf215546Sopenharmony_ci struct ir3_instruction *split = 384bf215546Sopenharmony_ci ir3_instr_create(block, OPC_META_SPLIT, 1, 1); 385bf215546Sopenharmony_ci __ssa_dst(split)->flags |= flags; 386bf215546Sopenharmony_ci __ssa_src(split, src, flags); 387bf215546Sopenharmony_ci split->split.off = i + base; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci if (src->dsts[0]->wrmask & (1 << (i + base))) 390bf215546Sopenharmony_ci dst[j++] = split; 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci} 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ciNORETURN void 395bf215546Sopenharmony_ciir3_context_error(struct ir3_context *ctx, const char *format, ...) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci struct hash_table *errors = NULL; 398bf215546Sopenharmony_ci va_list ap; 399bf215546Sopenharmony_ci va_start(ap, format); 400bf215546Sopenharmony_ci if (ctx->cur_instr) { 401bf215546Sopenharmony_ci errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 402bf215546Sopenharmony_ci _mesa_key_pointer_equal); 403bf215546Sopenharmony_ci char *msg = ralloc_vasprintf(errors, format, ap); 404bf215546Sopenharmony_ci _mesa_hash_table_insert(errors, ctx->cur_instr, msg); 405bf215546Sopenharmony_ci } else { 406bf215546Sopenharmony_ci mesa_loge_v(format, ap); 407bf215546Sopenharmony_ci } 408bf215546Sopenharmony_ci va_end(ap); 409bf215546Sopenharmony_ci nir_log_shader_annotated(ctx->s, errors); 410bf215546Sopenharmony_ci ralloc_free(errors); 411bf215546Sopenharmony_ci ctx->error = true; 412bf215546Sopenharmony_ci unreachable(""); 413bf215546Sopenharmony_ci} 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_cistatic struct ir3_instruction * 416bf215546Sopenharmony_cicreate_addr0(struct ir3_block *block, struct ir3_instruction *src, int align) 417bf215546Sopenharmony_ci{ 418bf215546Sopenharmony_ci struct ir3_instruction *instr, *immed; 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci instr = ir3_COV(block, src, TYPE_U32, TYPE_S16); 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci switch (align) { 423bf215546Sopenharmony_ci case 1: 424bf215546Sopenharmony_ci /* src *= 1: */ 425bf215546Sopenharmony_ci break; 426bf215546Sopenharmony_ci case 2: 427bf215546Sopenharmony_ci /* src *= 2 => src <<= 1: */ 428bf215546Sopenharmony_ci immed = create_immed_typed(block, 1, TYPE_S16); 429bf215546Sopenharmony_ci instr = ir3_SHL_B(block, instr, 0, immed, 0); 430bf215546Sopenharmony_ci break; 431bf215546Sopenharmony_ci case 3: 432bf215546Sopenharmony_ci /* src *= 3: */ 433bf215546Sopenharmony_ci immed = create_immed_typed(block, 3, TYPE_S16); 434bf215546Sopenharmony_ci instr = ir3_MULL_U(block, instr, 0, immed, 0); 435bf215546Sopenharmony_ci break; 436bf215546Sopenharmony_ci case 4: 437bf215546Sopenharmony_ci /* src *= 4 => src <<= 2: */ 438bf215546Sopenharmony_ci immed = create_immed_typed(block, 2, TYPE_S16); 439bf215546Sopenharmony_ci instr = ir3_SHL_B(block, instr, 0, immed, 0); 440bf215546Sopenharmony_ci break; 441bf215546Sopenharmony_ci default: 442bf215546Sopenharmony_ci unreachable("bad align"); 443bf215546Sopenharmony_ci return NULL; 444bf215546Sopenharmony_ci } 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci instr->dsts[0]->flags |= IR3_REG_HALF; 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci instr = ir3_MOV(block, instr, TYPE_S16); 449bf215546Sopenharmony_ci instr->dsts[0]->num = regid(REG_A0, 0); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci return instr; 452bf215546Sopenharmony_ci} 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_cistatic struct ir3_instruction * 455bf215546Sopenharmony_cicreate_addr1(struct ir3_block *block, unsigned const_val) 456bf215546Sopenharmony_ci{ 457bf215546Sopenharmony_ci struct ir3_instruction *immed = 458bf215546Sopenharmony_ci create_immed_typed(block, const_val, TYPE_U16); 459bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16); 460bf215546Sopenharmony_ci instr->dsts[0]->num = regid(REG_A0, 1); 461bf215546Sopenharmony_ci return instr; 462bf215546Sopenharmony_ci} 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci/* caches addr values to avoid generating multiple cov/shl/mova 465bf215546Sopenharmony_ci * sequences for each use of a given NIR level src as address 466bf215546Sopenharmony_ci */ 467bf215546Sopenharmony_cistruct ir3_instruction * 468bf215546Sopenharmony_ciir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align) 469bf215546Sopenharmony_ci{ 470bf215546Sopenharmony_ci struct ir3_instruction *addr; 471bf215546Sopenharmony_ci unsigned idx = align - 1; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht)); 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci if (!ctx->addr0_ht[idx]) { 476bf215546Sopenharmony_ci ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer, 477bf215546Sopenharmony_ci _mesa_key_pointer_equal); 478bf215546Sopenharmony_ci } else { 479bf215546Sopenharmony_ci struct hash_entry *entry; 480bf215546Sopenharmony_ci entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src); 481bf215546Sopenharmony_ci if (entry) 482bf215546Sopenharmony_ci return entry->data; 483bf215546Sopenharmony_ci } 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci addr = create_addr0(ctx->block, src, align); 486bf215546Sopenharmony_ci _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr); 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci return addr; 489bf215546Sopenharmony_ci} 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci/* Similar to ir3_get_addr0, but for a1.x. */ 492bf215546Sopenharmony_cistruct ir3_instruction * 493bf215546Sopenharmony_ciir3_get_addr1(struct ir3_context *ctx, unsigned const_val) 494bf215546Sopenharmony_ci{ 495bf215546Sopenharmony_ci struct ir3_instruction *addr; 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci if (!ctx->addr1_ht) { 498bf215546Sopenharmony_ci ctx->addr1_ht = _mesa_hash_table_u64_create(ctx); 499bf215546Sopenharmony_ci } else { 500bf215546Sopenharmony_ci addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val); 501bf215546Sopenharmony_ci if (addr) 502bf215546Sopenharmony_ci return addr; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci addr = create_addr1(ctx->block, const_val); 506bf215546Sopenharmony_ci _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr); 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci return addr; 509bf215546Sopenharmony_ci} 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_cistruct ir3_instruction * 512bf215546Sopenharmony_ciir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) 513bf215546Sopenharmony_ci{ 514bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 515bf215546Sopenharmony_ci struct ir3_instruction *cond; 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci /* NOTE: only cmps.*.* can write p0.x: */ 518bf215546Sopenharmony_ci struct ir3_instruction *zero = 519bf215546Sopenharmony_ci create_immed_typed(b, 0, is_half(src) ? TYPE_U16 : TYPE_U32); 520bf215546Sopenharmony_ci cond = ir3_CMPS_S(b, src, 0, zero, 0); 521bf215546Sopenharmony_ci cond->cat2.condition = IR3_COND_NE; 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci /* condition always goes in predicate register: */ 524bf215546Sopenharmony_ci cond->dsts[0]->num = regid(REG_P0, 0); 525bf215546Sopenharmony_ci cond->dsts[0]->flags &= ~IR3_REG_SSA; 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci return cond; 528bf215546Sopenharmony_ci} 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci/* 531bf215546Sopenharmony_ci * Array helpers 532bf215546Sopenharmony_ci */ 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_civoid 535bf215546Sopenharmony_ciir3_declare_array(struct ir3_context *ctx, nir_register *reg) 536bf215546Sopenharmony_ci{ 537bf215546Sopenharmony_ci struct ir3_array *arr = rzalloc(ctx, struct ir3_array); 538bf215546Sopenharmony_ci arr->id = ++ctx->num_arrays; 539bf215546Sopenharmony_ci /* NOTE: sometimes we get non array regs, for example for arrays of 540bf215546Sopenharmony_ci * length 1. See fs-const-array-of-struct-of-array.shader_test. So 541bf215546Sopenharmony_ci * treat a non-array as if it was an array of length 1. 542bf215546Sopenharmony_ci * 543bf215546Sopenharmony_ci * It would be nice if there was a nir pass to convert arrays of 544bf215546Sopenharmony_ci * length 1 to ssa. 545bf215546Sopenharmony_ci */ 546bf215546Sopenharmony_ci arr->length = reg->num_components * MAX2(1, reg->num_array_elems); 547bf215546Sopenharmony_ci compile_assert(ctx, arr->length > 0); 548bf215546Sopenharmony_ci arr->r = reg; 549bf215546Sopenharmony_ci arr->half = ir3_bitsize(ctx, reg->bit_size) <= 16; 550bf215546Sopenharmony_ci list_addtail(&arr->node, &ctx->ir->array_list); 551bf215546Sopenharmony_ci} 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_cistruct ir3_array * 554bf215546Sopenharmony_ciir3_get_array(struct ir3_context *ctx, nir_register *reg) 555bf215546Sopenharmony_ci{ 556bf215546Sopenharmony_ci foreach_array (arr, &ctx->ir->array_list) { 557bf215546Sopenharmony_ci if (arr->r == reg) 558bf215546Sopenharmony_ci return arr; 559bf215546Sopenharmony_ci } 560bf215546Sopenharmony_ci ir3_context_error(ctx, "bogus reg: r%d\n", reg->index); 561bf215546Sopenharmony_ci return NULL; 562bf215546Sopenharmony_ci} 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci/* relative (indirect) if address!=NULL */ 565bf215546Sopenharmony_cistruct ir3_instruction * 566bf215546Sopenharmony_ciir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n, 567bf215546Sopenharmony_ci struct ir3_instruction *address) 568bf215546Sopenharmony_ci{ 569bf215546Sopenharmony_ci struct ir3_block *block = ctx->block; 570bf215546Sopenharmony_ci struct ir3_instruction *mov; 571bf215546Sopenharmony_ci struct ir3_register *src; 572bf215546Sopenharmony_ci unsigned flags = 0; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci mov = ir3_instr_create(block, OPC_MOV, 1, 1); 575bf215546Sopenharmony_ci if (arr->half) { 576bf215546Sopenharmony_ci mov->cat1.src_type = TYPE_U16; 577bf215546Sopenharmony_ci mov->cat1.dst_type = TYPE_U16; 578bf215546Sopenharmony_ci flags |= IR3_REG_HALF; 579bf215546Sopenharmony_ci } else { 580bf215546Sopenharmony_ci mov->cat1.src_type = TYPE_U32; 581bf215546Sopenharmony_ci mov->cat1.dst_type = TYPE_U32; 582bf215546Sopenharmony_ci } 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci mov->barrier_class = IR3_BARRIER_ARRAY_R; 585bf215546Sopenharmony_ci mov->barrier_conflict = IR3_BARRIER_ARRAY_W; 586bf215546Sopenharmony_ci __ssa_dst(mov)->flags |= flags; 587bf215546Sopenharmony_ci src = ir3_src_create(mov, 0, 588bf215546Sopenharmony_ci IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags); 589bf215546Sopenharmony_ci src->def = (arr->last_write && arr->last_write->instr->block == block) 590bf215546Sopenharmony_ci ? arr->last_write 591bf215546Sopenharmony_ci : NULL; 592bf215546Sopenharmony_ci src->size = arr->length; 593bf215546Sopenharmony_ci src->array.id = arr->id; 594bf215546Sopenharmony_ci src->array.offset = n; 595bf215546Sopenharmony_ci src->array.base = INVALID_REG; 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci if (address) 598bf215546Sopenharmony_ci ir3_instr_set_address(mov, address); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci return mov; 601bf215546Sopenharmony_ci} 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci/* relative (indirect) if address!=NULL */ 604bf215546Sopenharmony_civoid 605bf215546Sopenharmony_ciir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, 606bf215546Sopenharmony_ci struct ir3_instruction *src, 607bf215546Sopenharmony_ci struct ir3_instruction *address) 608bf215546Sopenharmony_ci{ 609bf215546Sopenharmony_ci struct ir3_block *block = ctx->block; 610bf215546Sopenharmony_ci struct ir3_instruction *mov; 611bf215546Sopenharmony_ci struct ir3_register *dst; 612bf215546Sopenharmony_ci unsigned flags = 0; 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci /* if not relative store, don't create an extra mov, since that 615bf215546Sopenharmony_ci * ends up being difficult for cp to remove. 616bf215546Sopenharmony_ci * 617bf215546Sopenharmony_ci * Also, don't skip the mov if the src is meta (like fanout/split), 618bf215546Sopenharmony_ci * since that creates a situation that RA can't really handle properly. 619bf215546Sopenharmony_ci */ 620bf215546Sopenharmony_ci if (!address && !is_meta(src)) { 621bf215546Sopenharmony_ci dst = src->dsts[0]; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci src->barrier_class |= IR3_BARRIER_ARRAY_W; 624bf215546Sopenharmony_ci src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci dst->flags |= IR3_REG_ARRAY; 627bf215546Sopenharmony_ci dst->size = arr->length; 628bf215546Sopenharmony_ci dst->array.id = arr->id; 629bf215546Sopenharmony_ci dst->array.offset = n; 630bf215546Sopenharmony_ci dst->array.base = INVALID_REG; 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_ci if (arr->last_write && arr->last_write->instr->block == src->block) 633bf215546Sopenharmony_ci ir3_reg_set_last_array(src, dst, arr->last_write); 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci arr->last_write = dst; 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci array_insert(block, block->keeps, src); 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci return; 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci mov = ir3_instr_create(block, OPC_MOV, 1, 1); 643bf215546Sopenharmony_ci if (arr->half) { 644bf215546Sopenharmony_ci mov->cat1.src_type = TYPE_U16; 645bf215546Sopenharmony_ci mov->cat1.dst_type = TYPE_U16; 646bf215546Sopenharmony_ci flags |= IR3_REG_HALF; 647bf215546Sopenharmony_ci } else { 648bf215546Sopenharmony_ci mov->cat1.src_type = TYPE_U32; 649bf215546Sopenharmony_ci mov->cat1.dst_type = TYPE_U32; 650bf215546Sopenharmony_ci } 651bf215546Sopenharmony_ci mov->barrier_class = IR3_BARRIER_ARRAY_W; 652bf215546Sopenharmony_ci mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; 653bf215546Sopenharmony_ci dst = ir3_dst_create( 654bf215546Sopenharmony_ci mov, 0, 655bf215546Sopenharmony_ci IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV)); 656bf215546Sopenharmony_ci dst->instr = mov; 657bf215546Sopenharmony_ci dst->size = arr->length; 658bf215546Sopenharmony_ci dst->array.id = arr->id; 659bf215546Sopenharmony_ci dst->array.offset = n; 660bf215546Sopenharmony_ci dst->array.base = INVALID_REG; 661bf215546Sopenharmony_ci ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0]; 662bf215546Sopenharmony_ci 663bf215546Sopenharmony_ci if (arr->last_write && arr->last_write->instr->block == block) 664bf215546Sopenharmony_ci ir3_reg_set_last_array(mov, dst, arr->last_write); 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci if (address) 667bf215546Sopenharmony_ci ir3_instr_set_address(mov, address); 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci arr->last_write = dst; 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci /* the array store may only matter to something in an earlier 672bf215546Sopenharmony_ci * block (ie. loops), but since arrays are not in SSA, depth 673bf215546Sopenharmony_ci * pass won't know this.. so keep all array stores: 674bf215546Sopenharmony_ci */ 675bf215546Sopenharmony_ci array_insert(block, block->keeps, mov); 676bf215546Sopenharmony_ci} 677