/*
 * Copyright (c) 2012-2019 Etnaviv Project
 * Copyright (c) 2019 Zodiac Inflight Innovations
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 *    Wladimir J. van der Laan <laanwj@gmail.com>
 */

#include "etnaviv_compiler.h"
#include "etnaviv_compiler_nir.h"
#include "etnaviv_asm.h"
#include "etnaviv_context.h"
#include "etnaviv_debug.h"
#include "etnaviv_nir.h"
#include "etnaviv_uniforms.h"
#include "etnaviv_util.h"

#include <math.h>
#include "util/u_memory.h"
#include "util/register_allocate.h"
#include "compiler/nir/nir_builder.h"

#include "tgsi/tgsi_strings.h"
#include "util/compiler.h"
#include "util/half_float.h"

static bool
etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
{
   const struct etna_specs *specs = data;

   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_frsq:
   case nir_op_frcp:
   case nir_op_flog2:
   case nir_op_fexp2:
   case nir_op_fsqrt:
   case nir_op_fcos:
   case nir_op_fsin:
   case nir_op_fdiv:
   case nir_op_imul:
      return true;
   /* TODO: can do better than alu_to_scalar for vector compares */
   case nir_op_b32all_fequal2:
   case nir_op_b32all_fequal3:
   case nir_op_b32all_fequal4:
   case nir_op_b32any_fnequal2:
   case nir_op_b32any_fnequal3:
   case nir_op_b32any_fnequal4:
   case nir_op_b32all_iequal2:
   case nir_op_b32all_iequal3:
   case nir_op_b32all_iequal4:
   case nir_op_b32any_inequal2:
   case nir_op_b32any_inequal3:
   case nir_op_b32any_inequal4:
      return true;
   case nir_op_fdot2:
      if (!specs->has_halti2_instructions)
         return true;
      break;
   default:
      break;
   }

   return false;
}

static void
etna_emit_block_start(struct etna_compile *c, unsigned block)
{
   c->block_ptr[block] = c->inst_ptr;
}

static void
etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src src)
{
   struct etna_shader_io_file *sf = &c->variant->outfile;

   if (is_fs(c)) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0: /* DATA0 is used by gallium shaders for color */
         c->variant->ps_color_out_reg = src.reg;
         break;
      case FRAG_RESULT_DEPTH:
         c->variant->ps_depth_out_reg = src.reg;
         break;
      default:
         unreachable("Unsupported fs output");
      }
      return;
   }

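   /* vertex shader outputs: position and point size go to dedicated registers,
    * every other output is recorded in the outfile table so it can be matched
    * against fragment shader inputs at link time (etna_link_shader)
    */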
output"); 114 } 115 return; 116 } 117 118 switch (var->data.location) { 119 case VARYING_SLOT_POS: 120 c->variant->vs_pos_out_reg = src.reg; 121 break; 122 case VARYING_SLOT_PSIZ: 123 c->variant->vs_pointsize_out_reg = src.reg; 124 break; 125 default: 126 assert(sf->num_reg < ETNA_NUM_INPUTS); 127 sf->reg[sf->num_reg].reg = src.reg; 128 sf->reg[sf->num_reg].slot = var->data.location; 129 sf->reg[sf->num_reg].num_components = glsl_get_components(var->type); 130 sf->num_reg++; 131 break; 132 } 133} 134 135#define OPT(nir, pass, ...) ({ \ 136 bool this_progress = false; \ 137 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 138 this_progress; \ 139}) 140 141static void 142etna_optimize_loop(nir_shader *s) 143{ 144 bool progress; 145 do { 146 progress = false; 147 148 NIR_PASS_V(s, nir_lower_vars_to_ssa); 149 progress |= OPT(s, nir_opt_copy_prop_vars); 150 progress |= OPT(s, nir_opt_shrink_stores, true); 151 progress |= OPT(s, nir_opt_shrink_vectors); 152 progress |= OPT(s, nir_copy_prop); 153 progress |= OPT(s, nir_opt_dce); 154 progress |= OPT(s, nir_opt_cse); 155 progress |= OPT(s, nir_opt_peephole_select, 16, true, true); 156 progress |= OPT(s, nir_opt_intrinsics); 157 progress |= OPT(s, nir_opt_algebraic); 158 progress |= OPT(s, nir_opt_constant_folding); 159 progress |= OPT(s, nir_opt_dead_cf); 160 if (OPT(s, nir_opt_trivial_continues)) { 161 progress = true; 162 /* If nir_opt_trivial_continues makes progress, then we need to clean 163 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll 164 * to make progress. 165 */ 166 OPT(s, nir_copy_prop); 167 OPT(s, nir_opt_dce); 168 } 169 progress |= OPT(s, nir_opt_loop_unroll); 170 progress |= OPT(s, nir_opt_if, nir_opt_if_optimize_phi_true_false); 171 progress |= OPT(s, nir_opt_remove_phis); 172 progress |= OPT(s, nir_opt_undef); 173 } 174 while (progress); 175} 176 177static int 178etna_glsl_type_size(const struct glsl_type *type, bool bindless) 179{ 180 return glsl_count_attribute_slots(type, false); 181} 182 183static void 184copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, unsigned count) 185{ 186 struct etna_shader_uniform_info *uinfo = &sobj->uniforms; 187 188 uinfo->count = count * 4; 189 uinfo->data = MALLOC(uinfo->count * sizeof(*uinfo->data)); 190 uinfo->contents = MALLOC(uinfo->count * sizeof(*uinfo->contents)); 191 192 for (unsigned i = 0; i < uinfo->count; i++) { 193 uinfo->data[i] = consts[i]; 194 uinfo->contents[i] = consts[i] >> 32; 195 } 196 197 etna_set_shader_uniforms_dirty_flags(sobj); 198} 199 200#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3]) 201#define SRC_DISABLE ((hw_src){}) 202#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s}) 203#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s}) 204 205typedef struct etna_inst_dst hw_dst; 206typedef struct etna_inst_src hw_src; 207 208static inline hw_src 209src_swizzle(hw_src src, unsigned swizzle) 210{ 211 if (src.rgroup != INST_RGROUP_IMMEDIATE) 212 src.swiz = inst_swiz_compose(src.swiz, swizzle); 213 214 return src; 215} 216 217/* constants are represented as 64-bit ints 218 * 32-bit for the value and 32-bit for the type (imm, uniform, etc) 219 */ 220 221#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)} 222#define CONST(x) CONST_VAL(ETNA_UNIFORM_CONSTANT, x) 223#define UNIFORM(x) CONST_VAL(ETNA_UNIFORM_UNIFORM, x) 224#define TEXSCALE(x, i) 
static int
const_add(uint64_t *c, uint64_t value)
{
   for (unsigned i = 0; i < 4; i++) {
      if (c[i] == value || !c[i]) {
         c[i] = value;
         return i;
      }
   }
   return -1;
}

static hw_src
const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
{
   /* use inline immediates if possible */
   if (c->specs->halti >= 2 && num_components == 1 &&
       value[0].u64 >> 32 == ETNA_UNIFORM_CONSTANT) {
      uint32_t bits = value[0].u32;

      /* "float" - shifted by 12 */
      if ((bits & 0xfff) == 0)
         return etna_immediate_src(0, bits >> 12);

      /* "unsigned" - raw 20 bit value */
      if (bits < (1 << 20))
         return etna_immediate_src(2, bits);

      /* "signed" - sign extended 20-bit (sign included) value */
      if (bits >= 0xfff80000)
         return etna_immediate_src(1, bits);
   }

   unsigned i;
   int swiz = -1;
   for (i = 0; swiz < 0; i++) {
      uint64_t *a = &c->consts[i*4];
      uint64_t save[4];
      memcpy(save, a, sizeof(save));
      swiz = 0;
      for (unsigned j = 0; j < num_components; j++) {
         int c = const_add(a, value[j].u64);
         if (c < 0) {
            memcpy(a, save, sizeof(save));
            swiz = -1;
            break;
         }
         swiz |= c << j * 2;
      }
   }

   assert(i <= ETNA_MAX_IMM / 4);
   c->const_count = MAX2(c->const_count, i);

   return SRC_CONST(i - 1, swiz);
}

/* how to swizzle when used as a src */
static const uint8_t
reg_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};

/* how to swizzle when used as a dest */
static const uint8_t
reg_dst_swiz[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
   [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
   [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
   [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
   [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
   [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
   [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
   [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};

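/* The register allocator works on "virtual" registers: each RA node register
 * encodes both a base hw temp (reg_get_base) and one of the REG_TYPE_* classes
 * above (reg_get_type), so several small values can share channels of a single
 * vec4 temp. The swizzle tables translate logical components into the channels
 * that were actually assigned.
 */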
/* nir_src to allocated register */
static hw_src
ra_src(struct etna_compile *c, nir_src *src)
{
   unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
   return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
}

static hw_src
get_src(struct etna_compile *c, nir_src *src)
{
   if (!src->is_ssa)
      return ra_src(c, src);

   nir_instr *instr = src->ssa->parent_instr;

   if (instr->pass_flags & BYPASS_SRC) {
      assert(instr->type == nir_instr_type_alu);
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      assert(alu->op == nir_op_mov);
      return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
   }

   switch (instr->type) {
   case nir_instr_type_load_const:
      return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_load_input:
      case nir_intrinsic_load_instance_id:
      case nir_intrinsic_load_uniform:
      case nir_intrinsic_load_ubo:
         return ra_src(c, src);
      case nir_intrinsic_load_front_face:
         return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
      case nir_intrinsic_load_frag_coord:
         return SRC_REG(0, INST_SWIZ_IDENTITY);
      case nir_intrinsic_load_texture_rect_scaling: {
         int sampler = nir_src_as_int(intr->src[0]);
         nir_const_value values[] = {
            TEXSCALE(sampler, 0),
            TEXSCALE(sampler, 1),
         };

         return src_swizzle(const_src(c, values, 2), SWIZZLE(X,Y,X,X));
      }
      default:
         compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                       nir_intrinsic_infos[intr->intrinsic].name);
         break;
      }
   } break;
   case nir_instr_type_alu:
   case nir_instr_type_tex:
      return ra_src(c, src);
   case nir_instr_type_ssa_undef: {
      /* return zero to deal with broken Blur demo */
      nir_const_value value = CONST(0);
      return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X));
   }
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }

   return SRC_DISABLE;
}

static bool
vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa)
{
   for (unsigned i = 0; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa)
         continue;

      if (vec->src[i].swizzle[0] != i)
         return true;
   }

   /* don't deal with possible bypassed vec/mov chain */
   nir_foreach_use(use_src, ssa) {
      nir_instr *instr = use_src->parent_instr;
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_mov:
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         return true;
      default:
         break;
      }
   }
   return false;
}

/* get allocated dest register for nir_dest
 * *p_swiz tells how the components need to be placed into register
 */
static hw_dst
ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz)
{
   unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
   dest = real_dest(dest, &swiz, &mask);

   unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]);
   unsigned t = reg_get_type(r);

   *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);

   return (hw_dst) {
      .use = 1,
      .reg = reg_get_base(c, r),
      .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
   };
}

static void
emit_alu(struct etna_compile *c, nir_alu_instr * alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* marked as dead instruction (vecN and other bypassed instr) */
   if (alu->instr.pass_flags)
      return;

   assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));

   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz);

   /* compose alu write_mask with RA write mask */
   if (!alu->dest.dest.is_ssa)
      dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask);

   switch (alu->op) {
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
      /* not per-component - don't compose dst_swiz */
      dst_swiz = INST_SWIZ_IDENTITY;
      break;
   default:
      break;
   }

   hw_src srcs[3];

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *asrc = &alu->src[i];
      hw_src src;

      src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
      src = src_swizzle(src, dst_swiz);

      if (src.rgroup != INST_RGROUP_IMMEDIATE) {
         src.neg = asrc->negate || (alu->op == nir_op_fneg);
         src.abs = asrc->abs || (alu->op == nir_op_fabs);
      } else {
         assert(!asrc->negate && alu->op != nir_op_fneg);
         assert(!asrc->abs && alu->op != nir_op_fabs);
      }

      srcs[i] = src;
   }

   etna_emit_alu(c, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat));
}

static void
emit_tex(struct etna_compile *c, nir_tex_instr * tex)
{
   unsigned dst_swiz;
   hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz);
   nir_src *coord = NULL, *src1 = NULL, *src2 = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
      case nir_tex_src_ddx:
         assert(!src1);
         src1 = &tex->src[i].src;
         break;
      case nir_tex_src_comparator:
      case nir_tex_src_ddy:
         src2 = &tex->src[i].src;
         break;
      default:
         compile_error(c, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         break;
      }
   }

   etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
                 src1 ? get_src(c, src1) : SRC_DISABLE,
                 src2 ? get_src(c, src2) : SRC_DISABLE);
}

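/* Emit one NIR intrinsic. Shader outputs still arrive as store_deref here
 * (handled via etna_emit_output); indirectly addressed uniform loads need a
 * MOVAR to set up the address register before the relatively-addressed MOV.
 */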
static void
emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_store_deref:
      etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
      break;
   case nir_intrinsic_discard_if:
      etna_emit_discard(c, get_src(c, &intr->src[0]));
      break;
   case nir_intrinsic_discard:
      etna_emit_discard(c, SRC_DISABLE);
      break;
   case nir_intrinsic_load_uniform: {
      unsigned dst_swiz;
      struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz);

      /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOVAR,
         .dst.write_mask = 0x1,
         .src[2] = get_src(c, &intr->src[0]),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MOV,
         .dst = dst,
         .src[2] = {
            .use = 1,
            .rgroup = INST_RGROUP_UNIFORM_0,
            .reg = nir_intrinsic_base(intr),
            .swiz = dst_swiz,
            .amode = INST_AMODE_ADD_A_X,
         },
      });
   } break;
   case nir_intrinsic_load_ubo: {
      /* TODO: if offset is of the form (x + C) then add C to the base instead */
      unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
      unsigned dst_swiz;
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOAD,
         .type = INST_TYPE_U32,
         .dst = ra_dest(c, &intr->dest, &dst_swiz),
         .src[0] = get_src(c, &intr->src[1]),
         .src[1] = const_src(c, &CONST_VAL(ETNA_UNIFORM_UBO0_ADDR + idx, 0), 1),
      });
   } break;
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_frag_coord:
      assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_texture_rect_scaling:
      break;
   default:
      compile_error(c, "Unhandled NIR intrinsic type: %s\n",
                    nir_intrinsic_infos[intr->intrinsic].name);
   }
}

static void
emit_instr(struct etna_compile *c, nir_instr * instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(c, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_tex:
      emit_tex(c, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(c, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_jump:
      assert(nir_instr_is_last(instr));
      break;
   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_deref:
      break;
   default:
      compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
      break;
   }
}

static void
emit_block(struct etna_compile *c, nir_block * block)
{
   etna_emit_block_start(c, block->index);

   nir_foreach_instr(instr, block)
      emit_instr(c, instr);

   /* succs->index < block->index is for the loop case */
   nir_block *succs = block->successors[0];
   if (nir_block_ends_in_jump(block) || succs->index < block->index)
      etna_emit_jump(c, succs->index, SRC_DISABLE);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list);

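/* Structured NIR control flow is flattened into conditional/unconditional
 * branches here; branch targets are NIR block indices that get patched to
 * instruction addresses in etna_compile_shader() once all code is emitted.
 */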
static void
emit_if(struct etna_compile *c, nir_if * nif)
{
   etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
   emit_cf_list(c, &nif->then_list);

   /* jump at end of then_list to skip else_list
    * not needed if then_list already ends with a jump or else_list is empty
    */
   if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
       !nir_cf_list_is_empty_block(&nif->else_list))
      etna_emit_jump(c, nir_if_last_then_block(nif)->successors[0]->index, SRC_DISABLE);

   emit_cf_list(c, &nif->else_list);
}

static void
emit_cf_list(struct etna_compile *c, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         emit_block(c, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(c, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
         break;
      default:
         compile_error(c, "Unknown NIR node type\n");
         break;
      }
   }
}

/* based on nir_lower_vec_to_movs */
static unsigned
insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
{
   assert(start_idx < nir_op_infos[vec->op].num_inputs);
   unsigned write_mask = (1u << start_idx);

   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx]);

   mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
   mov->src[0].negate = vec->src[start_idx].negate;
   mov->src[0].abs = vec->src[start_idx].abs;

   unsigned num_components = 1;

   for (unsigned i = start_idx + 1; i < 4; i++) {
      if (!(vec->dest.write_mask & (1 << i)))
         continue;

      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
          vec->src[i].negate == vec->src[start_idx].negate &&
          vec->src[i].abs == vec->src[start_idx].abs) {
         write_mask |= (1 << i);
         mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
         num_components++;
      }
   }

   mov->dest.write_mask = (1 << num_components) - 1;
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL);

   /* replace vec srcs with inserted mov */
   for (unsigned i = 0, j = 0; i < 4; i++) {
      if (!(write_mask & (1 << i)))
         continue;

      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa));
      vec->src[i].swizzle[0] = j++;
   }

   nir_instr_insert_before(&vec->instr, &mov->instr);

   return write_mask;
}

/*
 * for vecN instructions:
 * -merge constant sources into a single src
 * -insert movs (nir_lower_vec_to_movs equivalent)
 * for non-vecN instructions:
 * -try to merge constants as single constant
 * -insert movs for multiple constants if required
 */
static void
lower_alu(struct etna_compile *c, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];

   nir_builder b;
   nir_builder_init(&b, c->impl);
   b.cursor = nir_before_instr(&alu->instr);

   switch (alu->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
      break;
   default:
      if (c->specs->has_no_oneconst_limit)
         return;

      nir_const_value value[4] = {};
      uint8_t swizzle[4][4] = {};
      unsigned swiz_max = 0, num_const = 0;

      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components;
         for (unsigned j = 0; j < num_components; j++) {
            int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
            swizzle[i][j] = idx;
            swiz_max = MAX2(swiz_max, (unsigned) idx);
         }
         num_const++;
      }

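      /* the hardware generally reads at most one distinct uniform/constant
       * register per instruction (hence the has_no_oneconst_limit early-out
       * above), so multiple constant sources have to be combined into one
       * constant vector or moved to temps
       */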
      /* nothing to do */
      if (num_const <= 1)
         return;

      /* resolve with single combined const src */
      if (swiz_max < 4) {
         nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);

         for (unsigned i = 0; i < info->num_inputs; i++) {
            nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
            if (!cv)
               continue;

            nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));

            for (unsigned j = 0; j < 4; j++)
               alu->src[i].swizzle[j] = swizzle[i][j];
         }
         return;
      }

      /* resolve with movs */
      num_const = 0;
      for (unsigned i = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         num_const++;
         if (num_const == 1)
            continue;

         nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa);
         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov));
      }
      return;
   }

   nir_const_value value[4];
   unsigned num_components = 0;

   for (unsigned i = 0; i < info->num_inputs; i++) {
      nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
      if (cv)
         value[num_components++] = cv[alu->src[i].swizzle[0]];
   }

   /* if there is more than one constant source to the vecN, combine them
    * into a single load_const (removing the vecN completely if all components
    * are constant)
    */
   if (num_components > 1) {
      nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value);

      if (num_components == info->num_inputs) {
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, def);
         nir_instr_remove(&alu->instr);
         return;
      }

      for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
         nir_const_value *cv = nir_src_as_const_value(alu->src[i].src);
         if (!cv)
            continue;

         nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def));
         alu->src[i].swizzle[0] = j++;
      }
   }

   unsigned finished_write_mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (!(alu->dest.write_mask & (1 << i)))
         continue;

      nir_ssa_def *ssa = alu->src[i].src.ssa;

      /* check that vecN instruction is only user of this */
      bool need_mov = list_length(&ssa->if_uses) != 0;
      nir_foreach_use(use_src, ssa) {
         if (use_src->parent_instr != &alu->instr)
            need_mov = true;
      }

      nir_instr *instr = ssa->parent_instr;
      switch (instr->type) {
      case nir_instr_type_alu:
      case nir_instr_type_tex:
         break;
      case nir_instr_type_intrinsic:
         if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
            need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa);
            break;
         }
         FALLTHROUGH;
      default:
         need_mov = true;
      }

      if (need_mov && !(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_vec_mov(alu, i, c->nir);
   }
}

static bool
emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
{
   nir_shader *shader = c->nir;
   c->impl = nir_shader_get_entrypoint(shader);

   bool have_indirect_uniform = false;
   unsigned indirect_max = 0;

   nir_builder b;
   nir_builder_init(&b, c->impl);

   /* convert non-dynamic uniform loads to constants, etc */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         switch(instr->type) {
         case nir_instr_type_alu:
            /* deals with vecN and const srcs */
            lower_alu(c, nir_instr_as_alu(instr));
            break;
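         /* tag constants with ETNA_UNIFORM_CONSTANT in the upper 32 bits so
          * const_src() can tell plain immediates apart from uniform slots
          * (see the CONST_VAL representation above)
          */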
         case nir_instr_type_load_const: {
            nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
            for (unsigned i = 0; i < load_const->def.num_components; i++)
               load_const->value[i] = CONST(load_const->value[i].u32);
         } break;
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            /* TODO: load_ubo can also become a constant in some cases
             * (at the moment it can end up emitting a LOAD with two
             * uniform sources, which could be a problem on HALTI2)
             */
            if (intr->intrinsic != nir_intrinsic_load_uniform)
               break;
            nir_const_value *off = nir_src_as_const_value(intr->src[0]);
            if (!off || off[0].u64 >> 32 != ETNA_UNIFORM_CONSTANT) {
               have_indirect_uniform = true;
               indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
               break;
            }

            unsigned base = nir_intrinsic_base(intr);
            /* pre halti2 uniform offset will be float */
            if (c->specs->halti < 2)
               base += (unsigned) off[0].f32;
            else
               base += off[0].u32;
            nir_const_value value[4];

            for (unsigned i = 0; i < intr->dest.ssa.num_components; i++)
               value[i] = UNIFORM(base * 4 + i);

            b.cursor = nir_after_instr(instr);
            nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value);

            nir_ssa_def_rewrite_uses(&intr->dest.ssa, def);
            nir_instr_remove(instr);
         } break;
         default:
            break;
         }
      }
   }

   /* TODO: only emit required indirect uniform ranges */
   if (have_indirect_uniform) {
      for (unsigned i = 0; i < indirect_max * 4; i++)
         c->consts[i] = UNIFORM(i).u64;
      c->const_count = indirect_max;
   }

   /* add mov for any store output using sysval/const and for depth stores from intrinsics */
   nir_foreach_block(block, c->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

         switch (intr->intrinsic) {
         case nir_intrinsic_store_deref: {
            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            nir_src *src = &intr->src[1];
            if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr) ||
                (shader->info.stage == MESA_SHADER_FRAGMENT &&
                 deref->var->data.location == FRAG_RESULT_DEPTH &&
                 src->is_ssa &&
                 src->ssa->parent_instr->type != nir_instr_type_alu)) {
               b.cursor = nir_before_instr(instr);
               nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa)));
            }
         } break;
         default:
            break;
         }
      }
   }

   /* call directly to avoid validation (load_const don't pass validation at this point) */
   nir_convert_from_ssa(shader, true);
   nir_opt_dce(shader);

   etna_ra_assign(c, shader);

   emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);

   *num_temps = etna_ra_finish(c);
   *num_consts = c->const_count;
   return true;
}

static bool
etna_compile_check_limits(struct etna_shader_variant *v)
{
   const struct etna_specs *specs = v->shader->specs;
   int max_uniforms = (v->stage == MESA_SHADER_VERTEX)
                         ? specs->max_vs_uniforms
                         : specs->max_ps_uniforms;

   if (!specs->has_icache && v->needs_icache) {
      DBG("Number of instructions (%d) exceeds maximum %d", v->code_size / 4,
          specs->max_instructions);
      return false;
   }

   if (v->num_temps > specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", v->num_temps,
          specs->max_registers);
      return false;
   }

   if (v->uniforms.count / 4 > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d",
          v->uniforms.count / 4, max_uniforms);
      return false;
   }

   return true;
}

static void
fill_vs_mystery(struct etna_shader_variant *v)
{
   const struct etna_specs *specs = v->shader->specs;

   v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS in the unified shader architecture.
    * More precisely, it is determined from the number of VS outputs, as well
    * as chip-specific vertex output buffer size, vertex cache size, and the
    * number of shader cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known
    * for sure at link time because some outputs may be unused and thus
    * unmapped. Then again, in the general use case with GLSL the vertex and
    * fragment shaders are linked already before submitting to Gallium, thus
    * all outputs are used.
    *
    * note: TGSI compiler counts all outputs (including position and pointsize), here
    * v->outfile.num_reg only counts varyings, +1 to compensate for the position output
    * TODO: might have a problem that we don't count pointsize when it is used
    */

   int half_out = v->outfile.num_reg / 2 + 1;
   assert(half_out);

   uint32_t b = ((20480 / (specs->vertex_output_buffer_size -
                           2 * half_out * specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2;
   v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                          VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                          VIVS_VS_LOAD_BALANCING_C(0x3f) |
                          VIVS_VS_LOAD_BALANCING_D(0x0f);
}

bool
etna_compile_shader(struct etna_shader_variant *v)
{
   if (unlikely(!v))
      return false;

   struct etna_compile *c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   c->variant = v;
   c->specs = v->shader->specs;
   c->nir = nir_shader_clone(NULL, v->shader->nir);

   nir_shader *s = c->nir;
   const struct etna_specs *specs = c->specs;

   v->stage = s->info.stage;
   v->uses_discard = s->info.fs.uses_discard;
   v->num_loops = 0; /* TODO */
   v->vs_id_in_reg = -1;
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = 0; /* 0 for shader that doesn't write fragcolor.. */
   v->ps_depth_out_reg = -1;

   /*
    * Lower glTexCoord, fixes e.g. neverball point sprite (exit cylinder stars)
    * and gl4es pointsprite.trace apitrace
    */
   if (s->info.stage == MESA_SHADER_FRAGMENT && v->key.sprite_coord_enable) {
      NIR_PASS_V(s, nir_lower_texcoord_replace, v->key.sprite_coord_enable,
                 false, v->key.sprite_coord_yinvert);
   }

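   /* note: in the fragment shader hw temp register 0 holds the fragment
    * position (see the load_frag_coord handling in get_src), which is why
    * FS inputs are assigned registers starting at 1 below
    */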
   /* setup input linking */
   struct etna_shader_io_file *sf = &v->infile;
   if (s->info.stage == MESA_SHADER_VERTEX) {
      nir_foreach_shader_in_variable(var, s) {
         unsigned idx = var->data.driver_location;
         sf->reg[idx].reg = idx;
         sf->reg[idx].slot = var->data.location;
         sf->reg[idx].num_components = glsl_get_components(var->type);
         sf->num_reg = MAX2(sf->num_reg, idx+1);
      }
   } else {
      unsigned count = 0;
      nir_foreach_shader_in_variable(var, s) {
         unsigned idx = var->data.driver_location;
         sf->reg[idx].reg = idx + 1;
         sf->reg[idx].slot = var->data.location;
         sf->reg[idx].num_components = glsl_get_components(var->type);
         sf->num_reg = MAX2(sf->num_reg, idx+1);
         count++;
      }
      assert(sf->num_reg == count);
   }

   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_uniform, etna_glsl_type_size,
              (nir_lower_io_options)0);

   NIR_PASS_V(s, nir_lower_regs_to_ssa);
   NIR_PASS_V(s, nir_lower_vars_to_ssa);
   NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX);
   NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u, .lower_invalid_implicit_lod = true, });

   if (v->key.has_sample_tex_compare)
      NIR_PASS_V(s, nir_lower_tex_shadow, v->key.num_texture_states,
                 v->key.tex_compare_func,
                 v->key.tex_swizzle);

   NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
   nir_lower_idiv_options idiv_options = {
      .imprecise_32bit_lowering = true,
      .allow_fp16 = true,
   };
   NIR_PASS_V(s, nir_lower_idiv, &idiv_options);

   etna_optimize_loop(s);

   /* TODO: remove this extra run if nir_opt_peephole_select is able to handle ubo's. */
   if (OPT(s, etna_nir_lower_ubo_to_uniform))
      etna_optimize_loop(s);

   NIR_PASS_V(s, etna_lower_io, v);

   if (v->shader->specs->vs_need_z_div)
      NIR_PASS_V(s, nir_lower_clip_halfz);

   /* lower pre-halti2 to float (halti0 has integers, but only scalar..) */
   if (c->specs->halti < 2) {
      /* use opt_algebraic between int_to_float and bool_to_float because
       * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
       */
      NIR_PASS_V(s, nir_lower_int_to_float);
      NIR_PASS_V(s, nir_opt_algebraic);
      NIR_PASS_V(s, nir_lower_bool_to_float);
   } else {
      NIR_PASS_V(s, nir_lower_bool_to_int32);
   }

   while( OPT(s, nir_opt_vectorize, NULL, NULL) );
   NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);

   NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(s, nir_opt_algebraic_late);

   NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
   NIR_PASS_V(s, nir_copy_prop);
   /* only HW supported integer source mod is ineg for iadd instruction (?) */
   NIR_PASS_V(s, nir_lower_to_source_mods, ~nir_lower_int_source_mods);
   /* need copy prop after uses_to_dest, and before src mods: see
    * dEQP-GLES2.functional.shaders.random.all_features.fragment.95
    */

   NIR_PASS_V(s, nir_opt_dce);

   NIR_PASS_V(s, nir_lower_bool_to_bitsize);
   NIR_PASS_V(s, etna_lower_alu, c->specs->has_new_transcendentals);

   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
      nir_print_shader(s, stdout);

   unsigned block_ptr[nir_shader_get_entrypoint(s)->num_blocks];
   c->block_ptr = block_ptr;

   unsigned num_consts;
   ASSERTED bool ok = emit_shader(c, &v->num_temps, &num_consts);
   assert(ok);

   /* empty shader, emit NOP */
   if (!c->inst_ptr)
      emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_NOP });

   /* assemble instructions, fixing up labels */
   uint32_t *code = MALLOC(c->inst_ptr * 16);
   for (unsigned i = 0; i < c->inst_ptr; i++) {
      struct etna_inst *inst = &c->code[i];
      if (inst->opcode == INST_OPCODE_BRANCH)
         inst->imm = block_ptr[inst->imm];

      inst->no_oneconst_limit = specs->has_no_oneconst_limit;
      etna_assemble(&code[i * 4], inst);
   }

   v->code_size = c->inst_ptr * 4;
   v->code = code;
   v->needs_icache = c->inst_ptr > specs->max_instructions;

   copy_uniform_state_to_shader(v, c->consts, num_consts);

   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      v->input_count_unk8 = 31; /* XXX what is this */
      assert(v->ps_depth_out_reg <= 0);
   } else {
      fill_vs_mystery(v);
   }

   bool result = etna_compile_check_limits(v);
   ralloc_free(c->nir);
   FREE(c);
   return result;
}

static const struct etna_shader_inout *
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
                      const struct etna_shader_inout *in)
{
   for (int i = 0; i < sobj->outfile.num_reg; i++)
      if (sobj->outfile.reg[i].slot == in->slot)
         return &sobj->outfile.reg[i];

   return NULL;
}

bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs,
                 const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
   info->pcoord_varying_comp_ofs = -1;

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;
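      /* interpolate_always is currently hard-coded to true, so every varying
       * takes the "bypass flat shading" pa_attributes value below
       */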
      bool interpolate_always = true;

      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (!interpolate_always) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[3] = VARYING_COMPONENT_USE_UNUSED;

      /* point/tex coord is an input to the PS without matching VS output,
       * so it gets a varying slot without being assigned a VS register.
       */
      if (fsio->slot == VARYING_SLOT_PNTC) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;

         info->pcoord_varying_comp_ofs = comp_ofs;
      } else if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) {
         /*
          * Do nothing, TexCoord is lowered to PointCoord above
          * and the TexCoord here is just a remnant. This needs
          * to be removed with some nir_remove_dead_variables(),
          * but that one removes all FS inputs ... why?
          */
      } else {
         if (vsio == NULL) { /* not found -- link error */
            BUG("Semantic value not found in vertex shader outputs\n");
            return true;
         }
         varying->reg = vsio->reg;
      }

      comp_ofs += varying->num_components;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}