/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"

#include "util/u_helpers.h"

/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */

struct v3d_nir_lower_io_state {
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        nir_ssa_def *pos[4];
};

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state);

static void
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
                     nir_ssa_def *chan)
{
        if (offset) {
                /* When generating the VIR instruction, the base and the
                 * offset are just going to get added together with an ADD
                 * instruction so we might as well do the add here at the
                 * NIR level instead and let the constant folding do its
                 * magic.
                 */
                offset = nir_iadd_imm(b, offset, base);
                base = 0;
        } else {
                offset = nir_imm_int(b, 0);
        }

        nir_store_output(b, chan, offset,
                         .base = base, .write_mask = 0x1, .component = 0);
}
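/* For illustration only (a sketch, not actual compiler output): a store
 * with base 8 and a non-constant offset ssa_1 becomes roughly
 *
 *    ssa_2 = iadd ssa_1, 8
 *    store_output(chan, ssa_2) (base=0, wrmask=x, component=0)
 *
 * so that if ssa_1 later turns out to be constant, NIR constant folding
 * collapses the iadd and the backend sees a single immediate offset.
 */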
/* Convert the uniform offset to bytes. If it happens to be a constant,
 * constant-folding will clean up the shift for us.
 */
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        /* On SPIR-V/Vulkan we are already getting our offsets in
         * bytes.
         */
        if (c->key->environment == V3D_ENVIRONMENT_VULKAN)
                return;

        b->cursor = nir_before_instr(&intr->instr);

        nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);

        nir_instr_rewrite_src(&intr->instr,
                              &intr->src[0],
                              nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
                                                       nir_imm_int(b, 4))));
}

static int
v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location,
                            unsigned component)
{
        uint32_t num_used_outputs = 0;
        struct v3d_varying_slot *used_outputs = NULL;

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                used_outputs = c->vs_key->used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                used_outputs = c->gs_key->used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                struct v3d_varying_slot slot = used_outputs[i];

                if (v3d_slot_get_slot(slot) == location &&
                    v3d_slot_get_component(slot) == component) {
                        return i;
                }
        }

        return -1;
}
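/* Example with hypothetical key contents: if the consuming stage only reads
 * VARYING_SLOT_VAR0 components x and z, used_outputs holds exactly those two
 * slot/component pairs, so this function maps (VAR0, 0) -> 0 and
 * (VAR0, 2) -> 1, and returns -1 for the unread y/w components so callers
 * can skip storing them.
 */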
/* Lowers a store_output(gallium driver location) to a series of store_outputs
 * with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 */
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        unsigned location = nir_intrinsic_io_semantics(intr).location;
        nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
                                           intr->num_components);

        /* Save off the components of the position for the setup of VPM inputs
         * read by fixed function HW.
         */
        if (location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++)
                        state->pos[start_comp + i] = nir_channel(b, src, i);
        }

        /* Just write psiz to its slot in the FF header right now. */
        if (location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg,
                                     src);
        }

        if (location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));

                /* From the GLES 3.2 spec:
                 *
                 *    "When fragments are written to a layered framebuffer, the
                 *     fragment's layer number selects an image from the array
                 *     of images at each attachment (...). If the fragment's
                 *     layer number is negative, or greater than or equal to
                 *     the minimum number of layers of any attachment, the
                 *     effects of the fragment on the framebuffer contents are
                 *     undefined."
                 *
                 * This suggests we can just ignore that situation. However,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds writes to the tile state. The
                 * simulator has an assert to catch this, so we play it safe
                 * here and make sure that doesn't happen by setting gl_Layer
                 * to 0 in that case (we always allocate tile state for at
                 * least one layer).
                 */
                nir_ssa_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
                nir_ssa_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl(b, src, nir_imm_int(b, 16)));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually. We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, location,
                                                    start_comp + i);

                if (vpm_offset == -1)
                        continue;

                if (nir_src_is_const(intr->src[1]))
                        vpm_offset += nir_src_as_uint(intr->src[1]) * 4;

                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        nir_instr_remove(&intr->instr);
}

static inline void
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
{
        const uint8_t NEW_PRIMITIVE_OFFSET = 0;
        const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;

        uint32_t vertex_data_size = state->gs.output_vertex_data_size;
        assert((vertex_data_size & 0xffffff00) == 0);

        uint32_t header;
        header = 1 << NEW_PRIMITIVE_OFFSET;
        header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
        nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
}

static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
                          nir_intrinsic_instr *instr,
                          struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&instr->instr);

        nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
        nir_ssa_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *output_offset =
                nir_load_var(b, state->gs.output_offset_var);

        /* Emit fixed function outputs */
        v3d_nir_emit_ff_vpm_outputs(c, b, state);

        /* Emit vertex header */
        v3d_nir_store_output(b, 0, header_offset, header);

        /* Update VPM offset for next vertex output data and header */
        output_offset =
                nir_iadd(b, output_offset,
                         nir_imm_int(b, state->gs.output_vertex_data_size));

        header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));

        /* Reset the New Primitive bit */
        header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));

        nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
        nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
        nir_store_var(b, state->gs.header_var, header, 0x1);

        nir_instr_remove(&instr->instr);
}

static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct v3d_nir_lower_io_state *state)
{
        assert(state->gs.header_var);
        b->cursor = nir_before_instr(&instr->instr);
        reset_gs_header(b, state);

        nir_instr_remove(&instr->instr);
}
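/* For reference, the per-vertex GS header word managed above ends up laid
 * out as follows (as implied by the shifts and masks used in this file):
 *
 *    bit  0       New Primitive flag (set by reset_gs_header(), cleared
 *                 again after each EmitVertex())
 *    bits 15:8    output vertex data size, in VPM slots
 *    bits 23:16   gl_Layer, clamped to 0 when out of range
 */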
/* Some vertex attribute formats may require us to apply a swizzle, but the
 * hardware doesn't provide a way to do that, so we need to apply the swizzle
 * in the vertex shader.
 *
 * This is required at least in Vulkan to support the mandatory vertex
 * attribute format VK_FORMAT_B8G8R8A8_UNORM.
 */
static void
v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
                           nir_intrinsic_instr *instr)
{
        assert(c->s->info.stage == MESA_SHADER_VERTEX);

        if (!c->vs_key->va_swap_rb_mask)
                return;

        const uint32_t location = nir_intrinsic_io_semantics(instr).location;

        if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
                return;

        assert(instr->num_components == 1);
        const uint32_t comp = nir_intrinsic_component(instr);
        if (comp == 0 || comp == 2)
                nir_intrinsic_set_component(instr, (comp + 2) % 4);
}

/* Sometimes the origin of gl_PointCoord is in the upper left rather than the
 * lower left, so we need to flip it.
 *
 * This is needed for Vulkan; Gallium uses lower_wpos_pntc instead.
 */
static void
v3d_nir_lower_fragment_input(struct v3d_compile *c, nir_builder *b,
                             nir_intrinsic_instr *intr)
{
        assert(c->s->info.stage == MESA_SHADER_FRAGMENT);

        /* Gallium uses lower_wpos_pntc */
        if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
                return;

        b->cursor = nir_after_instr(&intr->instr);

        int comp = nir_intrinsic_component(intr);

        nir_variable *input_var =
                nir_find_variable_with_driver_location(c->s,
                                                       nir_var_shader_in,
                                                       nir_intrinsic_base(intr));

        if (input_var && util_varying_is_point_coord(input_var->data.location,
                                                     c->fs_key->point_sprite_mask)) {
                assert(intr->num_components == 1);

                nir_ssa_def *result = &intr->dest.ssa;

                switch (comp) {
                case 0:
                case 1:
                        if (!c->fs_key->is_points)
                                result = nir_imm_float(b, 0.0);
                        break;
                case 2:
                        result = nir_imm_float(b, 0.0);
                        break;
                case 3:
                        result = nir_imm_float(b, 1.0);
                        break;
                }

                if (c->fs_key->point_coord_upper_left && comp == 1)
                        result = nir_fsub(b, nir_imm_float(b, 1.0), result);

                if (result != &intr->dest.ssa) {
                        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
                                                       result,
                                                       result->parent_instr);
                }
        }
}
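/* To make the rewrite above concrete (an illustrative summary, not a spec):
 * for a point-sprite varying, components 0 and 1 of the load are replaced
 * with 0.0 when the shader isn't used for point primitives, component 2 is
 * forced to 0.0 and component 3 to 1.0, and when point_coord_upper_left is
 * set, component 1 is additionally rewritten as (1.0 - t) to flip the
 * vertical origin.
 */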
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
                       struct nir_instr *instr,
                       struct v3d_nir_lower_io_state *state)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;

        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                if (c->s->info.stage == MESA_SHADER_VERTEX)
                        v3d_nir_lower_vertex_input(c, b, intr);
                else if (c->s->info.stage == MESA_SHADER_FRAGMENT)
                        v3d_nir_lower_fragment_input(c, b, intr);
                break;

        case nir_intrinsic_load_uniform:
                v3d_nir_lower_uniform(c, b, intr);
                break;

        case nir_intrinsic_store_output:
                if (c->s->info.stage == MESA_SHADER_VERTEX ||
                    c->s->info.stage == MESA_SHADER_GEOMETRY) {
                        v3d_nir_lower_vpm_output(c, b, intr, state);
                }
                break;

        case nir_intrinsic_emit_vertex:
                v3d_nir_lower_emit_vertex(c, b, intr, state);
                break;

        case nir_intrinsic_end_primitive:
                v3d_nir_lower_end_primitive(c, b, intr, state);
                break;

        default:
                break;
        }
}

/* Remap the output var's .driver_location. This is purely for
 * nir_print_shader() so that store_output can map back to a variable name.
 */
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
                                        struct v3d_nir_lower_io_state *state)
{
        nir_foreach_shader_out_variable_safe(var, c->s) {
                if (var->data.location == VARYING_SLOT_POS &&
                    state->pos_vpm_offset != -1) {
                        var->data.driver_location = state->pos_vpm_offset;
                        continue;
                }

                if (var->data.location == VARYING_SLOT_PSIZ &&
                    state->psiz_vpm_offset != -1) {
                        var->data.driver_location = state->psiz_vpm_offset;
                        continue;
                }

                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c,
                                                    var->data.location,
                                                    var->data.location_frac);
                if (vpm_offset != -1) {
                        var->data.driver_location =
                                state->varyings_vpm_offset + vpm_offset;
                } else {
                        /* If we couldn't find a mapping for the var, delete
                         * it so that its old .driver_location doesn't confuse
                         * nir_print_shader().
                         */
                        exec_node_remove(&var->node);
                }
        }
}

static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        uint32_t vpm_offset = 0;

        state->pos_vpm_offset = -1;
        state->vp_vpm_offset = -1;
        state->zs_vpm_offset = -1;
        state->rcp_wc_vpm_offset = -1;
        state->psiz_vpm_offset = -1;

        bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
        if (needs_ff_outputs) {
                if (c->vs_key->is_coord) {
                        state->pos_vpm_offset = vpm_offset;
                        vpm_offset += 4;
                }

                state->vp_vpm_offset = vpm_offset;
                vpm_offset += 2;

                if (!c->vs_key->is_coord) {
                        state->zs_vpm_offset = vpm_offset++;
                        state->rcp_wc_vpm_offset = vpm_offset++;
                }

                if (c->vs_key->per_vertex_point_size)
                        state->psiz_vpm_offset = vpm_offset++;
        }

        state->varyings_vpm_offset = vpm_offset;

        c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}
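/* As an illustration (hypothetical key: is_coord = false, per-vertex point
 * size enabled), the resulting VS VPM layout is, in 32-bit slots:
 *
 *    0-1: clipped viewport X/Y
 *    2:   Zs
 *    3:   1/Wc
 *    4:   point size
 *    5+:  varyings read by the FS
 */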
static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        /* 1 header slot for number of output vertices */
        uint32_t vpm_offset = 1;

        /* 1 header slot per output vertex */
        const uint32_t num_vertices = c->s->info.gs.vertices_out;
        vpm_offset += num_vertices;

        state->gs.output_header_size = vpm_offset;

        /* Vertex data: here we only compute offsets into the data of a
         * generic vertex. When it is time to actually write a particular
         * vertex to the VPM, we will add that vertex's offset into the VPM
         * output to these offsets.
         *
         * If geometry shaders are present, they are always the last shader
         * stage before rasterization, so we always emit fixed function
         * outputs.
         */
        vpm_offset = 0;
        if (c->gs_key->is_coord) {
                state->pos_vpm_offset = vpm_offset;
                vpm_offset += 4;
        } else {
                state->pos_vpm_offset = -1;
        }

        state->vp_vpm_offset = vpm_offset;
        vpm_offset += 2;

        if (!c->gs_key->is_coord) {
                state->zs_vpm_offset = vpm_offset++;
                state->rcp_wc_vpm_offset = vpm_offset++;
        } else {
                state->zs_vpm_offset = -1;
                state->rcp_wc_vpm_offset = -1;
        }

        /* Mesa enables OES_geometry_shader_point_size automatically with
         * OES_geometry_shader so we always need to handle point size
         * writes if present.
         */
        if (c->gs_key->per_vertex_point_size)
                state->psiz_vpm_offset = vpm_offset++;

        state->varyings_vpm_offset = vpm_offset;

        state->gs.output_vertex_data_size =
                state->varyings_vpm_offset + c->gs_key->num_used_outputs;

        c->vpm_output_size =
                state->gs.output_header_size +
                state->gs.output_vertex_data_size * num_vertices;
}
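/* Sketch of the resulting GS VPM layout (hypothetical shader with
 * vertices_out = 3, is_coord = false, per-vertex point size disabled):
 *
 *    slot 0:    output header (header size | vertex count << 16)
 *    slots 1-3: per-vertex headers, one per output vertex
 *    then, per emitted vertex (at its output offset):
 *       +0-1: clipped viewport X/Y
 *       +2:   Zs
 *       +3:   1/Wc
 *       +4+:  varyings read by the FS
 */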
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state)
{
        /* If this is a geometry shader we need to emit our fixed function
         * outputs to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                nir_load_var(b, state->gs.output_offset_var) : NULL;

        for (int i = 0; i < 4; i++) {
                if (!state->pos[i])
                        state->pos[i] = nir_ssa_undef(b, 1, 32);
        }

        nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);

        if (state->pos_vpm_offset != -1) {
                for (int i = 0; i < 4; i++) {
                        v3d_nir_store_output(b, state->pos_vpm_offset + i,
                                             offset_reg, state->pos[i]);
                }
        }

        if (state->vp_vpm_offset != -1) {
                for (int i = 0; i < 2; i++) {
                        nir_ssa_def *pos;
                        nir_ssa_def *scale;
                        pos = state->pos[i];
                        if (i == 0)
                                scale = nir_load_viewport_x_scale(b);
                        else
                                scale = nir_load_viewport_y_scale(b);
                        pos = nir_fmul(b, pos, scale);
                        pos = nir_fmul(b, pos, rcp_wc);
                        /* Pre-V3D 4.3 hardware has a quirk where it expects
                         * XY coordinates in .8 fixed-point format, but then
                         * internally rounds them to .6 fixed-point,
                         * introducing a double rounding. The double rounding
                         * can cause very slight differences in triangle
                         * rasterization coverage that can actually be
                         * noticed by some CTS tests.
                         *
                         * The correct fix for this, as recommended by
                         * Broadcom, is to convert to .8 fixed-point with
                         * ffloor().
                         */
                        pos = nir_f2i32(b, nir_ffloor(b, pos));
                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
                                             offset_reg, pos);
                }
        }

        if (state->zs_vpm_offset != -1) {
                nir_ssa_def *z = state->pos[2];
                z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                z = nir_fmul(b, z, rcp_wc);
                z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
                v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
        }

        if (state->rcp_wc_vpm_offset != -1) {
                v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
                                     offset_reg, rcp_wc);
        }

        /* Store 0 to varyings requested by the FS but not stored by the
         * previous stage. This should be undefined behavior, but
         * glsl-routing seems to rely on it.
         */
        uint32_t num_used_outputs;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                if (!BITSET_TEST(state->varyings_stored, i)) {
                        v3d_nir_store_output(b, state->varyings_vpm_offset + i,
                                             offset_reg, nir_imm_int(b, 0));
                }
        }
}

static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
               nir_function_impl *impl,
               struct v3d_nir_lower_io_state *state)
{
        nir_block *first = nir_start_block(impl);
        b->cursor = nir_before_block(first);

        const struct glsl_type *uint_type = glsl_uint_type();

        assert(!state->gs.output_offset_var);
        state->gs.output_offset_var =
                nir_local_variable_create(impl, uint_type, "output_offset");
        nir_store_var(b, state->gs.output_offset_var,
                      nir_imm_int(b, state->gs.output_header_size), 0x1);

        assert(!state->gs.header_offset_var);
        state->gs.header_offset_var =
                nir_local_variable_create(impl, uint_type, "header_offset");
        nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);

        assert(!state->gs.header_var);
        state->gs.header_var =
                nir_local_variable_create(impl, uint_type, "header");
        reset_gs_header(b, state);
}

static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
                                 struct v3d_nir_lower_io_state *state)
{
        const uint8_t VERTEX_COUNT_OFFSET = 16;

        /* Our GS header has 1 generic header slot (at VPM offset 0) and then
         * one slot per output vertex after it. This means we don't need a
         * variable just to keep track of the number of vertices we emitted;
         * instead we can compute it here from the header offset variable by
         * subtracting the one generic header slot that always goes at the
         * beginning of our header.
         */
        nir_ssa_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *vertex_count =
                nir_isub(b, header_offset, nir_imm_int(b, 1));
        nir_ssa_def *header =
                nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
                        nir_ishl(b, vertex_count,
                                 nir_imm_int(b, VERTEX_COUNT_OFFSET)));

        v3d_nir_store_output(b, 0, NULL, header);
}
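/* Worked example (hypothetical GS): with vertices_out = 3, the output header
 * is 1 + 3 = 4 slots. If the shader emits all three vertices, header_offset
 * is 4 at the end of the shader, so the prolog above writes
 * 4 | (3 << 16) = 0x00030004 to VPM offset 0.
 */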
bool
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
        struct v3d_nir_lower_io_state state = { 0 };

        /* Set up the layout of the VPM outputs. */
        switch (s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_setup_vpm_layout_vs(c, &state);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_setup_vpm_layout_gs(c, &state);
                break;
        case MESA_SHADER_FRAGMENT:
        case MESA_SHADER_COMPUTE:
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_builder b;
                        nir_builder_init(&b, function->impl);

                        if (c->s->info.stage == MESA_SHADER_GEOMETRY)
                                emit_gs_prolog(c, &b, function->impl, &state);

                        nir_foreach_block(block, function->impl) {
                                nir_foreach_instr_safe(instr, block)
                                        v3d_nir_lower_io_instr(c, &b, instr,
                                                               &state);
                        }

                        nir_block *last = nir_impl_last_block(function->impl);
                        b.cursor = nir_after_block(last);
                        if (s->info.stage == MESA_SHADER_VERTEX) {
                                v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
                        } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
                                emit_gs_vpm_output_header_prolog(c, &b,
                                                                 &state);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        if (s->info.stage == MESA_SHADER_VERTEX ||
            s->info.stage == MESA_SHADER_GEOMETRY) {
                v3d_nir_lower_io_update_output_var_base(c, &state);
        }

        /* It is really unlikely that this pass doesn't make progress, and
         * filtering out the cases where it doesn't would make the code more
         * complex, but we still want to run this lowering through NIR_PASS.
         */
        return true;
}