/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_xfb_info.h"

#include "util/u_math.h"

struct lower_io_state {
   void *dead_ctx;
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type, bool);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
global_atomic_for_deref(nir_address_format addr_format,
                        nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O:               \
   if (addr_format != nir_address_format_2x32bit_global) \
      return nir_intrinsic_global_##O;                   \
   else                                                  \
      return nir_intrinsic_global_##O##_2x32;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid SSBO atomic");
   }
}

static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid shared atomic");
   }
}

static nir_intrinsic_op
task_payload_atomic_for_deref(nir_intrinsic_op deref_op)
{
   switch (deref_op) {
#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_task_payload_##O;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
   OP(atomic_fadd)
   OP(atomic_fmin)
   OP(atomic_fmax)
   OP(atomic_fcomp_swap)
#undef OP
   default:
      unreachable("Invalid task payload atomic");
   }
}

void
nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                         unsigned *size,
                         int (*type_size)(const struct glsl_type *, bool))
{
   unsigned location = 0;

   nir_foreach_variable_with_modes(var, shader, mode) {
      var->data.driver_location = location;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;
      location += type_size(var->type, bindless_type_size);
   }

   *size = location;
}

/**
 * Some inputs and outputs are arrayed, meaning that there is an extra level
 * of array indexing to handle mismatches between the shader interface and the
 * dispatch pattern of the shader. For instance, geometry shaders are
 * executed per-primitive while their inputs and outputs are specified
 * per-vertex so all inputs and outputs have to be additionally indexed with
 * the vertex index within the primitive.
 */
bool
nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (stage == MESA_SHADER_MESH) {
      /* NV_mesh_shader: this is a flat array for the whole workgroup. */
      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
         return var->data.per_primitive;
   }

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_MESH;

   return false;
}

static unsigned get_number_of_slots(struct lower_io_state *state,
                                    const nir_variable *var)
{
   const struct glsl_type *type = var->type;

   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* NV_mesh_shader:
    * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
    * as opposed to D3D-style mesh shaders where it's addressed by
    * the primitive index.
    * Prevent assigning several slots to primitive indices,
    * to avoid some issues.
    */
   if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
       var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
       !nir_is_arrayed_io(var, state->builder.shader->info.stage))
      return 1;

   return state->type_size(type, var->data.bindless);
}

static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index.  Process the rest normally.
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         unsigned size = type_size((*p)->type, bts);

         nir_ssa_def *mul =
            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}

static nir_ssa_def *
emit_load(struct lower_io_state *state,
          nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            nir_intrinsic_op bary_op;
            if (var->data.sample ||
                (state->options & nir_lower_io_force_sample_interpolation))
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               var->data.interpolation);
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         op = array_index ? nir_intrinsic_load_per_vertex_input :
                            nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = !array_index ? nir_intrinsic_load_output :
           var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
                                     nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load,
                              state->type_size(var->type, var->data.bindless));

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = {0};
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      semantics.medium_precision =
         var->data.precision == GLSL_PRECISION_MEDIUM ||
         var->data.precision == GLSL_PRECISION_LOW;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_ssa_dest_init(&load->instr, &load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}

static nir_ssa_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, const struct glsl_type *type)
{
   assert(intrin->dest.is_ssa);
   if (intrin->dest.ssa.bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_ssa_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      while (dest_comp < intrin->dest.ssa.num_components) {
         const unsigned num_comps =
            MIN2(intrin->dest.ssa.num_components - dest_comp,
                 (4 - component) / 2);

         nir_ssa_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32);
         for (unsigned i = 0; i < num_comps; i++) {
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first load has a component offset */
         component = 0;
         dest_comp += num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }

      return nir_vec(b, comp64, intrin->dest.ssa.num_components);
   } else if (intrin->dest.ssa.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->dest.ssa.num_components, 32,
                                nir_type_bool32));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->dest.ssa.num_components,
                       intrin->dest.ssa.bit_size,
                       nir_get_nir_type_for_glsl_type(type));
   }
}

static void
emit_store(struct lower_io_state *state, nir_ssa_def *data,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op =
      !array_index ? nir_intrinsic_store_output :
      var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
                                nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;

   store->src[0] = nir_src_for_ssa(data);

   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;
   semantics.per_view = var->data.per_view;
   semantics.invariant = var->data.invariant;

   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}

static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
            unsigned component, const struct glsl_type *type)
{
   assert(intrin->src[1].is_ssa);
   if (intrin->src[1].ssa->bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      assert(component == 0 || component == 2);
      unsigned src_comp = 0;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      while (src_comp < intrin->num_components) {
         const unsigned num_comps =
            MIN2(intrin->num_components - src_comp,
                 (4 - component) / 2);

         if (write_mask & BITFIELD_MASK(num_comps)) {
            nir_ssa_def *data =
               nir_channels(b, intrin->src[1].ssa,
                            BITFIELD_RANGE(src_comp, num_comps));
            nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);

            nir_component_mask_t write_mask32 = 0;
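            /* Each 64-bit component expands to two 32-bit components, so bit
             * i of the source write mask becomes bits 2*i and 2*i+1 of the
             * 32-bit write mask (the 3 << (i * 2) below).
             */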
            for (unsigned i = 0; i < num_comps; i++) {
               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
                  write_mask32 |= 3 << (i * 2);
            }

            emit_store(state, data32, array_index, var, offset,
                       component, data32->num_components, write_mask32,
                       nir_type_uint32);
         }

         /* Only the first store has a component offset */
         component = 0;
         src_comp += num_comps;
         write_mask >>= num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }
   } else if (intrin->src[1].ssa->bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
      emit_store(state, b32_val, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_type_bool32);
   } else {
      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
                 component, intrin->num_components,
                 nir_intrinsic_write_mask(intrin),
                 nir_get_nir_type_for_glsl_type(type));
   }
}

static nir_ssa_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat.  Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_ssa_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1]);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;

   assert(intrin->dest.is_ssa);
   nir_ssa_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->dest.ssa.num_components,
                                  intrin->dest.ssa.bit_size,
                                  &bary_setup->dest.ssa,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics);

   return load;
}

static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
      case nir_intrinsic_store_deref:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics ||
             options->lower_interpolate_at)
            break;
         FALLTHROUGH;
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
      if (!nir_deref_mode_is_one_of(deref, state->modes))
         continue;

      nir_variable *var = nir_deref_instr_get_variable(deref);

      b->cursor = nir_before_instr(instr);

      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);

      nir_ssa_def *offset;
      nir_ssa_def *array_index = NULL;
      unsigned component_offset = var->data.location_frac;
      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
                                var->data.mode == nir_var_shader_out ||
                                var->data.bindless;

      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix and
          *    structure accesses, any out-of-bounds access produced undefined
          *    behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
          * for reads.
          *
          * Otherwise get_io_offset would return an out-of-bounds offset, which
          * could result in out-of-bounds loads or stores of inputs/outputs and
          * cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_ssa_def *zero =
               nir_imm_zero(b, intrin->dest.ssa.num_components,
                            intrin->dest.ssa.bit_size);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     zero);
         }

         nir_instr_remove(&intrin->instr);
         progress = true;
         continue;
      }

      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
                             state->type_size, &component_offset,
                             bindless_type_size);

      nir_ssa_def *replacement = NULL;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_deref:
         replacement = lower_load(intrin, state, array_index, var, offset,
                                  component_offset, deref->type);
         break;

      case nir_intrinsic_store_deref:
         lower_store(intrin, state, array_index, var, offset,
                     component_offset, deref->type);
         break;

      case nir_intrinsic_interp_deref_at_centroid:
      case nir_intrinsic_interp_deref_at_sample:
      case nir_intrinsic_interp_deref_at_offset:
      case nir_intrinsic_interp_deref_at_vertex:
         assert(array_index == NULL);
         replacement = lower_interpolate_at(intrin, state, var, offset,
                                            component_offset, deref->type);
         break;

      default:
         continue;
      }

      if (replacement) {
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  replacement);
      }
      nir_instr_remove(&intrin->instr);
      progress = true;
   }

   return progress;
}

static bool
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options options)
{
   struct lower_io_state state;
   bool progress = false;

   nir_builder_init(&state.builder, impl);
   state.dead_ctx = ralloc_context(NULL);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   ASSERTED nir_variable_mode supported_modes =
      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
   assert(!(modes & ~supported_modes));

   nir_foreach_block(block, impl) {
      progress |= nir_lower_io_block(block, &state);
   }

   ralloc_free(state.dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
 *
 * This pass is intended to be used for cross-stage shader I/O and driver-
 * managed uniforms to turn deref-based access into a simpler model using
 * locations or offsets. For fragment shader inputs, it can optionally turn
 * load_deref into an explicit interpolation using barycentrics coming from
 * one of the load_barycentric_* intrinsics. This pass requires that all
 * deref chains are complete and contain no casts.
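 *
 * For example, a driver that lays out varyings in vec4 slots might invoke
 * the pass roughly like this (count_vec4_slots is an illustrative callback,
 * not something defined in this file):
 *
 *    static int
 *    count_vec4_slots(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 count_vec4_slots, nir_lower_io_lower_64bit_to_32);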
 */
bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *, bool),
             nir_lower_io_options options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         progress |= nir_lower_io_impl(function->impl, modes,
                                       type_size, options);
      }
   }

   return progress;
}

static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));
   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}

static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
                nir_address_format addr_format,
                nir_variable_mode modes,
                nir_ssa_def *offset)
{
   assert(offset->num_components == 1);

   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_32bit_offset:
      assert(addr->bit_size == offset->bit_size);
      assert(addr->num_components == 1);
      return nir_iadd(b, addr, offset);

   case nir_address_format_2x32bit_global: {
      assert(addr->num_components == 2);
      nir_ssa_def *lo = nir_channel(b, addr, 0);
      nir_ssa_def *hi = nir_channel(b, addr, 1);
      nir_ssa_def *res_lo = nir_iadd(b, lo, offset);
      nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, lo));
      nir_ssa_def *res_hi = nir_iadd(b, hi, carry);
      return nir_vec2(b, res_lo, res_hi);
   }

   case nir_address_format_32bit_offset_as_64bit:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);

   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      assert(addr->bit_size == offset->bit_size);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);

   case nir_address_format_32bit_index_offset_pack64:
      assert(addr->num_components == 1);
      assert(offset->bit_size == 32);
      return nir_pack_64_2x32_split(b,
                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
                                    nir_unpack_64_2x32_split_y(b, addr));

   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      assert(offset->bit_size == 32);
      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);

   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      assert(offset->bit_size == 64);
      if (!(modes & ~(nir_var_function_temp |
                      nir_var_shader_temp |
                      nir_var_mem_shared))) {
         /* If we're sure it's one of these modes, we can do an easy 32-bit
          * addition and don't need to bother with 64-bit math.
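          * The offsets used for these modes fit in the low 32 bits, so we can
          * add there and leave the high dword, which holds the mode tag,
          * untouched.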
          */
         nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
         nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
         return nir_pack_64_2x32_split(b, addr32, type);
      } else {
         return nir_iadd(b, addr, offset);
      }

   case nir_address_format_logical:
      unreachable("Unsupported address format");
   }
   unreachable("Invalid address format");
}

static unsigned
addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
{
   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
       addr_format == nir_address_format_32bit_index_offset_pack64)
      return 32;
   return addr->bit_size;
}

static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
                    nir_address_format addr_format,
                    nir_variable_mode modes,
                    int64_t offset)
{
   return build_addr_iadd(b, addr, addr_format, modes,
                          nir_imm_intN_t(b, offset,
                                         addr_get_offset_bit_size(addr, addr_format)));
}

static nir_ssa_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
                   nir_address_format addr_format)
{
   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
                            nir_var_mem_task_payload |
                            nir_var_mem_global |
                            nir_var_shader_temp | nir_var_function_temp |
                            nir_var_mem_push_const | nir_var_mem_constant));

   const unsigned num_comps = nir_address_format_num_components(addr_format);
   const unsigned bit_size = nir_address_format_bit_size(addr_format);

   switch (addr_format) {
   case nir_address_format_2x32bit_global:
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global: {
      nir_ssa_def *base_addr;
      switch (var->data.mode) {
      case nir_var_shader_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
         break;

      case nir_var_function_temp:
         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
         break;

      case nir_var_mem_constant:
         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_shared:
         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
         break;

      case nir_var_mem_global:
         base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
         break;

      default:
         unreachable("Unsupported variable mode");
      }

      return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
                                 var->data.driver_location);
   }

   case nir_address_format_32bit_offset:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int(b, var->data.driver_location);

   case nir_address_format_32bit_offset_as_64bit:
      assert(var->data.driver_location <= UINT32_MAX);
      return nir_imm_int64(b, var->data.driver_location);

   case nir_address_format_62bit_generic:
      switch (var->data.mode) {
      case nir_var_shader_temp:
      case nir_var_function_temp:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);

      case nir_var_mem_shared:
         assert(var->data.driver_location <= UINT32_MAX);
         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);

      case nir_var_mem_global:
         return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
                             var->data.driver_location);

      default:
         unreachable("Unsupported variable mode");
      }

   default:
      unreachable("Unsupported address format");
   }
}
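
/* Emits a run-time check that the given address belongs to the given variable
 * mode.  For nir_address_format_62bit_generic, the top two bits of the 64-bit
 * pointer are a mode tag: 0x2 for shader/function temporaries, 0x1 for shared
 * memory, and 0x0 or 0x3 for global memory (see build_addr_for_var above).
 */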
static nir_ssa_def *
build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
                              nir_address_format addr_format,
                              nir_variable_mode mode)
{
   /* The compile-time check failed; do a run-time check */
   switch (addr_format) {
   case nir_address_format_62bit_generic: {
      assert(addr->num_components == 1);
      assert(addr->bit_size == 64);
      nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
      switch (mode) {
      case nir_var_function_temp:
      case nir_var_shader_temp:
         return nir_ieq_imm(b, mode_enum, 0x2);

      case nir_var_mem_shared:
         return nir_ieq_imm(b, mode_enum, 0x1);

      case nir_var_mem_global:
         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
                        nir_ieq_imm(b, mode_enum, 0x3));

      default:
         unreachable("Invalid mode check intrinsic");
      }
   }

   default:
      unreachable("Unsupported address mode");
   }
}

unsigned
nir_address_format_bit_size(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 32;
   case nir_address_format_2x32bit_global:            return 32;
   case nir_address_format_64bit_global:              return 64;
   case nir_address_format_64bit_global_32bit_offset: return 32;
   case nir_address_format_64bit_bounded_global:      return 32;
   case nir_address_format_32bit_index_offset:        return 32;
   case nir_address_format_32bit_index_offset_pack64: return 64;
   case nir_address_format_vec2_index_32bit_offset:   return 32;
   case nir_address_format_62bit_generic:             return 64;
   case nir_address_format_32bit_offset:              return 32;
   case nir_address_format_32bit_offset_as_64bit:     return 64;
   case nir_address_format_logical:                   return 32;
   }
   unreachable("Invalid address format");
}

unsigned
nir_address_format_num_components(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 1;
   case nir_address_format_2x32bit_global:            return 2;
   case nir_address_format_64bit_global:              return 1;
   case nir_address_format_64bit_global_32bit_offset: return 4;
   case nir_address_format_64bit_bounded_global:      return 4;
   case nir_address_format_32bit_index_offset:        return 2;
   case nir_address_format_32bit_index_offset_pack64: return 1;
   case nir_address_format_vec2_index_32bit_offset:   return 3;
   case nir_address_format_62bit_generic:             return 1;
   case nir_address_format_32bit_offset:              return 1;
   case nir_address_format_32bit_offset_as_64bit:     return 1;
   case nir_address_format_logical:                   return 1;
   }
   unreachable("Invalid address format");
}

static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
              nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 0);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_y(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channels(b, addr, 0x3);
   default: unreachable("Invalid address format");
   }
}

static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_index_offset:
      assert(addr->num_components == 2);
      return nir_channel(b, addr, 1);
   case nir_address_format_32bit_index_offset_pack64:
      return nir_unpack_64_2x32_split_x(b, addr);
   case nir_address_format_vec2_index_32bit_offset:
      assert(addr->num_components == 3);
      return nir_channel(b, addr, 2);
   case nir_address_format_32bit_offset:
      return addr;
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_62bit_generic:
      return nir_u2u32(b, addr);
   default:
      unreachable("Invalid address format");
   }
}

/** Returns true if the given address format resolves to a global address */
static bool
addr_format_is_global(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode == nir_var_mem_global;

   return addr_format == nir_address_format_32bit_global ||
          addr_format == nir_address_format_2x32bit_global ||
          addr_format == nir_address_format_64bit_global ||
          addr_format == nir_address_format_64bit_global_32bit_offset ||
          addr_format == nir_address_format_64bit_bounded_global;
}

static bool
addr_format_is_offset(nir_address_format addr_format,
                      nir_variable_mode mode)
{
   if (addr_format == nir_address_format_62bit_generic)
      return mode != nir_var_mem_global;

   return addr_format == nir_address_format_32bit_offset ||
          addr_format == nir_address_format_32bit_offset_as_64bit;
}

static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
               nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:
   case nir_address_format_64bit_global:
   case nir_address_format_62bit_generic:
      assert(addr->num_components == 1);
      return addr;

   case nir_address_format_2x32bit_global:
      assert(addr->num_components == 2);
      return addr;

   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      assert(addr->num_components == 4);
      return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
                      nir_u2u64(b, nir_channel(b, addr, 3)));

   case nir_address_format_32bit_index_offset:
   case nir_address_format_32bit_index_offset_pack64:
   case nir_address_format_vec2_index_32bit_offset:
   case nir_address_format_32bit_offset:
   case nir_address_format_32bit_offset_as_64bit:
   case nir_address_format_logical:
      unreachable("Cannot get a 64-bit address with this address format");
   }

   unreachable("Invalid address format");
}

static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
{
   return addr_format == nir_address_format_64bit_bounded_global;
}

static nir_ssa_def *
addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
                  nir_address_format addr_format, unsigned size)
{
   assert(addr_format == nir_address_format_64bit_bounded_global);
   assert(addr->num_components == 4);
   return nir_ige(b, nir_channel(b, addr, 2),
                  nir_iadd_imm(b, nir_channel(b, addr, 3), size));
}

static void
nir_get_explicit_deref_range(nir_deref_instr *deref,
                             nir_address_format addr_format,
                             uint32_t *out_base,
                             uint32_t *out_range)
{
   uint32_t base = 0;
   uint32_t range = glsl_get_explicit_size(deref->type, false);

   while (true) {
      nir_deref_instr *parent = nir_deref_instr_parent(deref);

      switch (deref->deref_type) {
      case nir_deref_type_array:
      case nir_deref_type_array_wildcard:
      case nir_deref_type_ptr_as_array: {
         const unsigned stride = nir_deref_instr_array_stride(deref);
         if (stride == 0)
            goto fail;

         if (!parent)
            goto fail;

         if (deref->deref_type != nir_deref_type_array_wildcard &&
             nir_src_is_const(deref->arr.index)) {
            base += stride * nir_src_as_uint(deref->arr.index);
         } else {
            if (glsl_get_length(parent->type) == 0)
               goto fail;
            range += stride * (glsl_get_length(parent->type) - 1);
         }
         break;
      }

      case nir_deref_type_struct: {
         if (!parent)
            goto fail;

         base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
         break;
      }

      case nir_deref_type_cast: {
         nir_instr *parent_instr = deref->parent.ssa->parent_instr;

         switch (parent_instr->type) {
         case nir_instr_type_load_const: {
            nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);

            switch (addr_format) {
            case nir_address_format_32bit_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_32bit_index_offset:
               base += load->value[1].u32;
               break;
            case nir_address_format_vec2_index_32bit_offset:
               base += load->value[2].u32;
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
            switch (intr->intrinsic) {
            case nir_intrinsic_load_vulkan_descriptor:
               /* Assume that a load_vulkan_descriptor won't contribute to an
                * offset within the resource.
                */
               break;
            default:
               goto fail;
            }

            *out_base = base;
            *out_range = range;
            return;
         }

         default:
            goto fail;
         }
      }

      default:
         goto fail;
      }

      deref = parent;
   }

fail:
   *out_base = 0;
   *out_range = ~0;
}

static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)
{
   assert(modes != 0);
   if (util_bitcount(modes) == 1)
      return modes;

   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
                      nir_var_mem_shared | nir_var_mem_global)));

   /* Canonicalize by converting shader_temp to function_temp */
   if (modes & nir_var_shader_temp) {
      modes &= ~nir_var_shader_temp;
      modes |= nir_var_function_temp;
   }

   return modes;
}

static nir_intrinsic_op
get_store_global_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_store_global;
   else
      return nir_intrinsic_store_global_2x32;
}

static nir_intrinsic_op
get_load_global_op_from_addr_format(nir_address_format addr_format)
{
   if (addr_format != nir_address_format_2x32bit_global)
      return nir_intrinsic_load_global;
   else
      return nir_intrinsic_load_global_2x32;
}

static nir_ssa_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
                       nir_ssa_def *addr, nir_address_format addr_format,
                       nir_variable_mode modes,
                       uint32_t align_mul, uint32_t align_offset,
                       unsigned num_components)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
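      /* More than one mode is still possible here.  If every candidate mode
       * resolves to global memory we can simply treat the access as global;
       * otherwise emit a run-time mode check, lower each side of the branch
       * for a single mode, and join the results with a phi.
       */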
      if (addr_format_is_global(addr_format, modes)) {
         return build_explicit_io_load(b, intrin, addr, addr_format,
                                       nir_var_mem_global,
                                       align_mul, align_offset,
                                       num_components);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         nir_ssa_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         nir_ssa_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   modes & ~nir_var_function_temp,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         nir_ssa_def *res1 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_shared,
                                   align_mul, align_offset,
                                   num_components);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         nir_ssa_def *res2 =
            build_explicit_io_load(b, intrin, addr, addr_format,
                                   nir_var_mem_global,
                                   align_mul, align_offset,
                                   num_components);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      }
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
      switch (mode) {
      case nir_var_mem_ubo:
         if (addr_format == nir_address_format_64bit_global_32bit_offset)
            op = nir_intrinsic_load_global_constant_offset;
         else if (addr_format == nir_address_format_64bit_bounded_global)
            op = nir_intrinsic_load_global_constant_bounded;
         else if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_constant;
         else
            op = nir_intrinsic_load_ubo;
         break;
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global;
         else
            op = nir_intrinsic_load_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = get_load_global_op_from_addr_format(addr_format);
         break;
      case nir_var_uniform:
         assert(addr_format_is_offset(addr_format, mode));
         assert(b->shader->info.stage == MESA_SHADER_KERNEL);
         op = nir_intrinsic_load_kernel_input;
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_load_shared;
         break;
      case nir_var_mem_task_payload:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_load_task_payload;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = get_load_global_op_from_addr_format(addr_format);
         }
         break;
      case nir_var_mem_push_const:
         assert(addr_format == nir_address_format_32bit_offset);
         op = nir_intrinsic_load_push_constant;
         break;
      case nir_var_mem_constant:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_load_constant;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = get_load_global_op_from_addr_format(addr_format);
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_load_deref_block_intel:
      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_load_global_block_intel;
         else
            op = nir_intrinsic_load_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_load_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_load_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);

   if (op == nir_intrinsic_load_global_constant_offset) {
      assert(addr_format == nir_address_format_64bit_global_32bit_offset);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
   } else if (op == nir_intrinsic_load_global_constant_bounded) {
      assert(addr_format == nir_address_format_64bit_bounded_global);
      load->src[0] = nir_src_for_ssa(
         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
      load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
   } else if (addr_format_is_global(addr_format, mode)) {
      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));

   if (op == nir_intrinsic_load_constant) {
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, b->shader->constant_data_size);
   } else if (mode == nir_var_mem_push_const) {
      /* Push constants are required to be able to be chased back to the
       * variable so we can provide a base/range.
       */
      nir_variable *var = nir_deref_instr_get_variable(deref);
      nir_intrinsic_set_base(load, 0);
      nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
   }

   unsigned bit_size = intrin->dest.ssa.bit_size;
   if (bit_size == 1) {
      /* TODO: Make the native bool bit_size an option. */
      bit_size = 32;
   }

   if (nir_intrinsic_has_align(load))
      nir_intrinsic_set_align(load, align_mul, align_offset);

   if (nir_intrinsic_has_range_base(load)) {
      unsigned base, range;
      nir_get_explicit_deref_range(deref, addr_format, &base, &range);
      nir_intrinsic_set_range_base(load, base);
      nir_intrinsic_set_range(load, range);
   }

   assert(intrin->dest.is_ssa);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
                     bit_size, NULL);

   assert(bit_size % 8 == 0);

   nir_ssa_def *result;
   if (addr_format_needs_bounds_check(addr_format) &&
       op != nir_intrinsic_load_global_constant_bounded) {
      /* We don't need to bounds-check global_constant_bounded because bounds
       * checking is handled by the intrinsic itself.
       *
       * The Vulkan spec for robustBufferAccess gives us quite a few options
       * as to what we can do with an OOB read.  Unfortunately, returning
       * undefined values isn't one of them so we return an actual zero.
       */
      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);

      /* TODO: Better handle block_intel. */
      const unsigned load_size = (bit_size / 8) * load->num_components;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));

      nir_builder_instr_insert(b, &load->instr);

      nir_pop_if(b, NULL);

      result = nir_if_phi(b, &load->dest.ssa, zero);
   } else {
      nir_builder_instr_insert(b, &load->instr);
      result = &load->dest.ssa;
   }

   if (intrin->dest.ssa.bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
       */
      if (mode == nir_var_mem_shared ||
          mode == nir_var_shader_temp ||
          mode == nir_var_function_temp)
         result = nir_b2b1(b, result);
      else
         result = nir_i2b(b, result);
   }

   return result;
}

static void
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
                        nir_ssa_def *addr, nir_address_format addr_format,
                        nir_variable_mode modes,
                        uint32_t align_mul, uint32_t align_offset,
                        nir_ssa_def *value, nir_component_mask_t write_mask)
{
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
      } else if (modes & nir_var_function_temp) {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 modes & ~nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      } else {
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_shared,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      }
      return;
   }

   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      assert(write_mask != 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = get_store_global_op_from_addr_format(addr_format);
         else
            op = nir_intrinsic_store_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = get_store_global_op_from_addr_format(addr_format);
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_store_shared;
         break;
      case nir_var_mem_task_payload:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_store_task_payload;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_store_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = get_store_global_op_from_addr_format(addr_format);
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_store_deref_block_intel:
      assert(write_mask == 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_store_global_block_intel;
         else
            op = nir_intrinsic_store_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_store_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_store_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);

   if (value->bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
       *
       * TODO: Make the native bool bit_size an option.
       */
      if (mode == nir_var_mem_shared ||
          mode == nir_var_shader_temp ||
          mode == nir_var_function_temp)
         value = nir_b2b32(b, value);
      else
         value = nir_b2i(b, value, 32);
   }

   store->src[0] = nir_src_for_ssa(value);
   if (addr_format_is_global(addr_format, mode)) {
      store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));

   nir_intrinsic_set_align(store, align_mul, align_offset);

   assert(value->num_components == 1 ||
          value->num_components == intrin->num_components);
   store->num_components = value->num_components;

   assert(value->bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      /* TODO: Better handle block_intel. */
*/ 1730 const unsigned store_size = (value->bit_size / 8) * store->num_components; 1731 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size)); 1732 1733 nir_builder_instr_insert(b, &store->instr); 1734 1735 nir_pop_if(b, NULL); 1736 } else { 1737 nir_builder_instr_insert(b, &store->instr); 1738 } 1739} 1740 1741static nir_ssa_def * 1742build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, 1743 nir_ssa_def *addr, nir_address_format addr_format, 1744 nir_variable_mode modes) 1745{ 1746 modes = canonicalize_generic_modes(modes); 1747 1748 if (util_bitcount(modes) > 1) { 1749 if (addr_format_is_global(addr_format, modes)) { 1750 return build_explicit_io_atomic(b, intrin, addr, addr_format, 1751 nir_var_mem_global); 1752 } else if (modes & nir_var_function_temp) { 1753 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1754 nir_var_function_temp)); 1755 nir_ssa_def *res1 = 1756 build_explicit_io_atomic(b, intrin, addr, addr_format, 1757 nir_var_function_temp); 1758 nir_push_else(b, NULL); 1759 nir_ssa_def *res2 = 1760 build_explicit_io_atomic(b, intrin, addr, addr_format, 1761 modes & ~nir_var_function_temp); 1762 nir_pop_if(b, NULL); 1763 return nir_if_phi(b, res1, res2); 1764 } else { 1765 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format, 1766 nir_var_mem_shared)); 1767 assert(modes & nir_var_mem_shared); 1768 nir_ssa_def *res1 = 1769 build_explicit_io_atomic(b, intrin, addr, addr_format, 1770 nir_var_mem_shared); 1771 nir_push_else(b, NULL); 1772 assert(modes & nir_var_mem_global); 1773 nir_ssa_def *res2 = 1774 build_explicit_io_atomic(b, intrin, addr, addr_format, 1775 nir_var_mem_global); 1776 nir_pop_if(b, NULL); 1777 return nir_if_phi(b, res1, res2); 1778 } 1779 } 1780 1781 assert(util_bitcount(modes) == 1); 1782 const nir_variable_mode mode = modes; 1783 1784 const unsigned num_data_srcs = 1785 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1; 1786 1787 nir_intrinsic_op op; 1788 switch (mode) { 1789 case nir_var_mem_ssbo: 1790 if (addr_format_is_global(addr_format, mode)) 1791 op = global_atomic_for_deref(addr_format, intrin->intrinsic); 1792 else 1793 op = ssbo_atomic_for_deref(intrin->intrinsic); 1794 break; 1795 case nir_var_mem_global: 1796 assert(addr_format_is_global(addr_format, mode)); 1797 op = global_atomic_for_deref(addr_format, intrin->intrinsic); 1798 break; 1799 case nir_var_mem_shared: 1800 assert(addr_format_is_offset(addr_format, mode)); 1801 op = shared_atomic_for_deref(intrin->intrinsic); 1802 break; 1803 case nir_var_mem_task_payload: 1804 assert(addr_format_is_offset(addr_format, mode)); 1805 op = task_payload_atomic_for_deref(intrin->intrinsic); 1806 break; 1807 default: 1808 unreachable("Unsupported explicit IO variable mode"); 1809 } 1810 1811 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op); 1812 1813 unsigned src = 0; 1814 if (addr_format_is_global(addr_format, mode)) { 1815 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); 1816 } else if (addr_format_is_offset(addr_format, mode)) { 1817 assert(addr->num_components == 1); 1818 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1819 } else { 1820 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); 1821 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); 1822 } 1823 for (unsigned i = 0; i < num_data_srcs; i++) { 1824 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa); 1825 } 1826 1827 /* Global atomics don't have access 
flags because they assume that the 1828 * address may be non-uniform. 1829 */ 1830 if (nir_intrinsic_has_access(atomic)) 1831 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin)); 1832 1833 assert(intrin->dest.ssa.num_components == 1); 1834 nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1835 1, intrin->dest.ssa.bit_size, NULL); 1836 1837 assert(atomic->dest.ssa.bit_size % 8 == 0); 1838 1839 if (addr_format_needs_bounds_check(addr_format)) { 1840 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8; 1841 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size)); 1842 1843 nir_builder_instr_insert(b, &atomic->instr); 1844 1845 nir_pop_if(b, NULL); 1846 return nir_if_phi(b, &atomic->dest.ssa, 1847 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size)); 1848 } else { 1849 nir_builder_instr_insert(b, &atomic->instr); 1850 return &atomic->dest.ssa; 1851 } 1852} 1853 1854nir_ssa_def * 1855nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref, 1856 nir_ssa_def *base_addr, 1857 nir_address_format addr_format) 1858{ 1859 assert(deref->dest.is_ssa); 1860 switch (deref->deref_type) { 1861 case nir_deref_type_var: 1862 return build_addr_for_var(b, deref->var, addr_format); 1863 1864 case nir_deref_type_ptr_as_array: 1865 case nir_deref_type_array: { 1866 unsigned stride = nir_deref_instr_array_stride(deref); 1867 assert(stride > 0); 1868 1869 unsigned offset_bit_size = addr_get_offset_bit_size(base_addr, addr_format); 1870 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1); 1871 nir_ssa_def *offset; 1872 1873 /* If the access chain has been declared in-bounds, then we know it doesn't 1874 * overflow the type. For nir_deref_type_array, this implies it cannot be 1875 * negative. Also, since types in NIR have a maximum 32-bit size, we know the 1876 * final result will fit in a 32-bit value so we can convert the index to 1877 * 32-bit before multiplying and save ourselves from a 64-bit multiply. 
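    *
    * For example, with a 64-bit offset and an array stride of 16, an
    * in-bounds array index is narrowed with nir_u2u32, multiplied by 16 as a
    * 32-bit nir_amul_imm, and only the product is widened back to 64 bits;
    * an index that is not known to be in bounds is instead sign-extended to
    * the full offset width before the multiply.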
1878 */ 1879 if (deref->arr.in_bounds && deref->deref_type == nir_deref_type_array) { 1880 index = nir_u2u32(b, index); 1881 offset = nir_u2u(b, nir_amul_imm(b, index, stride), offset_bit_size); 1882 } else { 1883 index = nir_i2i(b, index, offset_bit_size); 1884 offset = nir_amul_imm(b, index, stride); 1885 } 1886 1887 return build_addr_iadd(b, base_addr, addr_format, deref->modes, offset); 1888 } 1889 1890 case nir_deref_type_array_wildcard: 1891 unreachable("Wildcards should be lowered by now"); 1892 break; 1893 1894 case nir_deref_type_struct: { 1895 nir_deref_instr *parent = nir_deref_instr_parent(deref); 1896 int offset = glsl_get_struct_field_offset(parent->type, 1897 deref->strct.index); 1898 assert(offset >= 0); 1899 return build_addr_iadd_imm(b, base_addr, addr_format, 1900 deref->modes, offset); 1901 } 1902 1903 case nir_deref_type_cast: 1904 /* Nothing to do here */ 1905 return base_addr; 1906 } 1907 1908 unreachable("Invalid NIR deref type"); 1909} 1910 1911void 1912nir_lower_explicit_io_instr(nir_builder *b, 1913 nir_intrinsic_instr *intrin, 1914 nir_ssa_def *addr, 1915 nir_address_format addr_format) 1916{ 1917 b->cursor = nir_after_instr(&intrin->instr); 1918 1919 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1920 unsigned vec_stride = glsl_get_explicit_stride(deref->type); 1921 unsigned scalar_size = type_scalar_size_bytes(deref->type); 1922 assert(vec_stride == 0 || glsl_type_is_vector(deref->type)); 1923 assert(vec_stride == 0 || vec_stride >= scalar_size); 1924 1925 uint32_t align_mul, align_offset; 1926 if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) { 1927 /* If we don't have an alignment from the deref, assume scalar */ 1928 align_mul = scalar_size; 1929 align_offset = 0; 1930 } 1931 1932 switch (intrin->intrinsic) { 1933 case nir_intrinsic_load_deref: { 1934 nir_ssa_def *value; 1935 if (vec_stride > scalar_size) { 1936 nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, }; 1937 for (unsigned i = 0; i < intrin->num_components; i++) { 1938 unsigned comp_offset = i * vec_stride; 1939 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 1940 deref->modes, 1941 comp_offset); 1942 comps[i] = build_explicit_io_load(b, intrin, comp_addr, 1943 addr_format, deref->modes, 1944 align_mul, 1945 (align_offset + comp_offset) % 1946 align_mul, 1947 1); 1948 } 1949 value = nir_vec(b, comps, intrin->num_components); 1950 } else { 1951 value = build_explicit_io_load(b, intrin, addr, addr_format, 1952 deref->modes, align_mul, align_offset, 1953 intrin->num_components); 1954 } 1955 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 1956 break; 1957 } 1958 1959 case nir_intrinsic_store_deref: { 1960 assert(intrin->src[1].is_ssa); 1961 nir_ssa_def *value = intrin->src[1].ssa; 1962 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin); 1963 if (vec_stride > scalar_size) { 1964 for (unsigned i = 0; i < intrin->num_components; i++) { 1965 if (!(write_mask & (1 << i))) 1966 continue; 1967 1968 unsigned comp_offset = i * vec_stride; 1969 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format, 1970 deref->modes, 1971 comp_offset); 1972 build_explicit_io_store(b, intrin, comp_addr, addr_format, 1973 deref->modes, align_mul, 1974 (align_offset + comp_offset) % align_mul, 1975 nir_channel(b, value, i), 1); 1976 } 1977 } else { 1978 build_explicit_io_store(b, intrin, addr, addr_format, 1979 deref->modes, align_mul, align_offset, 1980 value, write_mask); 1981 } 1982 break; 1983 } 1984 1985 case 
nir_intrinsic_load_deref_block_intel: { 1986 nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format, 1987 deref->modes, 1988 align_mul, align_offset, 1989 intrin->num_components); 1990 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 1991 break; 1992 } 1993 1994 case nir_intrinsic_store_deref_block_intel: { 1995 assert(intrin->src[1].is_ssa); 1996 nir_ssa_def *value = intrin->src[1].ssa; 1997 const nir_component_mask_t write_mask = 0; 1998 build_explicit_io_store(b, intrin, addr, addr_format, 1999 deref->modes, align_mul, align_offset, 2000 value, write_mask); 2001 break; 2002 } 2003 2004 default: { 2005 nir_ssa_def *value = 2006 build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes); 2007 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value); 2008 break; 2009 } 2010 } 2011 2012 nir_instr_remove(&intrin->instr); 2013} 2014 2015bool 2016nir_get_explicit_deref_align(nir_deref_instr *deref, 2017 bool default_to_type_align, 2018 uint32_t *align_mul, 2019 uint32_t *align_offset) 2020{ 2021 if (deref->deref_type == nir_deref_type_var) { 2022 /* If we see a variable, align_mul is effectively infinite because we 2023 * know the offset exactly (up to the offset of the base pointer for the 2024 * given variable mode). We have to pick something so we choose 256B 2025 * as an arbitrary alignment which seems high enough for any reasonable 2026 * wide-load use-case. Back-ends should clamp alignments down if 256B 2027 * is too large for some reason. 2028 */ 2029 *align_mul = 256; 2030 *align_offset = deref->var->data.driver_location % 256; 2031 return true; 2032 } 2033 2034 /* If we're a cast deref that has an alignment, use that. */ 2035 if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) { 2036 *align_mul = deref->cast.align_mul; 2037 *align_offset = deref->cast.align_offset; 2038 return true; 2039 } 2040 2041 /* Otherwise, we need to compute the alignment based on the parent */ 2042 nir_deref_instr *parent = nir_deref_instr_parent(deref); 2043 if (parent == NULL) { 2044 assert(deref->deref_type == nir_deref_type_cast); 2045 if (default_to_type_align) { 2046 /* If we don't have a parent, assume the type's alignment, if any. */ 2047 unsigned type_align = glsl_get_explicit_alignment(deref->type); 2048 if (type_align == 0) 2049 return false; 2050 2051 *align_mul = type_align; 2052 *align_offset = 0; 2053 return true; 2054 } else { 2055 return false; 2056 } 2057 } 2058 2059 uint32_t parent_mul, parent_offset; 2060 if (!nir_get_explicit_deref_align(parent, default_to_type_align, 2061 &parent_mul, &parent_offset)) 2062 return false; 2063 2064 switch (deref->deref_type) { 2065 case nir_deref_type_var: 2066 unreachable("Handled above"); 2067 2068 case nir_deref_type_array: 2069 case nir_deref_type_array_wildcard: 2070 case nir_deref_type_ptr_as_array: { 2071 const unsigned stride = nir_deref_instr_array_stride(deref); 2072 if (stride == 0) 2073 return false; 2074 2075 if (deref->deref_type != nir_deref_type_array_wildcard && 2076 nir_src_is_const(deref->arr.index)) { 2077 unsigned offset = nir_src_as_uint(deref->arr.index) * stride; 2078 *align_mul = parent_mul; 2079 *align_offset = (parent_offset + offset) % parent_mul; 2080 } else { 2081 /* If this is a wildcard or an indirect deref, we have to go with the 2082 * power-of-two gcd. 
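       *
       * Worked example: with parent align_mul=16, parent align_offset=4 and
       * an indirect array access of stride 12, the largest power of two
       * dividing 12 is 4, so align_mul becomes MIN2(16, 4) = 4 and
       * align_offset becomes 4 % 4 = 0.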
2083 */ 2084 *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1)); 2085 *align_offset = parent_offset % *align_mul; 2086 } 2087 return true; 2088 } 2089 2090 case nir_deref_type_struct: { 2091 const int offset = glsl_get_struct_field_offset(parent->type, 2092 deref->strct.index); 2093 if (offset < 0) 2094 return false; 2095 2096 *align_mul = parent_mul; 2097 *align_offset = (parent_offset + offset) % parent_mul; 2098 return true; 2099 } 2100 2101 case nir_deref_type_cast: 2102 /* We handled the explicit alignment case above. */ 2103 assert(deref->cast.align_mul == 0); 2104 *align_mul = parent_mul; 2105 *align_offset = parent_offset; 2106 return true; 2107 } 2108 2109 unreachable("Invalid deref_instr_type"); 2110} 2111 2112static void 2113lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref, 2114 nir_address_format addr_format) 2115{ 2116 /* Just delete the deref if it's not used. We can't use 2117 * nir_deref_instr_remove_if_unused here because it may remove more than 2118 * one deref which could break our list walking since we walk the list 2119 * backwards. 2120 */ 2121 assert(list_is_empty(&deref->dest.ssa.if_uses)); 2122 if (list_is_empty(&deref->dest.ssa.uses)) { 2123 nir_instr_remove(&deref->instr); 2124 return; 2125 } 2126 2127 b->cursor = nir_after_instr(&deref->instr); 2128 2129 nir_ssa_def *base_addr = NULL; 2130 if (deref->deref_type != nir_deref_type_var) { 2131 assert(deref->parent.is_ssa); 2132 base_addr = deref->parent.ssa; 2133 } 2134 2135 nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr, 2136 addr_format); 2137 assert(addr->bit_size == deref->dest.ssa.bit_size); 2138 assert(addr->num_components == deref->dest.ssa.num_components); 2139 2140 nir_instr_remove(&deref->instr); 2141 nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr); 2142} 2143 2144static void 2145lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin, 2146 nir_address_format addr_format) 2147{ 2148 assert(intrin->src[0].is_ssa); 2149 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format); 2150} 2151 2152static void 2153lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin, 2154 nir_address_format addr_format) 2155{ 2156 b->cursor = nir_after_instr(&intrin->instr); 2157 2158 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2159 2160 assert(glsl_type_is_array(deref->type)); 2161 assert(glsl_get_length(deref->type) == 0); 2162 assert(nir_deref_mode_is(deref, nir_var_mem_ssbo)); 2163 unsigned stride = glsl_get_explicit_stride(deref->type); 2164 assert(stride > 0); 2165 2166 nir_ssa_def *addr = &deref->dest.ssa; 2167 nir_ssa_def *index = addr_to_index(b, addr, addr_format); 2168 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format); 2169 unsigned access = nir_intrinsic_access(intrin); 2170 2171 nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access); 2172 arr_size = nir_usub_sat(b, arr_size, offset); 2173 arr_size = nir_udiv_imm(b, arr_size, stride); 2174 2175 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size); 2176 nir_instr_remove(&intrin->instr); 2177} 2178 2179static void 2180lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin, 2181 nir_address_format addr_format) 2182{ 2183 if (addr_format_is_global(addr_format, 0)) { 2184 /* If the address format is always global, then the driver can use 2185 * global addresses regardless of the mode. In that case, don't create 2186 * a check, just whack the intrinsic to addr_mode_is and delegate to the 2187 * driver lowering. 
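    *
    * For instance, with nir_address_format_64bit_global every pointer is
    * already a plain global address, so deref_mode_is can only be answered by
    * the driver and is rewritten to addr_mode_is here.  With a format such as
    * nir_address_format_62bit_generic, the code below instead emits
    * build_runtime_addr_mode_check(), which tests the mode enum packed into
    * the pointer.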
2188 */ 2189 intrin->intrinsic = nir_intrinsic_addr_mode_is; 2190 return; 2191 } 2192 2193 assert(intrin->src[0].is_ssa); 2194 nir_ssa_def *addr = intrin->src[0].ssa; 2195 2196 b->cursor = nir_instr_remove(&intrin->instr); 2197 2198 nir_ssa_def *is_mode = 2199 build_runtime_addr_mode_check(b, addr, addr_format, 2200 nir_intrinsic_memory_modes(intrin)); 2201 2202 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode); 2203} 2204 2205static bool 2206nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes, 2207 nir_address_format addr_format) 2208{ 2209 bool progress = false; 2210 2211 nir_builder b; 2212 nir_builder_init(&b, impl); 2213 2214 /* Walk in reverse order so that we can see the full deref chain when we 2215 * lower the access operations. We lower them assuming that the derefs 2216 * will be turned into address calculations later. 2217 */ 2218 nir_foreach_block_reverse(block, impl) { 2219 nir_foreach_instr_reverse_safe(instr, block) { 2220 switch (instr->type) { 2221 case nir_instr_type_deref: { 2222 nir_deref_instr *deref = nir_instr_as_deref(instr); 2223 if (nir_deref_mode_is_in_set(deref, modes)) { 2224 lower_explicit_io_deref(&b, deref, addr_format); 2225 progress = true; 2226 } 2227 break; 2228 } 2229 2230 case nir_instr_type_intrinsic: { 2231 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2232 switch (intrin->intrinsic) { 2233 case nir_intrinsic_load_deref: 2234 case nir_intrinsic_store_deref: 2235 case nir_intrinsic_load_deref_block_intel: 2236 case nir_intrinsic_store_deref_block_intel: 2237 case nir_intrinsic_deref_atomic_add: 2238 case nir_intrinsic_deref_atomic_imin: 2239 case nir_intrinsic_deref_atomic_umin: 2240 case nir_intrinsic_deref_atomic_imax: 2241 case nir_intrinsic_deref_atomic_umax: 2242 case nir_intrinsic_deref_atomic_and: 2243 case nir_intrinsic_deref_atomic_or: 2244 case nir_intrinsic_deref_atomic_xor: 2245 case nir_intrinsic_deref_atomic_exchange: 2246 case nir_intrinsic_deref_atomic_comp_swap: 2247 case nir_intrinsic_deref_atomic_fadd: 2248 case nir_intrinsic_deref_atomic_fmin: 2249 case nir_intrinsic_deref_atomic_fmax: 2250 case nir_intrinsic_deref_atomic_fcomp_swap: { 2251 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2252 if (nir_deref_mode_is_in_set(deref, modes)) { 2253 lower_explicit_io_access(&b, intrin, addr_format); 2254 progress = true; 2255 } 2256 break; 2257 } 2258 2259 case nir_intrinsic_deref_buffer_array_length: { 2260 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2261 if (nir_deref_mode_is_in_set(deref, modes)) { 2262 lower_explicit_io_array_length(&b, intrin, addr_format); 2263 progress = true; 2264 } 2265 break; 2266 } 2267 2268 case nir_intrinsic_deref_mode_is: { 2269 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2270 if (nir_deref_mode_is_in_set(deref, modes)) { 2271 lower_explicit_io_mode_check(&b, intrin, addr_format); 2272 progress = true; 2273 } 2274 break; 2275 } 2276 2277 default: 2278 break; 2279 } 2280 break; 2281 } 2282 2283 default: 2284 /* Nothing to do */ 2285 break; 2286 } 2287 } 2288 } 2289 2290 if (progress) { 2291 nir_metadata_preserve(impl, nir_metadata_block_index | 2292 nir_metadata_dominance); 2293 } else { 2294 nir_metadata_preserve(impl, nir_metadata_all); 2295 } 2296 2297 return progress; 2298} 2299 2300/** Lower explicitly laid out I/O access to byte offset/address intrinsics 2301 * 2302 * This pass is intended to be used for any I/O which touches memory external 2303 * to the shader or which is directly visible to the client. 
It requires that 2304 * all data types in the given modes have explicit stride/offset decorations 2305 * to tell it exactly how to calculate the offset/address for the given load, 2306 * store, or atomic operation. If the offset/stride information does not come 2307 * from the client explicitly (as with shared variables in GL or Vulkan), 2308 * nir_lower_vars_to_explicit_types() can be used to add them. 2309 * 2310 * Unlike nir_lower_io, this pass is fully capable of handling incomplete 2311 * pointer chains which may contain cast derefs. It does so by walking the 2312 * deref chain backwards and simply replacing each deref, one at a time, with 2313 * the appropriate address calculation. The pass takes a nir_address_format 2314 * parameter which describes how the offset or address is to be represented 2315 * during calculations. By ensuring that the address is always in a 2316 * consistent format, pointers can safely be conjured from thin air by the 2317 * driver, stored to variables, passed through phis, etc. 2318 * 2319 * The one exception to the simple algorithm described above is for handling 2320 * row-major matrices in which case we may look down one additional level of 2321 * the deref chain. 2322 * 2323 * This pass is also capable of handling OpenCL generic pointers. If the 2324 * address mode is global, it will lower any ambiguous (more than one mode) 2325 * access to global and pass through the deref_mode_is run-time checks as 2326 * addr_mode_is. This assumes the driver has somehow mapped shared and 2327 * scratch memory to the global address space. For other modes such as 2328 * 62bit_generic, there is an enum embedded in the address and we lower 2329 * ambiguous access to an if-ladder and deref_mode_is to a check against the 2330 * embedded enum. If nir_lower_explicit_io is called on any shader that 2331 * contains generic pointers, it must either be used on all of the generic 2332 * modes or none.
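 *
 * One possible driver-side usage sketch (hedged: the modes and address
 * formats shown are one choice among many, not something this pass mandates):
 *
 *    NIR_PASS_V(shader, nir_lower_explicit_io,
 *               nir_var_mem_ubo | nir_var_mem_ssbo,
 *               nir_address_format_64bit_bounded_global);
 *    NIR_PASS_V(shader, nir_lower_explicit_io, nir_var_mem_shared,
 *               nir_address_format_32bit_offset);
 *
 * with nir_lower_vars_to_explicit_types() having been run on the shared
 * variables beforehand.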
2333 */ 2334bool 2335nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes, 2336 nir_address_format addr_format) 2337{ 2338 bool progress = false; 2339 2340 nir_foreach_function(function, shader) { 2341 if (function->impl && 2342 nir_lower_explicit_io_impl(function->impl, modes, addr_format)) 2343 progress = true; 2344 } 2345 2346 return progress; 2347} 2348 2349static bool 2350nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl, 2351 nir_variable_mode modes, 2352 glsl_type_size_align_func type_info) 2353{ 2354 bool progress = false; 2355 2356 nir_foreach_block(block, impl) { 2357 nir_foreach_instr(instr, block) { 2358 if (instr->type != nir_instr_type_deref) 2359 continue; 2360 2361 nir_deref_instr *deref = nir_instr_as_deref(instr); 2362 if (!nir_deref_mode_is_in_set(deref, modes)) 2363 continue; 2364 2365 unsigned size, alignment; 2366 const struct glsl_type *new_type = 2367 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment); 2368 if (new_type != deref->type) { 2369 progress = true; 2370 deref->type = new_type; 2371 } 2372 if (deref->deref_type == nir_deref_type_cast) { 2373 /* See also glsl_type::get_explicit_type_for_size_align() */ 2374 unsigned new_stride = align(size, alignment); 2375 if (new_stride != deref->cast.ptr_stride) { 2376 deref->cast.ptr_stride = new_stride; 2377 progress = true; 2378 } 2379 } 2380 } 2381 } 2382 2383 if (progress) { 2384 nir_metadata_preserve(impl, nir_metadata_block_index | 2385 nir_metadata_dominance | 2386 nir_metadata_live_ssa_defs | 2387 nir_metadata_loop_analysis); 2388 } else { 2389 nir_metadata_preserve(impl, nir_metadata_all); 2390 } 2391 2392 return progress; 2393} 2394 2395static bool 2396lower_vars_to_explicit(nir_shader *shader, 2397 struct exec_list *vars, nir_variable_mode mode, 2398 glsl_type_size_align_func type_info) 2399{ 2400 bool progress = false; 2401 unsigned offset; 2402 switch (mode) { 2403 case nir_var_uniform: 2404 assert(shader->info.stage == MESA_SHADER_KERNEL); 2405 offset = 0; 2406 break; 2407 case nir_var_function_temp: 2408 case nir_var_shader_temp: 2409 offset = shader->scratch_size; 2410 break; 2411 case nir_var_mem_shared: 2412 offset = shader->info.shared_size; 2413 break; 2414 case nir_var_mem_task_payload: 2415 offset = shader->info.task_payload_size; 2416 break; 2417 case nir_var_mem_global: 2418 offset = shader->global_mem_size; 2419 break; 2420 case nir_var_mem_constant: 2421 offset = shader->constant_data_size; 2422 break; 2423 case nir_var_shader_call_data: 2424 case nir_var_ray_hit_attrib: 2425 offset = 0; 2426 break; 2427 default: 2428 unreachable("Unsupported mode"); 2429 } 2430 nir_foreach_variable_in_list(var, vars) { 2431 if (var->data.mode != mode) 2432 continue; 2433 2434 unsigned size, align; 2435 const struct glsl_type *explicit_type = 2436 glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align); 2437 2438 if (explicit_type != var->type) 2439 var->type = explicit_type; 2440 2441 UNUSED bool is_empty_struct = 2442 glsl_type_is_struct_or_ifc(explicit_type) && 2443 glsl_get_length(explicit_type) == 0; 2444 2445 assert(util_is_power_of_two_nonzero(align) || is_empty_struct); 2446 var->data.driver_location = ALIGN_POT(offset, align); 2447 offset = var->data.driver_location + size; 2448 progress = true; 2449 } 2450 2451 switch (mode) { 2452 case nir_var_uniform: 2453 assert(shader->info.stage == MESA_SHADER_KERNEL); 2454 shader->num_uniforms = offset; 2455 break; 2456 case nir_var_shader_temp: 2457 case nir_var_function_temp: 2458 
shader->scratch_size = offset; 2459 break; 2460 case nir_var_mem_shared: 2461 shader->info.shared_size = offset; 2462 break; 2463 case nir_var_mem_task_payload: 2464 shader->info.task_payload_size = offset; 2465 break; 2466 case nir_var_mem_global: 2467 shader->global_mem_size = offset; 2468 break; 2469 case nir_var_mem_constant: 2470 shader->constant_data_size = offset; 2471 break; 2472 case nir_var_shader_call_data: 2473 case nir_var_ray_hit_attrib: 2474 break; 2475 default: 2476 unreachable("Unsupported mode"); 2477 } 2478 2479 return progress; 2480} 2481 2482/* If nir_lower_vars_to_explicit_types is called on any shader that contains 2483 * generic pointers, it must either be used on all of the generic modes or 2484 * none. 2485 */ 2486bool 2487nir_lower_vars_to_explicit_types(nir_shader *shader, 2488 nir_variable_mode modes, 2489 glsl_type_size_align_func type_info) 2490{ 2491 /* TODO: Situations which need to be handled to support more modes: 2492 * - row-major matrices 2493 * - compact shader inputs/outputs 2494 * - interface types 2495 */ 2496 ASSERTED nir_variable_mode supported = 2497 nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant | 2498 nir_var_shader_temp | nir_var_function_temp | nir_var_uniform | 2499 nir_var_shader_call_data | nir_var_ray_hit_attrib | 2500 nir_var_mem_task_payload; 2501 assert(!(modes & ~supported) && "unsupported"); 2502 2503 bool progress = false; 2504 2505 if (modes & nir_var_uniform) 2506 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info); 2507 if (modes & nir_var_mem_global) 2508 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info); 2509 2510 if (modes & nir_var_mem_shared) { 2511 assert(!shader->info.shared_memory_explicit_layout); 2512 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info); 2513 } 2514 2515 if (modes & nir_var_shader_temp) 2516 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info); 2517 if (modes & nir_var_mem_constant) 2518 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info); 2519 if (modes & nir_var_shader_call_data) 2520 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info); 2521 if (modes & nir_var_ray_hit_attrib) 2522 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info); 2523 if (modes & nir_var_mem_task_payload) 2524 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info); 2525 2526 nir_foreach_function(function, shader) { 2527 if (function->impl) { 2528 if (modes & nir_var_function_temp) 2529 progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info); 2530 2531 progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info); 2532 } 2533 } 2534 2535 return progress; 2536} 2537 2538static void 2539write_constant(void *dst, size_t dst_size, 2540 const nir_constant *c, const struct glsl_type *type) 2541{ 2542 if (glsl_type_is_vector_or_scalar(type)) { 2543 const unsigned num_components = glsl_get_vector_elements(type); 2544 const unsigned bit_size = glsl_get_bit_size(type); 2545 if (bit_size == 1) { 2546 /* Booleans are special-cased to be 32-bit 2547 * 2548 * TODO: Make the native bool bit_size an option. 
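       *
       * For example, a constant bvec2(true, false) is written as 8 bytes:
       * 0xffffffff for the true component (from -(int)true) and 0x00000000
       * for the false one.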
2549 */ 2550 assert(num_components * 4 <= dst_size); 2551 for (unsigned i = 0; i < num_components; i++) { 2552 int32_t b32 = -(int)c->values[i].b; 2553 memcpy((char *)dst + i * 4, &b32, 4); 2554 } 2555 } else { 2556 assert(bit_size >= 8 && bit_size % 8 == 0); 2557 const unsigned byte_size = bit_size / 8; 2558 assert(num_components * byte_size <= dst_size); 2559 for (unsigned i = 0; i < num_components; i++) { 2560 /* Annoyingly, thanks to packed structs, we can't make any 2561 * assumptions about the alignment of dst. To avoid any strange 2562 * issues with unaligned writes, we always use memcpy. 2563 */ 2564 memcpy((char *)dst + i * byte_size, &c->values[i], byte_size); 2565 } 2566 } 2567 } else if (glsl_type_is_array_or_matrix(type)) { 2568 const unsigned array_len = glsl_get_length(type); 2569 const unsigned stride = glsl_get_explicit_stride(type); 2570 assert(stride > 0); 2571 const struct glsl_type *elem_type = glsl_get_array_element(type); 2572 for (unsigned i = 0; i < array_len; i++) { 2573 unsigned elem_offset = i * stride; 2574 assert(elem_offset < dst_size); 2575 write_constant((char *)dst + elem_offset, dst_size - elem_offset, 2576 c->elements[i], elem_type); 2577 } 2578 } else { 2579 assert(glsl_type_is_struct_or_ifc(type)); 2580 const unsigned num_fields = glsl_get_length(type); 2581 for (unsigned i = 0; i < num_fields; i++) { 2582 const int field_offset = glsl_get_struct_field_offset(type, i); 2583 assert(field_offset >= 0 && field_offset < dst_size); 2584 const struct glsl_type *field_type = glsl_get_struct_field(type, i); 2585 write_constant((char *)dst + field_offset, dst_size - field_offset, 2586 c->elements[i], field_type); 2587 } 2588 } 2589} 2590 2591void 2592nir_gather_explicit_io_initializers(nir_shader *shader, 2593 void *dst, size_t dst_size, 2594 nir_variable_mode mode) 2595{ 2596 /* It doesn't really make sense to gather initializers for more than one 2597 * mode at a time. If this ever becomes well-defined, we can drop the 2598 * assert then. 2599 */ 2600 assert(util_bitcount(mode) == 1); 2601 2602 nir_foreach_variable_with_modes(var, shader, mode) { 2603 assert(var->data.driver_location < dst_size); 2604 write_constant((char *)dst + var->data.driver_location, 2605 dst_size - var->data.driver_location, 2606 var->constant_initializer, var->type); 2607 } 2608} 2609 2610/** 2611 * Return the offset source for a load/store intrinsic. 
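 *
 * For example, load_ubo and load_ssbo keep their buffer index in src[0], so
 * the offset returned here is src[1]; intrinsics that have no offset source
 * fall through to the default case and yield NULL.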
2612 */ 2613nir_src * 2614nir_get_io_offset_src(nir_intrinsic_instr *instr) 2615{ 2616 switch (instr->intrinsic) { 2617 case nir_intrinsic_load_input: 2618 case nir_intrinsic_load_output: 2619 case nir_intrinsic_load_shared: 2620 case nir_intrinsic_load_task_payload: 2621 case nir_intrinsic_load_uniform: 2622 case nir_intrinsic_load_kernel_input: 2623 case nir_intrinsic_load_global: 2624 case nir_intrinsic_load_global_2x32: 2625 case nir_intrinsic_load_global_constant: 2626 case nir_intrinsic_load_scratch: 2627 case nir_intrinsic_load_fs_input_interp_deltas: 2628 case nir_intrinsic_shared_atomic_add: 2629 case nir_intrinsic_shared_atomic_and: 2630 case nir_intrinsic_shared_atomic_comp_swap: 2631 case nir_intrinsic_shared_atomic_exchange: 2632 case nir_intrinsic_shared_atomic_fadd: 2633 case nir_intrinsic_shared_atomic_fcomp_swap: 2634 case nir_intrinsic_shared_atomic_fmax: 2635 case nir_intrinsic_shared_atomic_fmin: 2636 case nir_intrinsic_shared_atomic_imax: 2637 case nir_intrinsic_shared_atomic_imin: 2638 case nir_intrinsic_shared_atomic_or: 2639 case nir_intrinsic_shared_atomic_umax: 2640 case nir_intrinsic_shared_atomic_umin: 2641 case nir_intrinsic_shared_atomic_xor: 2642 case nir_intrinsic_task_payload_atomic_add: 2643 case nir_intrinsic_task_payload_atomic_imin: 2644 case nir_intrinsic_task_payload_atomic_umin: 2645 case nir_intrinsic_task_payload_atomic_imax: 2646 case nir_intrinsic_task_payload_atomic_umax: 2647 case nir_intrinsic_task_payload_atomic_and: 2648 case nir_intrinsic_task_payload_atomic_or: 2649 case nir_intrinsic_task_payload_atomic_xor: 2650 case nir_intrinsic_task_payload_atomic_exchange: 2651 case nir_intrinsic_task_payload_atomic_comp_swap: 2652 case nir_intrinsic_task_payload_atomic_fadd: 2653 case nir_intrinsic_task_payload_atomic_fmin: 2654 case nir_intrinsic_task_payload_atomic_fmax: 2655 case nir_intrinsic_task_payload_atomic_fcomp_swap: 2656 case nir_intrinsic_global_atomic_add: 2657 case nir_intrinsic_global_atomic_and: 2658 case nir_intrinsic_global_atomic_comp_swap: 2659 case nir_intrinsic_global_atomic_exchange: 2660 case nir_intrinsic_global_atomic_fadd: 2661 case nir_intrinsic_global_atomic_fcomp_swap: 2662 case nir_intrinsic_global_atomic_fmax: 2663 case nir_intrinsic_global_atomic_fmin: 2664 case nir_intrinsic_global_atomic_imax: 2665 case nir_intrinsic_global_atomic_imin: 2666 case nir_intrinsic_global_atomic_or: 2667 case nir_intrinsic_global_atomic_umax: 2668 case nir_intrinsic_global_atomic_umin: 2669 case nir_intrinsic_global_atomic_xor: 2670 return &instr->src[0]; 2671 case nir_intrinsic_load_ubo: 2672 case nir_intrinsic_load_ssbo: 2673 case nir_intrinsic_load_input_vertex: 2674 case nir_intrinsic_load_per_vertex_input: 2675 case nir_intrinsic_load_per_vertex_output: 2676 case nir_intrinsic_load_per_primitive_output: 2677 case nir_intrinsic_load_interpolated_input: 2678 case nir_intrinsic_store_output: 2679 case nir_intrinsic_store_shared: 2680 case nir_intrinsic_store_task_payload: 2681 case nir_intrinsic_store_global: 2682 case nir_intrinsic_store_global_2x32: 2683 case nir_intrinsic_store_scratch: 2684 case nir_intrinsic_ssbo_atomic_add: 2685 case nir_intrinsic_ssbo_atomic_imin: 2686 case nir_intrinsic_ssbo_atomic_umin: 2687 case nir_intrinsic_ssbo_atomic_imax: 2688 case nir_intrinsic_ssbo_atomic_umax: 2689 case nir_intrinsic_ssbo_atomic_and: 2690 case nir_intrinsic_ssbo_atomic_or: 2691 case nir_intrinsic_ssbo_atomic_xor: 2692 case nir_intrinsic_ssbo_atomic_exchange: 2693 case nir_intrinsic_ssbo_atomic_comp_swap: 2694 case 
nir_intrinsic_ssbo_atomic_fadd: 2695 case nir_intrinsic_ssbo_atomic_fmin: 2696 case nir_intrinsic_ssbo_atomic_fmax: 2697 case nir_intrinsic_ssbo_atomic_fcomp_swap: 2698 return &instr->src[1]; 2699 case nir_intrinsic_store_ssbo: 2700 case nir_intrinsic_store_per_vertex_output: 2701 case nir_intrinsic_store_per_primitive_output: 2702 return &instr->src[2]; 2703 default: 2704 return NULL; 2705 } 2706} 2707 2708/** 2709 * Return the vertex index source for a load/store per_vertex intrinsic. 2710 */ 2711nir_src * 2712nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr) 2713{ 2714 switch (instr->intrinsic) { 2715 case nir_intrinsic_load_per_vertex_input: 2716 case nir_intrinsic_load_per_vertex_output: 2717 case nir_intrinsic_load_per_primitive_output: 2718 return &instr->src[0]; 2719 case nir_intrinsic_store_per_vertex_output: 2720 case nir_intrinsic_store_per_primitive_output: 2721 return &instr->src[1]; 2722 default: 2723 return NULL; 2724 } 2725} 2726 2727/** 2728 * Return the numeric constant that identify a NULL pointer for each address 2729 * format. 2730 */ 2731const nir_const_value * 2732nir_address_format_null_value(nir_address_format addr_format) 2733{ 2734 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = { 2735 [nir_address_format_32bit_global] = {{0}}, 2736 [nir_address_format_2x32bit_global] = {{0}}, 2737 [nir_address_format_64bit_global] = {{0}}, 2738 [nir_address_format_64bit_global_32bit_offset] = {{0}}, 2739 [nir_address_format_64bit_bounded_global] = {{0}}, 2740 [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}}, 2741 [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}}, 2742 [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}}, 2743 [nir_address_format_32bit_offset] = {{.u32 = ~0}}, 2744 [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}}, 2745 [nir_address_format_62bit_generic] = {{.u64 = 0}}, 2746 [nir_address_format_logical] = {{.u32 = ~0}}, 2747 }; 2748 2749 assert(addr_format < ARRAY_SIZE(null_values)); 2750 return null_values[addr_format]; 2751} 2752 2753nir_ssa_def * 2754nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 2755 nir_address_format addr_format) 2756{ 2757 switch (addr_format) { 2758 case nir_address_format_32bit_global: 2759 case nir_address_format_2x32bit_global: 2760 case nir_address_format_64bit_global: 2761 case nir_address_format_64bit_bounded_global: 2762 case nir_address_format_32bit_index_offset: 2763 case nir_address_format_vec2_index_32bit_offset: 2764 case nir_address_format_32bit_offset: 2765 case nir_address_format_62bit_generic: 2766 return nir_ball_iequal(b, addr0, addr1); 2767 2768 case nir_address_format_64bit_global_32bit_offset: 2769 return nir_ball_iequal(b, nir_channels(b, addr0, 0xb), 2770 nir_channels(b, addr1, 0xb)); 2771 2772 case nir_address_format_32bit_offset_as_64bit: 2773 assert(addr0->num_components == 1 && addr1->num_components == 1); 2774 return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)); 2775 2776 case nir_address_format_32bit_index_offset_pack64: 2777 assert(addr0->num_components == 1 && addr1->num_components == 1); 2778 return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1)); 2779 2780 case nir_address_format_logical: 2781 unreachable("Unsupported address format"); 2782 } 2783 2784 unreachable("Invalid address format"); 2785} 2786 2787nir_ssa_def * 2788nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, 2789 nir_address_format 
addr_format) 2790{ 2791 switch (addr_format) { 2792 case nir_address_format_32bit_global: 2793 case nir_address_format_64bit_global: 2794 case nir_address_format_32bit_offset: 2795 case nir_address_format_32bit_index_offset_pack64: 2796 case nir_address_format_62bit_generic: 2797 assert(addr0->num_components == 1); 2798 assert(addr1->num_components == 1); 2799 return nir_isub(b, addr0, addr1); 2800 2801 case nir_address_format_2x32bit_global: 2802 return nir_isub(b, addr_to_global(b, addr0, addr_format), 2803 addr_to_global(b, addr1, addr_format)); 2804 2805 case nir_address_format_32bit_offset_as_64bit: 2806 assert(addr0->num_components == 1); 2807 assert(addr1->num_components == 1); 2808 return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1))); 2809 2810 case nir_address_format_64bit_global_32bit_offset: 2811 case nir_address_format_64bit_bounded_global: 2812 return nir_isub(b, addr_to_global(b, addr0, addr_format), 2813 addr_to_global(b, addr1, addr_format)); 2814 2815 case nir_address_format_32bit_index_offset: 2816 assert(addr0->num_components == 2); 2817 assert(addr1->num_components == 2); 2818 /* Assume the same buffer index. */ 2819 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1)); 2820 2821 case nir_address_format_vec2_index_32bit_offset: 2822 assert(addr0->num_components == 3); 2823 assert(addr1->num_components == 3); 2824 /* Assume the same buffer index. */ 2825 return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2)); 2826 2827 case nir_address_format_logical: 2828 unreachable("Unsupported address format"); 2829 } 2830 2831 unreachable("Invalid address format"); 2832} 2833 2834static bool 2835is_input(nir_intrinsic_instr *intrin) 2836{ 2837 return intrin->intrinsic == nir_intrinsic_load_input || 2838 intrin->intrinsic == nir_intrinsic_load_per_vertex_input || 2839 intrin->intrinsic == nir_intrinsic_load_interpolated_input || 2840 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas; 2841} 2842 2843static bool 2844is_output(nir_intrinsic_instr *intrin) 2845{ 2846 return intrin->intrinsic == nir_intrinsic_load_output || 2847 intrin->intrinsic == nir_intrinsic_load_per_vertex_output || 2848 intrin->intrinsic == nir_intrinsic_load_per_primitive_output || 2849 intrin->intrinsic == nir_intrinsic_store_output || 2850 intrin->intrinsic == nir_intrinsic_store_per_vertex_output || 2851 intrin->intrinsic == nir_intrinsic_store_per_primitive_output; 2852} 2853 2854static bool is_dual_slot(nir_intrinsic_instr *intrin) 2855{ 2856 if (intrin->intrinsic == nir_intrinsic_store_output || 2857 intrin->intrinsic == nir_intrinsic_store_per_vertex_output || 2858 intrin->intrinsic == nir_intrinsic_store_per_primitive_output) { 2859 return nir_src_bit_size(intrin->src[0]) == 64 && 2860 nir_src_num_components(intrin->src[0]) >= 3; 2861 } 2862 2863 return nir_dest_bit_size(intrin->dest) == 64 && 2864 nir_dest_num_components(intrin->dest) >= 3; 2865} 2866 2867/** 2868 * This pass adds constant offsets to instr->const_index[0] for input/output 2869 * intrinsics, and resets the offset source to 0. Non-constant offsets remain 2870 * unchanged - since we don't know what part of a compound variable is 2871 * accessed, we allocate storage for the entire thing. For drivers that use 2872 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that 2873 * the offset source will be 0, so that they don't have to add it in manually. 
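 *
 * For example, a load_input with base=16 and a constant offset source of 2
 * becomes a load_input with base=18 and an offset of 0; the io_semantics
 * location is advanced by 2 slots and num_slots is reset to 1 (2 for
 * dual-slot 64-bit values).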
2874 */ 2875 2876static bool 2877add_const_offset_to_base_block(nir_block *block, nir_builder *b, 2878 nir_variable_mode modes) 2879{ 2880 bool progress = false; 2881 nir_foreach_instr_safe(instr, block) { 2882 if (instr->type != nir_instr_type_intrinsic) 2883 continue; 2884 2885 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2886 2887 if (((modes & nir_var_shader_in) && is_input(intrin)) || 2888 ((modes & nir_var_shader_out) && is_output(intrin))) { 2889 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin); 2890 2891 /* NV_mesh_shader: ignore MS primitive indices. */ 2892 if (b->shader->info.stage == MESA_SHADER_MESH && 2893 sem.location == VARYING_SLOT_PRIMITIVE_INDICES && 2894 !(b->shader->info.per_primitive_outputs & 2895 BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES))) 2896 continue; 2897 2898 nir_src *offset = nir_get_io_offset_src(intrin); 2899 2900 /* TODO: Better handling of per-view variables here */ 2901 if (nir_src_is_const(*offset) && 2902 !nir_intrinsic_io_semantics(intrin).per_view) { 2903 unsigned off = nir_src_as_uint(*offset); 2904 2905 nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off); 2906 2907 sem.location += off; 2908 /* non-indirect indexing should reduce num_slots */ 2909 sem.num_slots = is_dual_slot(intrin) ? 2 : 1; 2910 nir_intrinsic_set_io_semantics(intrin, sem); 2911 2912 b->cursor = nir_before_instr(&intrin->instr); 2913 nir_instr_rewrite_src(&intrin->instr, offset, 2914 nir_src_for_ssa(nir_imm_int(b, 0))); 2915 progress = true; 2916 } 2917 } 2918 } 2919 2920 return progress; 2921} 2922 2923bool 2924nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes) 2925{ 2926 bool progress = false; 2927 2928 nir_foreach_function(f, nir) { 2929 if (f->impl) { 2930 bool impl_progress = false; 2931 nir_builder b; 2932 nir_builder_init(&b, f->impl); 2933 nir_foreach_block(block, f->impl) { 2934 impl_progress |= add_const_offset_to_base_block(block, &b, modes); 2935 } 2936 progress |= impl_progress; 2937 if (impl_progress) 2938 nir_metadata_preserve(f->impl, nir_metadata_block_index | nir_metadata_dominance); 2939 else 2940 nir_metadata_preserve(f->impl, nir_metadata_all); 2941 } 2942 } 2943 2944 return progress; 2945} 2946 2947static bool 2948nir_lower_color_inputs(nir_shader *nir) 2949{ 2950 nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2951 bool progress = false; 2952 2953 nir_builder b; 2954 nir_builder_init(&b, impl); 2955 2956 nir_foreach_block (block, impl) { 2957 nir_foreach_instr_safe (instr, block) { 2958 if (instr->type != nir_instr_type_intrinsic) 2959 continue; 2960 2961 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2962 2963 if (intrin->intrinsic != nir_intrinsic_load_deref) 2964 continue; 2965 2966 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2967 if (!nir_deref_mode_is(deref, nir_var_shader_in)) 2968 continue; 2969 2970 b.cursor = nir_before_instr(instr); 2971 nir_variable *var = nir_deref_instr_get_variable(deref); 2972 nir_ssa_def *def; 2973 2974 if (var->data.location == VARYING_SLOT_COL0) { 2975 def = nir_load_color0(&b); 2976 nir->info.fs.color0_interp = var->data.interpolation; 2977 nir->info.fs.color0_sample = var->data.sample; 2978 nir->info.fs.color0_centroid = var->data.centroid; 2979 } else if (var->data.location == VARYING_SLOT_COL1) { 2980 def = nir_load_color1(&b); 2981 nir->info.fs.color1_interp = var->data.interpolation; 2982 nir->info.fs.color1_sample = var->data.sample; 2983 nir->info.fs.color1_centroid = var->data.centroid; 2984 } else { 2985 
continue; 2986 } 2987 2988 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def); 2989 nir_instr_remove(instr); 2990 progress = true; 2991 } 2992 } 2993 2994 if (progress) { 2995 nir_metadata_preserve(impl, nir_metadata_dominance | 2996 nir_metadata_block_index); 2997 } else { 2998 nir_metadata_preserve(impl, nir_metadata_all); 2999 } 3000 return progress; 3001} 3002 3003bool 3004nir_io_add_intrinsic_xfb_info(nir_shader *nir) 3005{ 3006 nir_function_impl *impl = nir_shader_get_entrypoint(nir); 3007 bool progress = false; 3008 3009 for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) 3010 nir->info.xfb_stride[i] = nir->xfb_info->buffers[i].stride / 4; 3011 3012 nir_foreach_block (block, impl) { 3013 nir_foreach_instr_safe (instr, block) { 3014 if (instr->type != nir_instr_type_intrinsic) 3015 continue; 3016 3017 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 3018 3019 if (!nir_intrinsic_has_io_xfb(intr)) 3020 continue; 3021 3022 /* No indirect indexing allowed. The index is implied to be 0. */ 3023 ASSERTED nir_src offset = *nir_get_io_offset_src(intr); 3024 assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0); 3025 3026 /* Calling this pass for the second time shouldn't do anything. */ 3027 if (nir_intrinsic_io_xfb(intr).out[0].num_components || 3028 nir_intrinsic_io_xfb(intr).out[1].num_components || 3029 nir_intrinsic_io_xfb2(intr).out[0].num_components || 3030 nir_intrinsic_io_xfb2(intr).out[1].num_components) 3031 continue; 3032 3033 nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 3034 unsigned writemask = nir_intrinsic_write_mask(intr) << 3035 nir_intrinsic_component(intr); 3036 3037 nir_io_xfb xfb[2]; 3038 memset(xfb, 0, sizeof(xfb)); 3039 3040 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) { 3041 nir_xfb_output_info *out = &nir->xfb_info->outputs[i]; 3042 if (out->location == sem.location) { 3043 unsigned xfb_mask = writemask & out->component_mask; 3044 3045 /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, " 3046 "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n", 3047 i, out->buffer, 3048 out->offset, 3049 out->location, 3050 out->component_offset, 3051 out->component_mask, 3052 xfb_mask, sem.num_slots);*/ 3053 3054 while (xfb_mask) { 3055 int start, count; 3056 u_bit_scan_consecutive_range(&xfb_mask, &start, &count); 3057 3058 xfb[start / 2].out[start % 2].num_components = count; 3059 xfb[start / 2].out[start % 2].buffer = out->buffer; 3060 /* out->offset is relative to the first stored xfb component */ 3061 /* start is relative to component 0 */ 3062 xfb[start / 2].out[start % 2].offset = 3063 out->offset / 4 - out->component_offset + start; 3064 3065 progress = true; 3066 } 3067 } 3068 } 3069 3070 nir_intrinsic_set_io_xfb(intr, xfb[0]); 3071 nir_intrinsic_set_io_xfb2(intr, xfb[1]); 3072 } 3073 } 3074 3075 nir_metadata_preserve(impl, nir_metadata_all); 3076 return progress; 3077} 3078 3079static int 3080type_size_vec4(const struct glsl_type *type, bool bindless) 3081{ 3082 return glsl_count_attribute_slots(type, false); 3083} 3084 3085void 3086nir_lower_io_passes(nir_shader *nir) 3087{ 3088 if (!nir->options->lower_io_variables) 3089 return; 3090 3091 bool has_indirect_inputs = 3092 (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1; 3093 3094 /* Transform feedback requires that indirect outputs are lowered. 
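    * In other words, has_indirect_outputs is forced to false whenever
    * xfb_info is present, so the nir_lower_io_to_temporaries call below
    * lowers away any indirect output access before nir_lower_io runs.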
*/ 3095 bool has_indirect_outputs = 3096 (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 && 3097 nir->xfb_info == NULL; 3098 3099 if (!has_indirect_inputs || !has_indirect_outputs) { 3100 NIR_PASS_V(nir, nir_lower_io_to_temporaries, 3101 nir_shader_get_entrypoint(nir), !has_indirect_outputs, 3102 !has_indirect_inputs); 3103 3104 /* We need to lower all the copy_deref's introduced by lower_io_to- 3105 * _temporaries before calling nir_lower_io. 3106 */ 3107 NIR_PASS_V(nir, nir_split_var_copies); 3108 NIR_PASS_V(nir, nir_lower_var_copies); 3109 NIR_PASS_V(nir, nir_lower_global_vars_to_local); 3110 } 3111 3112 if (nir->info.stage == MESA_SHADER_FRAGMENT && 3113 nir->options->lower_fs_color_inputs) 3114 NIR_PASS_V(nir, nir_lower_color_inputs); 3115 3116 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in, 3117 type_size_vec4, nir_lower_io_lower_64bit_to_32); 3118 3119 /* nir_io_add_const_offset_to_base needs actual constants. */ 3120 NIR_PASS_V(nir, nir_opt_constant_folding); 3121 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | 3122 nir_var_shader_out); 3123 3124 /* Lower and remove dead derefs and variables to clean up the IR. */ 3125 NIR_PASS_V(nir, nir_lower_vars_to_ssa); 3126 NIR_PASS_V(nir, nir_opt_dce); 3127 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp | 3128 nir_var_shader_in | nir_var_shader_out, NULL); 3129 3130 if (nir->xfb_info) 3131 NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); 3132 3133 nir->info.io_lowered = true; 3134} 3135
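
/*
 * Hedged driver-side sketch (not part of this pass): a driver whose compiler
 * options set lower_io_variables would typically just call
 *
 *    nir_lower_io_passes(nir);
 *
 * after linking; on return, shader_in/shader_out access has been rewritten
 * to load_input/store_output-style intrinsics and nir->info.io_lowered is
 * set.
 */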