/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, or outputs_written
 * bitfield corresponding to this variable.
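 *
 * For example, a non-arrayed mat4 output at VARYING_SLOT_VAR3 covers four
 * slots, so the returned mask is BITFIELD64_MASK(4) << VARYING_SLOT_VAR3.
 * Per-patch variables are reported relative to VARYING_SLOT_PATCH0 instead.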
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out);
   assert(var->data.location >= 0);
   assert(location < 64);

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return BITFIELD64_MASK(slots) << location;
}

static bool
is_non_generic_patch_var(nir_variable *var)
{
   return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX1;
}

static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.patch) {
                  if (is_non_generic_patch_var(var))
                     continue;

                  patches_read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               } else {
                  read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               }
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 * representing each .location_frac used. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
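 *
 * For example, a scalar output at VARYING_SLOT_VAR2 with location_frac == 1
 * is considered used only if used[1] has BITFIELD64_BIT(VARYING_SLOT_VAR2)
 * set; otherwise it is demoted.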
 */
bool
nir_remove_unused_io_vars(nir_shader *shader,
                          nir_variable_mode mode,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         if (shader->info.stage != MESA_SHADER_MESH || var->data.location != VARYING_SLOT_PRIMITIVE_ID)
            continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         if (shader->info.stage == MESA_SHADER_MESH &&
             (shader->info.outputs_read & BITFIELD64_BIT(var->data.location)))
            var->data.mode = nir_var_mem_shared;
         else
            var->data.mode = nir_var_shader_temp;
         var->data.location = 0;

         progress = true;
      }
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_dominance |
                                  nir_metadata_block_index);
      nir_fixup_deref_modes(shader);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_shader_out_variable(var, producer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_shader_in_variable(var, consumer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
                                        patches_written) || progress;

   return progress;
}

static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (var->data.per_primitive)
      return INTERP_MODE_NONE;
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bitsizes
    * other than 32-bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_mediump;
   bool is_per_primitive;
};

/* Packing arrays and dual slot varyings is difficult so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(nir_shader *shader,
                                nir_variable_mode mode,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type) &&
             !var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
            comps[location + i].is_mediump =
               var->data.precision == GLSL_PRECISION_MEDIUM ||
               var->data.precision == GLSL_PRECISION_LOW;
            comps[location + i].is_per_primitive = var->data.per_primitive;
         }
      }
   }
}

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   const gl_shader_stage stage = shader->info.stage;
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these so we need to copy the existing mask
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool is_per_primitive;
   bool is_mediump;
   bool is_intra_stage_only;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* Sort per-primitive outputs after per-vertex ones to allow
    * better compaction when they are mixed in the shader's source.
    */
   if (comp1->is_per_primitive != comp2->is_per_primitive)
      return comp1->is_per_primitive ? 1 : -1;

   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
      return comp1->is_intra_stage_only ? 1 : -1;

   /* Group mediump varyings together. */
   if (comp1->is_mediump != comp2->is_mediump)
      return comp1->is_mediump ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   const struct nir_variable_data *const data1 = &comp1->var->data;
   const struct nir_variable_data *const data2 = &comp2->var->data;
   if (data1->location != data2->location)
      return data1->location - data2->location;
   return (int)data1->location_frac - (int)data2->location_frac;
}

static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_shader_out_variable(var, producer) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(deref, nir_var_shader_in))
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx-1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
                in_var->data.per_view) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
            vc_info->is_per_primitive = in_var->data.per_primitive;
            vc_info->is_mediump = !producer->options->linker_ignore_precision &&
               (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
                in_var->data.precision == GLSL_PRECISION_LOW);
            vc_info->is_intra_stage_only = false;
            vc_info->initialised = true;
         }
      }
   }

   /* Walk over the shader and populate the varying component info array
    * for varyings which are read by other TCS instances but are not consumed
    * by the TES.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      impl = nir_shader_get_entrypoint(producer);

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            /* We only remap things that aren't builtins. */
            nir_variable *out_var = nir_deref_instr_get_variable(deref);
            if (out_var->data.location < VARYING_SLOT_VAR0)
               continue;

            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
            if (location >= MAX_VARYINGS_INCL_PATCH)
               continue;

            unsigned var_info_idx =
               store_varying_info_idx[location][out_var->data.location_frac];
            if (!var_info_idx) {
               /* Something went wrong, the shader interfaces didn't match, so
                * abandon packing. This can happen for example when the
                * inputs are scalars but the outputs are struct members.
                */
               *varying_comp_info_size = 0;
               break;
            }

            struct varying_component *vc_info =
               &(*varying_comp_info)[var_info_idx-1];

            if (!vc_info->initialised) {
               const struct glsl_type *type = out_var->type;
               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               vc_info->var = out_var;
               vc_info->interp_type =
                  get_interp_type(out_var, type, default_to_smooth_interp);
               vc_info->interp_loc = get_interp_loc(out_var);
               vc_info->is_32bit = glsl_type_is_32bit(type);
               vc_info->is_patch = out_var->data.patch;
               vc_info->is_per_primitive = out_var->data.per_primitive;
               vc_info->is_mediump = !producer->options->linker_ignore_precision &&
                  (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
                   out_var->data.precision == GLSL_PRECISION_LOW);
               vc_info->is_intra_stage_only = true;
               vc_info->initialised = true;
            }
         }
      }
   }

   for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
      struct varying_component *vc_info = &(*varying_comp_info)[i];
      if (!vc_info->initialised) {
         /* Something went wrong, the shader interfaces didn't match, so
          * abandon packing. This can happen for example when the outputs are
          * scalars but the inputs are struct members.
          */
         *varying_comp_info_size = 0;
         break;
      }
   }
}

static bool
allow_pack_interp_type(nir_pack_varying_options options, int type)
{
   int sel;

   switch (type) {
   case INTERP_MODE_NONE:
      sel = nir_pack_varying_interp_mode_none;
      break;
   case INTERP_MODE_SMOOTH:
      sel = nir_pack_varying_interp_mode_smooth;
      break;
   case INTERP_MODE_FLAT:
      sel = nir_pack_varying_interp_mode_flat;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      sel = nir_pack_varying_interp_mode_noperspective;
      break;
   default:
      return false;
   }

   return options & sel;
}

static bool
allow_pack_interp_loc(nir_pack_varying_options options, int loc)
{
   int sel;

   switch (loc) {
   case INTERPOLATE_LOC_SAMPLE:
      sel = nir_pack_varying_interp_loc_sample;
      break;
   case INTERPOLATE_LOC_CENTROID:
      sel = nir_pack_varying_interp_loc_centroid;
      break;
   case INTERPOLATE_LOC_CENTER:
      sel = nir_pack_varying_interp_loc_center;
      break;
   default:
      return false;
   }

   return options & sel;
}

static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location,
                       nir_pack_varying_options options)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* Don't pack per-primitive and per-vertex varyings together. */
         if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching precision. */
         if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching interpolation types,
          * unless the driver tells us it can cope with mismatches.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
             (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
              !allow_pack_interp_type(options, info->interp_type))) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching interpolation locations,
          * unless the driver tells us it can cope with mismatches.
          */
         if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
             (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
              !allow_pack_interp_loc(options, info->interp_loc))) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
      assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}
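
/* Sketch of the intended effect of assign_remap_locations(): two scalar,
 * flat, 32-bit varyings that originally lived at VAR1.x and VAR3.x would
 * both be remapped into VAR0 (components x and y), assuming nothing else
 * already occupies those components.
 */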

/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(producer, consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   nir_pack_varying_options options = consumer->options->pack_varying_options;

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
                                options);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING,
                                options);

         /* Check if we failed to assign a remap location. This can happen if
          * for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING,
                                   options);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(consumer, nir_var_shader_in, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(producer, nir_var_shader_out, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};

   get_unmoveable_components_masks(producer, nir_var_shader_out,
                                   assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(consumer, nir_var_shader_in,
                                   assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
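
/* Typical usage from a driver's link step (sketch only; the cleanup and
 * optimization passes that run in between vary per driver):
 *
 *    nir_remove_unused_varyings(producer, consumer);
 *    // ... run the usual optimization loop so the demoted
 *    // nir_var_shader_temp variables and their stores go away ...
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */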

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING][4] = { 0 };

   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location][var->data.location_frac] = var;
      }
   }

   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location][var->data.location_frac]) {
            input_vars[location][var->data.location_frac]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_shader_in_variable(var, consumer) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

static bool
replace_varying_input_by_constant_load(nir_shader *shader,
                                       nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);

         progress = true;
      }
   }

   return progress;
}

static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);

         progress = true;
      }
   }

   return progress;
}

static bool
is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
{
   /* def is sure to be scalar as can_replace_varying() filters out the vector case. */
   assert(def->num_components == 1);

   /* Uniform load may hide behind some move instruction for converting
    * vector to scalar:
    *
    *    vec1 32 ssa_1 = deref_var &color (uniform vec3)
    *    vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
    *    vec1 32 ssa_3 = mov ssa_2.x
    *    vec1 32 ssa_4 = deref_var &color_out (shader_out float)
    *    intrinsic store_deref (ssa_4, ssa_3) (1, 0)
    */
   *s = nir_ssa_scalar_resolved(def, 0);

   nir_ssa_def *ssa = s->def;
   if (ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   /* TODO: support nir_var_mem_ubo. */
   if (!nir_deref_mode_is(deref, nir_var_uniform))
      return false;

   /* Does not support indirect uniform load. */
   return !nir_deref_instr_has_indirect(deref);
}

static nir_variable *
get_uniform_var_in_consumer(nir_shader *consumer,
                            nir_variable *var_in_producer)
{
   /* Find if the uniform already exists in the consumer. */
   nir_variable *new_var = NULL;
   nir_foreach_uniform_variable(v, consumer) {
      if (!strcmp(var_in_producer->name, v->name)) {
         new_var = v;
         break;
      }
   }

   /* Create a variable if it doesn't exist. */
   if (!new_var) {
      new_var = nir_variable_clone(var_in_producer, consumer);
      nir_shader_add_variable(consumer, new_var);
   }

   return new_var;
}

static nir_deref_instr *
clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var)
      return nir_build_deref_var(b, var);

   nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
   nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);

   /* Build array and struct deref instruction.
    * "deref" instr is sure to be direct (see is_direct_uniform_load()).
    */
   switch (deref->deref_type) {
   case nir_deref_type_array: {
      nir_load_const_instr *index =
         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
      return nir_build_deref_array_imm(b, parent, index->value->i64);
   }
   case nir_deref_type_ptr_as_array: {
      nir_load_const_instr *index =
         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
      nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
                                        parent->dest.ssa.bit_size);
      return nir_build_deref_ptr_as_array(b, parent, ssa);
   }
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, parent, deref->strct.index);
   default:
      unreachable("invalid type");
      return NULL;
   }
}

static bool
replace_varying_input_by_uniform_load(nir_shader *shader,
                                      nir_intrinsic_instr *store_intr,
                                      nir_ssa_scalar *scalar)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_variable *uni_var = nir_deref_instr_get_variable(deref);
   uni_var = get_uniform_var_in_consumer(shader, uni_var);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         /* Clone instructions start from deref load to variable deref. */
         nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
         nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);

         /* Add a vector to scalar move if uniform is a vector. */
         if (uni_def->num_components > 1) {
            nir_alu_src src = {0};
            src.src = nir_src_for_ssa(uni_def);
            src.swizzle[0] = scalar->comp;
            uni_def = nir_mov_alu(&b, src, 1);
         }

         /* Replace load input with load uniform. */
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);

         progress = true;
      }
   }

   return progress;
}

/* The GLSL ES 3.20 spec says:
 *
 * "The precision of a vertex output does not need to match the precision of
 * the corresponding fragment input. The minimum precision at which vertex
 * outputs are interpolated is the minimum of the vertex output precision and
 * the fragment input precision, with the exception that for highp,
 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
 * Output Matching by Name in Linked Programs")
 *
 * To implement this, when linking shaders we will take the minimum precision
 * qualifier (allowing drivers to interpolate at lower precision). For
 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
 * requires we use the *last* specified precision if there is a conflict.
 *
 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
 * NONE, we'll return the other precision, since there is no conflict.
 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
 * "backwards". For non-fragment stages, we'll pick the latter precision to
 * comply with the spec. (Note that the order matters.)
 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them for the
 * transform feedback memory store, but not for the output store.
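 *
 * For example, a highp vertex output feeding a mediump fragment input ends
 * up interpolated at mediump, while between a vertex and a geometry shader
 * the geometry shader's (consumer's) precision is what both sides get.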
 */

static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
{
   if (producer == GLSL_PRECISION_NONE)
      return consumer;
   else if (consumer == GLSL_PRECISION_NONE)
      return producer;
   else
      return fs ? MAX2(producer, consumer) : consumer;
}

void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
{
   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;

   nir_foreach_shader_out_variable(producer_var, producer) {
      /* Skip if the slot is not assigned */
      if (producer_var->data.location < 0)
         continue;

      nir_variable *consumer_var = nir_find_variable_with_location(consumer,
            nir_var_shader_in, producer_var->data.location);

      /* Skip if the variable will be eliminated */
      if (!consumer_var)
         continue;

      /* Now we have a pair of variables. Let's pick the smaller precision. */
      unsigned precision_1 = producer_var->data.precision;
      unsigned precision_2 = consumer_var->data.precision;
      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);

      /* Propagate the new precision */
      producer_var->data.precision = consumer_var->data.precision = minimum;
   }
}

bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      nir_ssa_def *ssa = intr->src[1].ssa;
      if (ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_varying_input_by_constant_load(consumer, intr);
         continue;
      }

      nir_ssa_scalar uni_scalar;
      if (is_direct_uniform_load(ssa, &uni_scalar)) {
         if (consumer->options->lower_varying_from_uniform) {
            progress |= replace_varying_input_by_uniform_load(consumer, intr,
                                                              &uni_scalar);
            continue;
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            /* The varying is loaded from the same uniform, so no need to do
             * any interpolation. Mark it as flat explicitly.
             */
            if (!consumer->options->no_integers &&
                in_var && in_var->data.interpolation <= INTERP_MODE_NOPERSPECTIVE) {
               in_var->data.interpolation = INTERP_MODE_FLAT;
               out_var->data.interpolation = INTERP_MODE_FLAT;
            }
         }
      }

      struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
      if (entry) {
         progress |= replace_duplicate_input(consumer,
                                             (nir_variable *) entry->data,
                                             intr);
      } else {
         nir_variable *in_var = get_matching_input_var(consumer, out_var);
         if (in_var) {
            _mesa_hash_table_insert(varying_values, ssa, in_var);
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}
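
/* Illustrative example (not exercised here): if the last block of a vertex
 * shader stores a load_const such as
 *
 *    color_out = 1.0;
 *
 * nir_link_opt_varyings() rewrites the matching fragment-shader input loads
 * to use the constant 1.0 directly, after which the varying itself can
 * typically be removed by the dead-variable and nir_remove_unused_varyings()
 * passes that run later.
 */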

/* TODO any better helper somewhere to sort a list? */

static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      /* Use the `per_primitive` bool to sort per-primitive variables
       * to the end of the list, so they get the last driver locations
       * by nir_assign_io_var_locations.
       *
       * This is done because AMD HW requires that per-primitive outputs
       * are the last params.
       * In the future we can add an option for this, if needed by other HW.
       */
      if (new_var->data.per_primitive < var->data.per_primitive ||
          (new_var->data.per_primitive == var->data.per_primitive &&
           (var->data.location > new_var->data.location ||
            (var->data.location == new_var->data.location &&
             var->data.location_frac > new_var->data.location_frac)))) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
              struct exec_list *sorted_list)
{
   exec_list_make_empty(sorted_list);
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      exec_node_remove(&var->node);
      insert_sorted(sorted_list, var);
   }
}

void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
{
   unsigned location = 0;
   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
   uint64_t processed_locs[2] = {0};

   struct exec_list io_vars;
   sort_varyings(shader, mode, &io_vars);

   int ASSERTED last_loc = 0;
   bool ASSERTED last_per_prim = false;
   bool last_partial = false;
   nir_foreach_variable_in_list(var, &io_vars) {
      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      int base;
      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
         base = VERT_ATTRIB_GENERIC0;
      else if (var->data.mode == nir_var_shader_out &&
               stage == MESA_SHADER_FRAGMENT)
         base = FRAG_RESULT_DATA0;
      else
         base = VARYING_SLOT_VAR0;

      unsigned var_size, driver_size;
      if (var->data.compact) {
         /* If we are inside a partial compact,
          * don't allow another compact to be in this slot
          * if it starts at component 0.
          */
         if (last_partial && var->data.location_frac == 0) {
            location++;
         }

         /* compact variables must be arrays of scalars */
         assert(!var->data.per_view);
         assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
         unsigned start = 4 * location + var->data.location_frac;
         unsigned end = start + glsl_get_length(type);
         var_size = driver_size = end / 4 - location;
         last_partial = end % 4 != 0;
      } else {
         /* Compact variables bypass the normal varying compacting pass,
          * which means they cannot be in the same vec4 slot as a normal
          * variable. If part of the current slot is taken up by a compact
          * variable, we need to go to the next one.
          */
         if (last_partial) {
            location++;
            last_partial = false;
         }

         /* per-view variables have an extra array dimension, which is ignored
          * when counting user-facing slots (var->data.location), but *not*
          * with driver slots (var->data.driver_location). That is, each user
          * slot maps to multiple driver slots.
          */
         driver_size = glsl_count_attribute_slots(type, false);
         if (var->data.per_view) {
            assert(glsl_type_is_array(type));
            var_size =
               glsl_count_attribute_slots(glsl_get_array_element(type), false);
         } else {
            var_size = driver_size;
         }
      }

      /* Builtins don't allow component packing so we only need to worry about
       * user defined varyings sharing the same location.
       */
      bool processed = false;
      if (var->data.location >= base) {
         unsigned glsl_location = var->data.location - base;

         for (unsigned i = 0; i < var_size; i++) {
            if (processed_locs[var->data.index] &
                ((uint64_t)1 << (glsl_location + i)))
               processed = true;
            else
               processed_locs[var->data.index] |=
                  ((uint64_t)1 << (glsl_location + i));
         }
      }

      /* Because component packing allows varyings to share the same location
       * we may already have processed this location.
       */
      if (processed) {
         /* TODO handle overlapping per-view variables */
         assert(!var->data.per_view);
         unsigned driver_location = assigned_locations[var->data.location];
         var->data.driver_location = driver_location;

         /* An array may be packed such that it crosses multiple other arrays
          * or variables, we need to make sure we have allocated the elements
          * consecutively if the previously processed var was shorter than
          * the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in ascending
          * location order, but per-vertex/per-primitive outputs may be
          * grouped separately.
          */
         assert(last_loc <= var->data.location ||
                last_per_prim != var->data.per_primitive);
         last_loc = var->data.location;
         last_per_prim = var->data.per_primitive;
         unsigned last_slot_location = driver_location + var_size;
         if (last_slot_location > location) {
            unsigned num_unallocated_slots = last_slot_location - location;
            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
               assigned_locations[var->data.location + i] = location;
               location++;
            }
         }
         continue;
      }

      for (unsigned i = 0; i < var_size; i++) {
         assigned_locations[var->data.location + i] = location + i;
      }

      var->data.driver_location = location;
      location += driver_size;
   }

   if (last_partial)
      location++;

   exec_list_append(&shader->variables, &io_vars);
   *size = location;
}

static uint64_t
get_linked_variable_location(unsigned location, bool patch)
{
   if (!patch)
      return location;

   /* Reserve locations 0...3 for special patch variables
    * like tess factors and bounding boxes, and the generic patch
    * variables will come after them.
    */
   if (location >= VARYING_SLOT_PATCH0)
      return location - VARYING_SLOT_PATCH0 + 4;
   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
            location <= VARYING_SLOT_BOUNDING_BOX1)
      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
   else
      unreachable("Unsupported variable in get_linked_variable_location.");
}
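
/* Example mapping (assuming the gl_varying_slot order TESS_LEVEL_OUTER,
 * TESS_LEVEL_INNER, BOUNDING_BOX0, BOUNDING_BOX1): for patch variables,
 * gl_TessLevelOuter maps to 0, gl_TessLevelInner to 1, the bounding-box
 * slots to 2 and 3, and VARYING_SLOT_PATCH0 + n to 4 + n.
 */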

static uint64_t
get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
{
   const struct glsl_type *type = variable->type;

   if (nir_is_arrayed_io(variable, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   if (variable->data.compact) {
      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
      slots = DIV_ROUND_UP(component_count, 4);
   }

   uint64_t mask = u_bit_consecutive64(0, slots);
   return mask;
}

nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
{
   assert(producer);
   assert(consumer);

   uint64_t producer_output_mask = 0;
   uint64_t producer_patch_output_mask = 0;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         producer_patch_output_mask |= mask << loc;
      else
         producer_output_mask |= mask << loc;
   }

   uint64_t consumer_input_mask = 0;
   uint64_t consumer_patch_input_mask = 0;

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         consumer_patch_input_mask |= mask << loc;
      else
         consumer_input_mask |= mask << loc;
   }

   uint64_t io_mask = producer_output_mask | consumer_input_mask;
   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_linked_io_var_info result = {
      .num_linked_io_vars = util_bitcount64(io_mask),
      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
   };

   return result;
}