1/* 2 * Copyright © 2012 Intel Corporation 3 * Copyright © 2021 Valve Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 */ 24 25/** 26 * Linker functions related specifically to linking varyings between shader 27 * stages. 28 */ 29 30#include "main/errors.h" 31#include "main/macros.h" 32#include "main/menums.h" 33#include "main/mtypes.h" 34#include "util/hash_table.h" 35#include "util/u_math.h" 36 37#include "nir.h" 38#include "nir_builder.h" 39#include "gl_nir.h" 40#include "gl_nir_link_varyings.h" 41#include "gl_nir_linker.h" 42#include "linker_util.h" 43#include "nir_gl_types.h" 44 45 46/** 47 * Get the varying type stripped of the outermost array if we're processing 48 * a stage whose varyings are arrays indexed by a vertex number (such as 49 * geometry shader inputs). 50 */ 51static const struct glsl_type * 52get_varying_type(const nir_variable *var, gl_shader_stage stage) 53{ 54 const struct glsl_type *type = var->type; 55 if (nir_is_arrayed_io(var, stage) || var->data.per_view) { 56 assert(glsl_type_is_array(type)); 57 type = glsl_get_array_element(type); 58 } 59 60 return type; 61} 62 63static bool 64varying_has_user_specified_location(const nir_variable *var) 65{ 66 return var->data.explicit_location && 67 var->data.location >= VARYING_SLOT_VAR0; 68} 69 70static void 71create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name, 72 size_t name_length, unsigned *count, 73 const char *ifc_member_name, 74 const struct glsl_type *ifc_member_t, 75 char ***varying_names) 76{ 77 if (glsl_type_is_interface(t)) { 78 size_t new_length = name_length; 79 80 assert(ifc_member_name && ifc_member_t); 81 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name); 82 83 create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count, 84 NULL, NULL, varying_names); 85 } else if (glsl_type_is_struct(t)) { 86 for (unsigned i = 0; i < glsl_get_length(t); i++) { 87 const char *field = glsl_get_struct_elem_name(t, i); 88 size_t new_length = name_length; 89 90 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); 91 92 create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, i), name, 93 new_length, count, NULL, NULL, 94 varying_names); 95 } 96 } else if (glsl_type_is_struct(glsl_without_array(t)) || 97 glsl_type_is_interface(glsl_without_array(t)) || 98 (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)))) { 99 for (unsigned i = 0; i < glsl_get_length(t); i++) { 100 size_t new_length = name_length; 101 102 /* Append the subscript to the current variable name */ 103 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); 104 105 create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name, 106 new_length, count, ifc_member_name, 107 ifc_member_t, varying_names); 108 } 109 } else { 110 (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name); 111 } 112} 113 114static bool 115process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh, 116 struct gl_shader_program *prog, 117 unsigned *num_xfb_decls, 118 char ***varying_names) 119{ 120 bool has_xfb_qualifiers = false; 121 122 /* We still need to enable transform feedback mode even if xfb_stride is 123 * only applied to a global out. Also we don't bother to propagate 124 * xfb_stride to interface block members so this will catch that case also. 125 */ 126 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 127 if (prog->TransformFeedback.BufferStride[j]) { 128 has_xfb_qualifiers = true; 129 break; 130 } 131 } 132 133 nir_foreach_shader_out_variable(var, sh->Program->nir) { 134 /* From the ARB_enhanced_layouts spec: 135 * 136 * "Any shader making any static use (after preprocessing) of any of 137 * these *xfb_* qualifiers will cause the shader to be in a 138 * transform feedback capturing mode and hence responsible for 139 * describing the transform feedback setup. This mode will capture 140 * any output selected by *xfb_offset*, directly or indirectly, to 141 * a transform feedback buffer." 142 */ 143 if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) { 144 has_xfb_qualifiers = true; 145 } 146 147 if (var->data.explicit_offset) { 148 *num_xfb_decls += glsl_varying_count(var->type); 149 has_xfb_qualifiers = true; 150 } 151 } 152 153 if (*num_xfb_decls == 0) 154 return has_xfb_qualifiers; 155 156 unsigned i = 0; 157 *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls); 158 nir_foreach_shader_out_variable(var, sh->Program->nir) { 159 if (var->data.explicit_offset) { 160 char *name; 161 const struct glsl_type *type, *member_type; 162 163 if (var->data.from_named_ifc_block) { 164 type = var->interface_type; 165 166 /* Find the member type before it was altered by lowering */ 167 const struct glsl_type *type_wa = glsl_without_array(type); 168 member_type = 169 glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name)); 170 name = ralloc_strdup(NULL, glsl_get_type_name(type_wa)); 171 } else { 172 type = var->type; 173 member_type = NULL; 174 name = ralloc_strdup(NULL, var->name); 175 } 176 create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i, 177 var->name, member_type, varying_names); 178 ralloc_free(name); 179 } 180 } 181 182 assert(i == *num_xfb_decls); 183 return has_xfb_qualifiers; 184} 185 186/** 187 * Initialize this struct based on a string that was passed to 188 * glTransformFeedbackVaryings. 189 * 190 * If the input is mal-formed, this call still succeeds, but it sets 191 * this->var_name to a mal-formed input, so xfb_decl_find_output_var() 192 * will fail to find any matching variable. 193 */ 194static void 195xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts, 196 const struct gl_extensions *exts, const void *mem_ctx, 197 const char *input) 198{ 199 /* We don't have to be pedantic about what is a valid GLSL variable name, 200 * because any variable with an invalid name can't exist in the IR anyway. 201 */ 202 xfb_decl->location = -1; 203 xfb_decl->orig_name = input; 204 xfb_decl->lowered_builtin_array_variable = none; 205 xfb_decl->skip_components = 0; 206 xfb_decl->next_buffer_separator = false; 207 xfb_decl->matched_candidate = NULL; 208 xfb_decl->stream_id = 0; 209 xfb_decl->buffer = 0; 210 xfb_decl->offset = 0; 211 212 if (exts->ARB_transform_feedback3) { 213 /* Parse gl_NextBuffer. */ 214 if (strcmp(input, "gl_NextBuffer") == 0) { 215 xfb_decl->next_buffer_separator = true; 216 return; 217 } 218 219 /* Parse gl_SkipComponents. */ 220 if (strcmp(input, "gl_SkipComponents1") == 0) 221 xfb_decl->skip_components = 1; 222 else if (strcmp(input, "gl_SkipComponents2") == 0) 223 xfb_decl->skip_components = 2; 224 else if (strcmp(input, "gl_SkipComponents3") == 0) 225 xfb_decl->skip_components = 3; 226 else if (strcmp(input, "gl_SkipComponents4") == 0) 227 xfb_decl->skip_components = 4; 228 229 if (xfb_decl->skip_components) 230 return; 231 } 232 233 /* Parse a declaration. */ 234 const char *base_name_end; 235 long subscript = link_util_parse_program_resource_name(input, strlen(input), 236 &base_name_end); 237 xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); 238 if (xfb_decl->var_name == NULL) { 239 _mesa_error_no_memory(__func__); 240 return; 241 } 242 243 if (subscript >= 0) { 244 xfb_decl->array_subscript = subscript; 245 xfb_decl->is_subscripted = true; 246 } else { 247 xfb_decl->is_subscripted = false; 248 } 249 250 /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this 251 * class must behave specially to account for the fact that gl_ClipDistance 252 * is converted from a float[8] to a vec4[2]. 253 */ 254 if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 255 strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0) { 256 xfb_decl->lowered_builtin_array_variable = clip_distance; 257 } 258 if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 259 strcmp(xfb_decl->var_name, "gl_CullDistance") == 0) { 260 xfb_decl->lowered_builtin_array_variable = cull_distance; 261 } 262 263 if (consts->LowerTessLevel && 264 (strcmp(xfb_decl->var_name, "gl_TessLevelOuter") == 0)) 265 xfb_decl->lowered_builtin_array_variable = tess_level_outer; 266 if (consts->LowerTessLevel && 267 (strcmp(xfb_decl->var_name, "gl_TessLevelInner") == 0)) 268 xfb_decl->lowered_builtin_array_variable = tess_level_inner; 269} 270 271/** 272 * Determine whether two xfb_decl structs refer to the same variable and 273 * array index (if applicable). 274 */ 275static bool 276xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y) 277{ 278 assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y)); 279 280 if (strcmp(x->var_name, y->var_name) != 0) 281 return false; 282 if (x->is_subscripted != y->is_subscripted) 283 return false; 284 if (x->is_subscripted && x->array_subscript != y->array_subscript) 285 return false; 286 return true; 287} 288 289/** 290 * The total number of varying components taken up by this variable. Only 291 * valid if assign_location() has been called. 292 */ 293static unsigned 294xfb_decl_num_components(struct xfb_decl *xfb_decl) 295{ 296 if (xfb_decl->lowered_builtin_array_variable) 297 return xfb_decl->size; 298 else 299 return xfb_decl->vector_elements * xfb_decl->matrix_columns * 300 xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1); 301} 302 303/** 304 * Assign a location and stream ID for this xfb_decl object based on the 305 * transform feedback candidate found by find_candidate. 306 * 307 * If an error occurs, the error is reported through linker_error() and false 308 * is returned. 309 */ 310static bool 311xfb_decl_assign_location(struct xfb_decl *xfb_decl, 312 const struct gl_constants *consts, 313 struct gl_shader_program *prog) 314{ 315 assert(xfb_decl_is_varying(xfb_decl)); 316 317 unsigned fine_location 318 = xfb_decl->matched_candidate->toplevel_var->data.location * 4 319 + xfb_decl->matched_candidate->toplevel_var->data.location_frac 320 + xfb_decl->matched_candidate->struct_offset_floats; 321 const unsigned dmul = 322 glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1; 323 324 if (glsl_type_is_array(xfb_decl->matched_candidate->type)) { 325 /* Array variable */ 326 const struct glsl_type *element_type = 327 glsl_get_array_element(xfb_decl->matched_candidate->type); 328 const unsigned matrix_cols = glsl_get_matrix_columns(element_type); 329 const unsigned vector_elements = glsl_get_vector_elements(element_type); 330 unsigned actual_array_size; 331 switch (xfb_decl->lowered_builtin_array_variable) { 332 case clip_distance: 333 actual_array_size = prog->last_vert_prog ? 334 prog->last_vert_prog->info.clip_distance_array_size : 0; 335 break; 336 case cull_distance: 337 actual_array_size = prog->last_vert_prog ? 338 prog->last_vert_prog->info.cull_distance_array_size : 0; 339 break; 340 case tess_level_outer: 341 actual_array_size = 4; 342 break; 343 case tess_level_inner: 344 actual_array_size = 2; 345 break; 346 case none: 347 default: 348 actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type); 349 break; 350 } 351 352 if (xfb_decl->is_subscripted) { 353 /* Check array bounds. */ 354 if (xfb_decl->array_subscript >= actual_array_size) { 355 linker_error(prog, "Transform feedback varying %s has index " 356 "%i, but the array size is %u.", 357 xfb_decl->orig_name, xfb_decl->array_subscript, 358 actual_array_size); 359 return false; 360 } 361 unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ? 362 1 : vector_elements * matrix_cols * dmul; 363 fine_location += array_elem_size * xfb_decl->array_subscript; 364 xfb_decl->size = 1; 365 } else { 366 xfb_decl->size = actual_array_size; 367 } 368 xfb_decl->vector_elements = vector_elements; 369 xfb_decl->matrix_columns = matrix_cols; 370 if (xfb_decl->lowered_builtin_array_variable) 371 xfb_decl->type = GL_FLOAT; 372 else 373 xfb_decl->type = glsl_get_gl_type(element_type); 374 } else { 375 /* Regular variable (scalar, vector, or matrix) */ 376 if (xfb_decl->is_subscripted) { 377 linker_error(prog, "Transform feedback varying %s requested, " 378 "but %s is not an array.", 379 xfb_decl->orig_name, xfb_decl->var_name); 380 return false; 381 } 382 xfb_decl->size = 1; 383 xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type); 384 xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type); 385 xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type); 386 } 387 xfb_decl->location = fine_location / 4; 388 xfb_decl->location_frac = fine_location % 4; 389 390 /* From GL_EXT_transform_feedback: 391 * A program will fail to link if: 392 * 393 * * the total number of components to capture in any varying 394 * variable in <varyings> is greater than the constant 395 * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the 396 * buffer mode is SEPARATE_ATTRIBS_EXT; 397 */ 398 if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && 399 xfb_decl_num_components(xfb_decl) > 400 consts->MaxTransformFeedbackSeparateComponents) { 401 linker_error(prog, "Transform feedback varying %s exceeds " 402 "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.", 403 xfb_decl->orig_name); 404 return false; 405 } 406 407 /* Only transform feedback varyings can be assigned to non-zero streams, 408 * so assign the stream id here. 409 */ 410 xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream; 411 412 unsigned array_offset = xfb_decl->array_subscript * 4 * dmul; 413 unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4; 414 xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer; 415 xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset + 416 array_offset + struct_offset; 417 418 return true; 419} 420 421static unsigned 422xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl) 423{ 424 if (!xfb_decl_is_varying(xfb_decl)) { 425 return 0; 426 } 427 428 if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) { 429 unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1; 430 unsigned rows_per_element = DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4); 431 return xfb_decl->size * xfb_decl->matrix_columns * rows_per_element; 432 } else { 433 return (xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac + 3) / 4; 434 } 435} 436 437static bool 438xfb_decl_is_varying_written(struct xfb_decl *xfb_decl) 439{ 440 if (xfb_decl->next_buffer_separator || xfb_decl->skip_components) 441 return false; 442 443 return xfb_decl->matched_candidate->toplevel_var->data.assigned; 444} 445 446/** 447 * Update gl_transform_feedback_info to reflect this xfb_decl. 448 * 449 * If an error occurs, the error is reported through linker_error() and false 450 * is returned. 451 */ 452static bool 453xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts, 454 struct gl_shader_program *prog, 455 struct gl_transform_feedback_info *info, 456 unsigned buffer, unsigned buffer_index, 457 const unsigned max_outputs, 458 BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS], 459 bool *explicit_stride, unsigned *max_member_alignment, 460 bool has_xfb_qualifiers, const void* mem_ctx) 461{ 462 unsigned xfb_offset = 0; 463 unsigned size = xfb_decl->size; 464 /* Handle gl_SkipComponents. */ 465 if (xfb_decl->skip_components) { 466 info->Buffers[buffer].Stride += xfb_decl->skip_components; 467 size = xfb_decl->skip_components; 468 goto store_varying; 469 } 470 471 if (xfb_decl->next_buffer_separator) { 472 size = 0; 473 goto store_varying; 474 } 475 476 if (has_xfb_qualifiers) { 477 xfb_offset = xfb_decl->offset / 4; 478 } else { 479 xfb_offset = info->Buffers[buffer].Stride; 480 } 481 info->Varyings[info->NumVarying].Offset = xfb_offset * 4; 482 483 { 484 unsigned location = xfb_decl->location; 485 unsigned location_frac = xfb_decl->location_frac; 486 unsigned num_components = xfb_decl_num_components(xfb_decl); 487 488 /* From GL_EXT_transform_feedback: 489 * 490 * " A program will fail to link if: 491 * 492 * * the total number of components to capture is greater than the 493 * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT 494 * and the buffer mode is INTERLEAVED_ATTRIBS_EXT." 495 * 496 * From GL_ARB_enhanced_layouts: 497 * 498 * " The resulting stride (implicit or explicit) must be less than or 499 * equal to the implementation-dependent constant 500 * gl_MaxTransformFeedbackInterleavedComponents." 501 */ 502 if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS || 503 has_xfb_qualifiers) && 504 xfb_offset + num_components > 505 consts->MaxTransformFeedbackInterleavedComponents) { 506 linker_error(prog, 507 "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " 508 "limit has been exceeded."); 509 return false; 510 } 511 512 /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers, 513 * Page 76, (Transform Feedback Layout Qualifiers): 514 * 515 * " No aliasing in output buffers is allowed: It is a compile-time or 516 * link-time error to specify variables with overlapping transform 517 * feedback offsets." 518 */ 519 const unsigned max_components = 520 consts->MaxTransformFeedbackInterleavedComponents; 521 const unsigned first_component = xfb_offset; 522 const unsigned last_component = xfb_offset + num_components - 1; 523 const unsigned start_word = BITSET_BITWORD(first_component); 524 const unsigned end_word = BITSET_BITWORD(last_component); 525 BITSET_WORD *used; 526 assert(last_component < max_components); 527 528 if (!used_components[buffer]) { 529 used_components[buffer] = 530 rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components)); 531 } 532 used = used_components[buffer]; 533 534 for (unsigned word = start_word; word <= end_word; word++) { 535 unsigned start_range = 0; 536 unsigned end_range = BITSET_WORDBITS - 1; 537 538 if (word == start_word) 539 start_range = first_component % BITSET_WORDBITS; 540 541 if (word == end_word) 542 end_range = last_component % BITSET_WORDBITS; 543 544 if (used[word] & BITSET_RANGE(start_range, end_range)) { 545 linker_error(prog, 546 "variable '%s', xfb_offset (%d) is causing aliasing.", 547 xfb_decl->orig_name, xfb_offset * 4); 548 return false; 549 } 550 used[word] |= BITSET_RANGE(start_range, end_range); 551 } 552 553 const unsigned type_num_components = 554 xfb_decl->vector_elements * 555 (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1); 556 unsigned current_type_components_left = type_num_components; 557 558 while (num_components > 0) { 559 unsigned output_size = 0; 560 561 /* From GL_ARB_enhanced_layouts: 562 * 563 * "When an attribute variable declared using an array type is bound to 564 * generic attribute index <i>, the active array elements are assigned to 565 * consecutive generic attributes beginning with generic attribute <i>. The 566 * number of attributes and components assigned to each element are 567 * determined according to the data type of array elements and "component" 568 * layout qualifier (if any) specified in the declaration of the array." 569 * 570 * "When an attribute variable declared using a matrix type is bound to a 571 * generic attribute index <i>, its values are taken from consecutive generic 572 * attributes beginning with generic attribute <i>. Such matrices are 573 * treated as an array of column vectors with values taken from the generic 574 * attributes. 575 * This means there may be gaps in the varyings we are taking values from." 576 * 577 * Examples: 578 * 579 * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; | 580 * | | | 581 * | 32b 32b 32b 32b | 32b 32b 32b 32b | 582 * | 0 X X Y Y | 4 X Y 0 0 | 583 * | 1 Z Z 0 0 | 5 X Y 0 0 | 584 * | 2 X X Y Y | 6 X Y 0 0 | 585 * | 3 Z Z 0 0 | 7 X Y 0 0 | 586 * 587 */ 588 if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) { 589 output_size = MIN3(num_components, current_type_components_left, 4); 590 current_type_components_left -= output_size; 591 if (current_type_components_left == 0) { 592 current_type_components_left = type_num_components; 593 } 594 } else { 595 output_size = MIN2(num_components, 4 - location_frac); 596 } 597 598 assert((info->NumOutputs == 0 && max_outputs == 0) || 599 info->NumOutputs < max_outputs); 600 601 /* From the ARB_enhanced_layouts spec: 602 * 603 * "If such a block member or variable is not written during a shader 604 * invocation, the buffer contents at the assigned offset will be 605 * undefined. Even if there are no static writes to a variable or 606 * member that is assigned a transform feedback offset, the space is 607 * still allocated in the buffer and still affects the stride." 608 */ 609 if (xfb_decl_is_varying_written(xfb_decl)) { 610 info->Outputs[info->NumOutputs].ComponentOffset = location_frac; 611 info->Outputs[info->NumOutputs].OutputRegister = location; 612 info->Outputs[info->NumOutputs].NumComponents = output_size; 613 info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id; 614 info->Outputs[info->NumOutputs].OutputBuffer = buffer; 615 info->Outputs[info->NumOutputs].DstOffset = xfb_offset; 616 ++info->NumOutputs; 617 } 618 info->Buffers[buffer].Stream = xfb_decl->stream_id; 619 xfb_offset += output_size; 620 621 num_components -= output_size; 622 location++; 623 location_frac = 0; 624 } 625 } 626 627 if (explicit_stride && explicit_stride[buffer]) { 628 if (_mesa_gl_datatype_is_64bit(xfb_decl->type) && 629 info->Buffers[buffer].Stride % 2) { 630 linker_error(prog, "invalid qualifier xfb_stride=%d must be a " 631 "multiple of 8 as its applied to a type that is or " 632 "contains a double.", 633 info->Buffers[buffer].Stride * 4); 634 return false; 635 } 636 637 if (xfb_offset > info->Buffers[buffer].Stride) { 638 linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for " 639 "buffer (%d)", xfb_offset * 4, 640 info->Buffers[buffer].Stride * 4, buffer); 641 return false; 642 } 643 } else { 644 if (max_member_alignment && has_xfb_qualifiers) { 645 max_member_alignment[buffer] = MAX2(max_member_alignment[buffer], 646 _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1); 647 info->Buffers[buffer].Stride = ALIGN(xfb_offset, 648 max_member_alignment[buffer]); 649 } else { 650 info->Buffers[buffer].Stride = xfb_offset; 651 } 652 } 653 654 store_varying: 655 info->Varyings[info->NumVarying].name.string = 656 ralloc_strdup(prog, xfb_decl->orig_name); 657 resource_name_updated(&info->Varyings[info->NumVarying].name); 658 info->Varyings[info->NumVarying].Type = xfb_decl->type; 659 info->Varyings[info->NumVarying].Size = size; 660 info->Varyings[info->NumVarying].BufferIndex = buffer_index; 661 info->NumVarying++; 662 info->Buffers[buffer].NumVaryings++; 663 664 return true; 665} 666 667static const struct tfeedback_candidate * 668xfb_decl_find_candidate(struct xfb_decl *xfb_decl, 669 struct gl_shader_program *prog, 670 struct hash_table *tfeedback_candidates) 671{ 672 const char *name = xfb_decl->var_name; 673 switch (xfb_decl->lowered_builtin_array_variable) { 674 case none: 675 name = xfb_decl->var_name; 676 break; 677 case clip_distance: 678 name = "gl_ClipDistanceMESA"; 679 break; 680 case cull_distance: 681 name = "gl_CullDistanceMESA"; 682 break; 683 case tess_level_outer: 684 name = "gl_TessLevelOuterMESA"; 685 break; 686 case tess_level_inner: 687 name = "gl_TessLevelInnerMESA"; 688 break; 689 } 690 struct hash_entry *entry = 691 _mesa_hash_table_search(tfeedback_candidates, name); 692 693 xfb_decl->matched_candidate = entry ? 694 (struct tfeedback_candidate *) entry->data : NULL; 695 696 if (!xfb_decl->matched_candidate) { 697 /* From GL_EXT_transform_feedback: 698 * A program will fail to link if: 699 * 700 * * any variable name specified in the <varyings> array is not 701 * declared as an output in the geometry shader (if present) or 702 * the vertex shader (if no geometry shader is present); 703 */ 704 linker_error(prog, "Transform feedback varying %s undeclared.", 705 xfb_decl->orig_name); 706 } 707 708 return xfb_decl->matched_candidate; 709} 710 711/** 712 * Force a candidate over the previously matched one. It happens when a new 713 * varying needs to be created to match the xfb declaration, for example, 714 * to fullfil an alignment criteria. 715 */ 716static void 717xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl, 718 struct tfeedback_candidate *candidate) 719{ 720 xfb_decl->matched_candidate = candidate; 721 722 /* The subscript part is no longer relevant */ 723 xfb_decl->is_subscripted = false; 724 xfb_decl->array_subscript = 0; 725} 726 727/** 728 * Parse all the transform feedback declarations that were passed to 729 * glTransformFeedbackVaryings() and store them in xfb_decl objects. 730 * 731 * If an error occurs, the error is reported through linker_error() and false 732 * is returned. 733 */ 734static bool 735parse_xfb_decls(const struct gl_constants *consts, 736 const struct gl_extensions *exts, 737 struct gl_shader_program *prog, 738 const void *mem_ctx, unsigned num_names, 739 char **varying_names, struct xfb_decl *decls) 740{ 741 for (unsigned i = 0; i < num_names; ++i) { 742 xfb_decl_init(&decls[i], consts, exts, mem_ctx, varying_names[i]); 743 744 if (!xfb_decl_is_varying(&decls[i])) 745 continue; 746 747 /* From GL_EXT_transform_feedback: 748 * A program will fail to link if: 749 * 750 * * any two entries in the <varyings> array specify the same varying 751 * variable; 752 * 753 * We interpret this to mean "any two entries in the <varyings> array 754 * specify the same varying variable and array index", since transform 755 * feedback of arrays would be useless otherwise. 756 */ 757 for (unsigned j = 0; j < i; ++j) { 758 if (xfb_decl_is_varying(&decls[j])) { 759 if (xfb_decl_is_same(&decls[i], &decls[j])) { 760 linker_error(prog, "Transform feedback varying %s specified " 761 "more than once.", varying_names[i]); 762 return false; 763 } 764 } 765 } 766 } 767 return true; 768} 769 770static int 771cmp_xfb_offset(const void * x_generic, const void * y_generic) 772{ 773 struct xfb_decl *x = (struct xfb_decl *) x_generic; 774 struct xfb_decl *y = (struct xfb_decl *) y_generic; 775 776 if (x->buffer != y->buffer) 777 return x->buffer - y->buffer; 778 return x->offset - y->offset; 779} 780 781/** 782 * Store transform feedback location assignments into 783 * prog->sh.LinkedTransformFeedback based on the data stored in 784 * xfb_decls. 785 * 786 * If an error occurs, the error is reported through linker_error() and false 787 * is returned. 788 */ 789static bool 790store_tfeedback_info(const struct gl_constants *consts, 791 struct gl_shader_program *prog, unsigned num_xfb_decls, 792 struct xfb_decl *xfb_decls, bool has_xfb_qualifiers, 793 const void *mem_ctx) 794{ 795 if (!prog->last_vert_prog) 796 return true; 797 798 /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for 799 * tracking the number of buffers doesn't overflow. 800 */ 801 assert(consts->MaxTransformFeedbackBuffers < 32); 802 803 bool separate_attribs_mode = 804 prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; 805 806 struct gl_program *xfb_prog = prog->last_vert_prog; 807 xfb_prog->sh.LinkedTransformFeedback = 808 rzalloc(xfb_prog, struct gl_transform_feedback_info); 809 810 /* The xfb_offset qualifier does not have to be used in increasing order 811 * however some drivers expect to receive the list of transform feedback 812 * declarations in order so sort it now for convenience. 813 */ 814 if (has_xfb_qualifiers) { 815 qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls), 816 cmp_xfb_offset); 817 } 818 819 xfb_prog->sh.LinkedTransformFeedback->Varyings = 820 rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info, 821 num_xfb_decls); 822 823 unsigned num_outputs = 0; 824 for (unsigned i = 0; i < num_xfb_decls; ++i) { 825 if (xfb_decl_is_varying_written(&xfb_decls[i])) 826 num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]); 827 } 828 829 xfb_prog->sh.LinkedTransformFeedback->Outputs = 830 rzalloc_array(xfb_prog, struct gl_transform_feedback_output, 831 num_outputs); 832 833 unsigned num_buffers = 0; 834 unsigned buffers = 0; 835 BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0}; 836 837 if (!has_xfb_qualifiers && separate_attribs_mode) { 838 /* GL_SEPARATE_ATTRIBS */ 839 for (unsigned i = 0; i < num_xfb_decls; ++i) { 840 if (!xfb_decl_store(&xfb_decls[i], consts, prog, 841 xfb_prog->sh.LinkedTransformFeedback, 842 num_buffers, num_buffers, num_outputs, 843 used_components, NULL, NULL, has_xfb_qualifiers, 844 mem_ctx)) 845 return false; 846 847 buffers |= 1 << num_buffers; 848 num_buffers++; 849 } 850 } 851 else { 852 /* GL_INVERLEAVED_ATTRIBS */ 853 int buffer_stream_id = -1; 854 unsigned buffer = 855 num_xfb_decls ? xfb_decls[0].buffer : 0; 856 bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false }; 857 unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 }; 858 /* Apply any xfb_stride global qualifiers */ 859 if (has_xfb_qualifiers) { 860 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 861 if (prog->TransformFeedback.BufferStride[j]) { 862 explicit_stride[j] = true; 863 xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride = 864 prog->TransformFeedback.BufferStride[j] / 4; 865 } 866 } 867 } 868 869 for (unsigned i = 0; i < num_xfb_decls; ++i) { 870 if (has_xfb_qualifiers && 871 buffer != xfb_decls[i].buffer) { 872 /* we have moved to the next buffer so reset stream id */ 873 buffer_stream_id = -1; 874 num_buffers++; 875 } 876 877 if (xfb_decls[i].next_buffer_separator) { 878 if (!xfb_decl_store(&xfb_decls[i], consts, prog, 879 xfb_prog->sh.LinkedTransformFeedback, 880 buffer, num_buffers, num_outputs, 881 used_components, explicit_stride, 882 max_member_alignment, has_xfb_qualifiers, 883 mem_ctx)) 884 return false; 885 num_buffers++; 886 buffer_stream_id = -1; 887 continue; 888 } 889 890 if (has_xfb_qualifiers) { 891 buffer = xfb_decls[i].buffer; 892 } else { 893 buffer = num_buffers; 894 } 895 896 if (xfb_decl_is_varying(&xfb_decls[i])) { 897 if (buffer_stream_id == -1) { 898 /* First varying writing to this buffer: remember its stream */ 899 buffer_stream_id = (int) xfb_decls[i].stream_id; 900 901 /* Only mark a buffer as active when there is a varying 902 * attached to it. This behaviour is based on a revised version 903 * of section 13.2.2 of the GL 4.6 spec. 904 */ 905 buffers |= 1 << buffer; 906 } else if (buffer_stream_id != 907 (int) xfb_decls[i].stream_id) { 908 /* Varying writes to the same buffer from a different stream */ 909 linker_error(prog, 910 "Transform feedback can't capture varyings belonging " 911 "to different vertex streams in a single buffer. " 912 "Varying %s writes to buffer from stream %u, other " 913 "varyings in the same buffer write from stream %u.", 914 xfb_decls[i].orig_name, 915 xfb_decls[i].stream_id, 916 buffer_stream_id); 917 return false; 918 } 919 } 920 921 if (!xfb_decl_store(&xfb_decls[i], consts, prog, 922 xfb_prog->sh.LinkedTransformFeedback, 923 buffer, num_buffers, num_outputs, used_components, 924 explicit_stride, max_member_alignment, 925 has_xfb_qualifiers, mem_ctx)) 926 return false; 927 } 928 } 929 assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs); 930 931 xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers; 932 return true; 933} 934 935/** 936 * Enum representing the order in which varyings are packed within a 937 * packing class. 938 * 939 * Currently we pack vec4's first, then vec2's, then scalar values, then 940 * vec3's. This order ensures that the only vectors that are at risk of 941 * having to be "double parked" (split between two adjacent varying slots) 942 * are the vec3's. 943 */ 944enum packing_order_enum { 945 PACKING_ORDER_VEC4, 946 PACKING_ORDER_VEC2, 947 PACKING_ORDER_SCALAR, 948 PACKING_ORDER_VEC3, 949}; 950 951/** 952 * Structure recording the relationship between a single producer output 953 * and a single consumer input. 954 */ 955struct match { 956 /** 957 * Packing class for this varying, computed by compute_packing_class(). 958 */ 959 unsigned packing_class; 960 961 /** 962 * Packing order for this varying, computed by compute_packing_order(). 963 */ 964 enum packing_order_enum packing_order; 965 966 /** 967 * The output variable in the producer stage. 968 */ 969 nir_variable *producer_var; 970 971 /** 972 * The input variable in the consumer stage. 973 */ 974 nir_variable *consumer_var; 975 976 /** 977 * The location which has been assigned for this varying. This is 978 * expressed in multiples of a float, with the first generic varying 979 * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the 980 * value 0. 981 */ 982 unsigned generic_location; 983}; 984 985/** 986 * Data structure recording the relationship between outputs of one shader 987 * stage (the "producer") and inputs of another (the "consumer"). 988 */ 989struct varying_matches 990{ 991 /** 992 * If true, this driver disables varying packing, so all varyings need to 993 * be aligned on slot boundaries, and take up a number of slots equal to 994 * their number of matrix columns times their array size. 995 * 996 * Packing may also be disabled because our current packing method is not 997 * safe in SSO or versions of OpenGL where interpolation qualifiers are not 998 * guaranteed to match across stages. 999 */ 1000 bool disable_varying_packing; 1001 1002 /** 1003 * If true, this driver disables packing for varyings used by transform 1004 * feedback. 1005 */ 1006 bool disable_xfb_packing; 1007 1008 /** 1009 * If true, this driver has transform feedback enabled. The transform 1010 * feedback code usually requires at least some packing be done even 1011 * when varying packing is disabled, fortunately where transform feedback 1012 * requires packing it's safe to override the disabled setting. See 1013 * is_varying_packing_safe(). 1014 */ 1015 bool xfb_enabled; 1016 1017 bool enhanced_layouts_enabled; 1018 1019 /** 1020 * If true, this driver prefers varyings to be aligned to power of two 1021 * in a slot. 1022 */ 1023 bool prefer_pot_aligned_varyings; 1024 1025 struct match *matches; 1026 1027 /** 1028 * The number of elements in the \c matches array that are currently in 1029 * use. 1030 */ 1031 unsigned num_matches; 1032 1033 /** 1034 * The number of elements that were set aside for the \c matches array when 1035 * it was allocated. 1036 */ 1037 unsigned matches_capacity; 1038 1039 gl_shader_stage producer_stage; 1040 gl_shader_stage consumer_stage; 1041}; 1042 1043/** 1044 * Comparison function passed to qsort() to sort varyings by packing_class and 1045 * then by packing_order. 1046 */ 1047static int 1048varying_matches_match_comparator(const void *x_generic, const void *y_generic) 1049{ 1050 const struct match *x = (const struct match *) x_generic; 1051 const struct match *y = (const struct match *) y_generic; 1052 1053 if (x->packing_class != y->packing_class) 1054 return x->packing_class - y->packing_class; 1055 return x->packing_order - y->packing_order; 1056} 1057 1058/** 1059 * Comparison function passed to qsort() to sort varyings used only by 1060 * transform feedback when packing of other varyings is disabled. 1061 */ 1062static int 1063varying_matches_xfb_comparator(const void *x_generic, const void *y_generic) 1064{ 1065 const struct match *x = (const struct match *) x_generic; 1066 1067 if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) 1068 return varying_matches_match_comparator(x_generic, y_generic); 1069 1070 /* FIXME: When the comparator returns 0 it means the elements being 1071 * compared are equivalent. However the qsort documentation says: 1072 * 1073 * "The order of equivalent elements is undefined." 1074 * 1075 * In practice the sort ends up reversing the order of the varyings which 1076 * means locations are also assigned in this reversed order and happens to 1077 * be what we want. This is also whats happening in 1078 * varying_matches_match_comparator(). 1079 */ 1080 return 0; 1081} 1082 1083/** 1084 * Comparison function passed to qsort() to sort varyings NOT used by 1085 * transform feedback when packing of xfb varyings is disabled. 1086 */ 1087static int 1088varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic) 1089{ 1090 const struct match *x = (const struct match *) x_generic; 1091 1092 if (x->producer_var != NULL && !x->producer_var->data.is_xfb) 1093 return varying_matches_match_comparator(x_generic, y_generic); 1094 1095 /* FIXME: When the comparator returns 0 it means the elements being 1096 * compared are equivalent. However the qsort documentation says: 1097 * 1098 * "The order of equivalent elements is undefined." 1099 * 1100 * In practice the sort ends up reversing the order of the varyings which 1101 * means locations are also assigned in this reversed order and happens to 1102 * be what we want. This is also whats happening in 1103 * varying_matches_match_comparator(). 1104 */ 1105 return 0; 1106} 1107 1108static bool 1109is_unpackable_tess(gl_shader_stage producer_stage, 1110 gl_shader_stage consumer_stage) 1111{ 1112 if (consumer_stage == MESA_SHADER_TESS_EVAL || 1113 consumer_stage == MESA_SHADER_TESS_CTRL || 1114 producer_stage == MESA_SHADER_TESS_CTRL) 1115 return true; 1116 1117 return false; 1118} 1119 1120static void 1121init_varying_matches(void *mem_ctx, struct varying_matches *vm, 1122 const struct gl_constants *consts, 1123 const struct gl_extensions *exts, 1124 gl_shader_stage producer_stage, 1125 gl_shader_stage consumer_stage, 1126 bool sso) 1127{ 1128 /* Tessellation shaders treat inputs and outputs as shared memory and can 1129 * access inputs and outputs of other invocations. 1130 * Therefore, they can't be lowered to temps easily (and definitely not 1131 * efficiently). 1132 */ 1133 bool unpackable_tess = 1134 is_unpackable_tess(producer_stage, consumer_stage); 1135 1136 /* Transform feedback code assumes varying arrays are packed, so if the 1137 * driver has disabled varying packing, make sure to at least enable 1138 * packing required by transform feedback. See below for exception. 1139 */ 1140 bool xfb_enabled = exts->EXT_transform_feedback && !unpackable_tess; 1141 1142 /* Some drivers actually requires packing to be explicitly disabled 1143 * for varyings used by transform feedback. 1144 */ 1145 bool disable_xfb_packing = consts->DisableTransformFeedbackPacking; 1146 1147 /* Disable packing on outward facing interfaces for SSO because in ES we 1148 * need to retain the unpacked varying information for draw time 1149 * validation. 1150 * 1151 * Packing is still enabled on individual arrays, structs, and matrices as 1152 * these are required by the transform feedback code and it is still safe 1153 * to do so. We also enable packing when a varying is only used for 1154 * transform feedback and its not a SSO. 1155 */ 1156 bool disable_varying_packing = 1157 consts->DisableVaryingPacking || unpackable_tess; 1158 if (sso && (producer_stage == MESA_SHADER_NONE || consumer_stage == MESA_SHADER_NONE)) 1159 disable_varying_packing = true; 1160 1161 /* Note: this initial capacity is rather arbitrarily chosen to be large 1162 * enough for many cases without wasting an unreasonable amount of space. 1163 * varying_matches_record() will resize the array if there are more than 1164 * this number of varyings. 1165 */ 1166 vm->matches_capacity = 8; 1167 vm->matches = (struct match *) 1168 ralloc_array(mem_ctx, struct match, vm->matches_capacity); 1169 vm->num_matches = 0; 1170 1171 vm->disable_varying_packing = disable_varying_packing; 1172 vm->disable_xfb_packing = disable_xfb_packing; 1173 vm->xfb_enabled = xfb_enabled; 1174 vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts; 1175 vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings; 1176 vm->producer_stage = producer_stage; 1177 vm->consumer_stage = consumer_stage; 1178} 1179 1180/** 1181 * Packing is always safe on individual arrays, structures, and matrices. It 1182 * is also safe if the varying is only used for transform feedback. 1183 */ 1184static bool 1185is_varying_packing_safe(struct varying_matches *vm, 1186 const struct glsl_type *type, const nir_variable *var) 1187{ 1188 if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage)) 1189 return false; 1190 1191 return vm->xfb_enabled && (glsl_type_is_array_or_matrix(type) || 1192 glsl_type_is_struct(type) || 1193 var->data.is_xfb_only); 1194} 1195 1196static bool 1197is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type, 1198 const nir_variable *var) 1199{ 1200 return (vm->disable_varying_packing && !is_varying_packing_safe(vm, type, var)) || 1201 (vm->disable_xfb_packing && var->data.is_xfb && 1202 !(glsl_type_is_array(type) || glsl_type_is_struct(type) || 1203 glsl_type_is_matrix(type))) || var->data.must_be_shader_input; 1204} 1205 1206/** 1207 * Compute the "packing class" of the given varying. This is an unsigned 1208 * integer with the property that two variables in the same packing class can 1209 * be safely backed into the same vec4. 1210 */ 1211static unsigned 1212varying_matches_compute_packing_class(const nir_variable *var) 1213{ 1214 /* Without help from the back-end, there is no way to pack together 1215 * variables with different interpolation types, because 1216 * lower_packed_varyings must choose exactly one interpolation type for 1217 * each packed varying it creates. 1218 * 1219 * However, we can safely pack together floats, ints, and uints, because: 1220 * 1221 * - varyings of base type "int" and "uint" must use the "flat" 1222 * interpolation type, which can only occur in GLSL 1.30 and above. 1223 * 1224 * - On platforms that support GLSL 1.30 and above, lower_packed_varyings 1225 * can store flat floats as ints without losing any information (using 1226 * the ir_unop_bitcast_* opcodes). 1227 * 1228 * Therefore, the packing class depends only on the interpolation type. 1229 */ 1230 bool is_interpolation_flat = var->data.interpolation == INTERP_MODE_FLAT || 1231 glsl_contains_integer(var->type) || glsl_contains_double(var->type); 1232 1233 const unsigned interp = is_interpolation_flat 1234 ? (unsigned) INTERP_MODE_FLAT : var->data.interpolation; 1235 1236 assert(interp < (1 << 3)); 1237 1238 const unsigned packing_class = (interp << 0) | 1239 (var->data.centroid << 3) | 1240 (var->data.sample << 4) | 1241 (var->data.patch << 5) | 1242 (var->data.must_be_shader_input << 6); 1243 1244 return packing_class; 1245} 1246 1247/** 1248 * Compute the "packing order" of the given varying. This is a sort key we 1249 * use to determine when to attempt to pack the given varying relative to 1250 * other varyings in the same packing class. 1251 */ 1252static enum packing_order_enum 1253varying_matches_compute_packing_order(const nir_variable *var) 1254{ 1255 const struct glsl_type *element_type = glsl_without_array(var->type); 1256 1257 switch (glsl_get_component_slots(element_type) % 4) { 1258 case 1: return PACKING_ORDER_SCALAR; 1259 case 2: return PACKING_ORDER_VEC2; 1260 case 3: return PACKING_ORDER_VEC3; 1261 case 0: return PACKING_ORDER_VEC4; 1262 default: 1263 assert(!"Unexpected value of vector_elements"); 1264 return PACKING_ORDER_VEC4; 1265 } 1266} 1267 1268/** 1269 * Built-in / reserved GL variables names start with "gl_" 1270 */ 1271static bool 1272is_gl_identifier(const char *s) 1273{ 1274 return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_'; 1275} 1276 1277/** 1278 * Record the given producer/consumer variable pair in the list of variables 1279 * that should later be assigned locations. 1280 * 1281 * It is permissible for \c consumer_var to be NULL (this happens if a 1282 * variable is output by the producer and consumed by transform feedback, but 1283 * not consumed by the consumer). 1284 * 1285 * If \c producer_var has already been paired up with a consumer_var, or 1286 * producer_var is part of fixed pipeline functionality (and hence already has 1287 * a location assigned), this function has no effect. 1288 * 1289 * Note: as a side effect this function may change the interpolation type of 1290 * \c producer_var, but only when the change couldn't possibly affect 1291 * rendering. 1292 */ 1293static void 1294varying_matches_record(void *mem_ctx, struct varying_matches *vm, 1295 nir_variable *producer_var, nir_variable *consumer_var) 1296{ 1297 assert(producer_var != NULL || consumer_var != NULL); 1298 1299 if ((producer_var && 1300 (producer_var->data.explicit_location || producer_var->data.location != -1)) || 1301 (consumer_var && 1302 (consumer_var->data.explicit_location || consumer_var->data.location != -1))) { 1303 /* Either a location already exists for this variable (since it is part 1304 * of fixed functionality), or it has already been assigned explicitly. 1305 */ 1306 return; 1307 } 1308 1309 /* The varyings should not have been matched and assgned previously */ 1310 assert((producer_var == NULL || producer_var->data.location == -1) && 1311 (consumer_var == NULL || consumer_var->data.location == -1)); 1312 1313 bool needs_flat_qualifier = consumer_var == NULL && 1314 (glsl_contains_integer(producer_var->type) || 1315 glsl_contains_double(producer_var->type)); 1316 1317 if (!vm->disable_varying_packing && 1318 (!vm->disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) && 1319 (needs_flat_qualifier || 1320 (vm->consumer_stage != MESA_SHADER_NONE && vm->consumer_stage != MESA_SHADER_FRAGMENT))) { 1321 /* Since this varying is not being consumed by the fragment shader, its 1322 * interpolation type varying cannot possibly affect rendering. 1323 * Also, this variable is non-flat and is (or contains) an integer 1324 * or a double. 1325 * If the consumer stage is unknown, don't modify the interpolation 1326 * type as it could affect rendering later with separate shaders. 1327 * 1328 * lower_packed_varyings requires all integer varyings to flat, 1329 * regardless of where they appear. We can trivially satisfy that 1330 * requirement by changing the interpolation type to flat here. 1331 */ 1332 if (producer_var) { 1333 producer_var->data.centroid = false; 1334 producer_var->data.sample = false; 1335 producer_var->data.interpolation = INTERP_MODE_FLAT; 1336 } 1337 1338 if (consumer_var) { 1339 consumer_var->data.centroid = false; 1340 consumer_var->data.sample = false; 1341 consumer_var->data.interpolation = INTERP_MODE_FLAT; 1342 } 1343 } 1344 1345 if (vm->num_matches == vm->matches_capacity) { 1346 vm->matches_capacity *= 2; 1347 vm->matches = (struct match *) 1348 reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity); 1349 } 1350 1351 /* We must use the consumer to compute the packing class because in GL4.4+ 1352 * there is no guarantee interpolation qualifiers will match across stages. 1353 * 1354 * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: 1355 * 1356 * "The type and presence of interpolation qualifiers of variables with 1357 * the same name declared in all linked shaders for the same cross-stage 1358 * interface must match, otherwise the link command will fail. 1359 * 1360 * When comparing an output from one stage to an input of a subsequent 1361 * stage, the input and output don't match if their interpolation 1362 * qualifiers (or lack thereof) are not the same." 1363 * 1364 * This text was also in at least revison 7 of the 4.40 spec but is no 1365 * longer in revision 9 and not in the 4.50 spec. 1366 */ 1367 const nir_variable *const var = (consumer_var != NULL) 1368 ? consumer_var : producer_var; 1369 1370 if (producer_var && consumer_var && 1371 consumer_var->data.must_be_shader_input) { 1372 producer_var->data.must_be_shader_input = 1; 1373 } 1374 1375 vm->matches[vm->num_matches].packing_class 1376 = varying_matches_compute_packing_class(var); 1377 vm->matches[vm->num_matches].packing_order 1378 = varying_matches_compute_packing_order(var); 1379 1380 vm->matches[vm->num_matches].producer_var = producer_var; 1381 vm->matches[vm->num_matches].consumer_var = consumer_var; 1382 vm->num_matches++; 1383} 1384 1385/** 1386 * Choose locations for all of the variable matches that were previously 1387 * passed to varying_matches_record(). 1388 * \param components returns array[slot] of number of components used 1389 * per slot (1, 2, 3 or 4) 1390 * \param reserved_slots bitmask indicating which varying slots are already 1391 * allocated 1392 * \return number of slots (4-element vectors) allocated 1393 */ 1394static unsigned 1395varying_matches_assign_locations(struct varying_matches *vm, 1396 struct gl_shader_program *prog, 1397 uint8_t components[], uint64_t reserved_slots) 1398{ 1399 /* If packing has been disabled then we cannot safely sort the varyings by 1400 * class as it may mean we are using a version of OpenGL where 1401 * interpolation qualifiers are not guaranteed to be matching across 1402 * shaders, sorting in this case could result in mismatching shader 1403 * interfaces. 1404 * When packing is disabled the sort orders varyings used by transform 1405 * feedback first, but also depends on *undefined behaviour* of qsort to 1406 * reverse the order of the varyings. See: xfb_comparator(). 1407 * 1408 * If packing is only disabled for xfb varyings (mutually exclusive with 1409 * disable_varying_packing), we then group varyings depending on if they 1410 * are captured for transform feedback. The same *undefined behaviour* is 1411 * taken advantage of. 1412 */ 1413 if (vm->disable_varying_packing) { 1414 /* Only sort varyings that are only used by transform feedback. */ 1415 qsort(vm->matches, vm->num_matches, sizeof(*vm->matches), 1416 &varying_matches_xfb_comparator); 1417 } else if (vm->disable_xfb_packing) { 1418 /* Only sort varyings that are NOT used by transform feedback. */ 1419 qsort(vm->matches, vm->num_matches, sizeof(*vm->matches), 1420 &varying_matches_not_xfb_comparator); 1421 } else { 1422 /* Sort varying matches into an order that makes them easy to pack. */ 1423 qsort(vm->matches, vm->num_matches, sizeof(*vm->matches), 1424 &varying_matches_match_comparator); 1425 } 1426 1427 unsigned generic_location = 0; 1428 unsigned generic_patch_location = MAX_VARYING*4; 1429 bool previous_var_xfb = false; 1430 bool previous_var_xfb_only = false; 1431 unsigned previous_packing_class = ~0u; 1432 1433 /* For tranform feedback separate mode, we know the number of attributes 1434 * is <= the number of buffers. So packing isn't critical. In fact, 1435 * packing vec3 attributes can cause trouble because splitting a vec3 1436 * effectively creates an additional transform feedback output. The 1437 * extra TFB output may exceed device driver limits. 1438 * 1439 * Also don't pack vec3 if the driver prefers power of two aligned 1440 * varyings. Packing order guarantees that vec4, vec2 and vec1 will be 1441 * pot-aligned, we only need to take care of vec3s 1442 */ 1443 const bool dont_pack_vec3 = 1444 (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && 1445 prog->TransformFeedback.NumVarying > 0) || 1446 vm->prefer_pot_aligned_varyings; 1447 1448 for (unsigned i = 0; i < vm->num_matches; i++) { 1449 unsigned *location = &generic_location; 1450 const nir_variable *var; 1451 const struct glsl_type *type; 1452 bool is_vertex_input = false; 1453 1454 if (vm->matches[i].consumer_var) { 1455 var = vm->matches[i].consumer_var; 1456 type = get_varying_type(var, vm->consumer_stage); 1457 if (vm->consumer_stage == MESA_SHADER_VERTEX) 1458 is_vertex_input = true; 1459 } else { 1460 if (!vm->matches[i].producer_var) 1461 continue; /* The varying was optimised away */ 1462 1463 var = vm->matches[i].producer_var; 1464 type = get_varying_type(var, vm->producer_stage); 1465 } 1466 1467 if (var->data.patch) 1468 location = &generic_patch_location; 1469 1470 /* Advance to the next slot if this varying has a different packing 1471 * class than the previous one, and we're not already on a slot 1472 * boundary. 1473 * 1474 * Also advance if varying packing is disabled for transform feedback, 1475 * and previous or current varying is used for transform feedback. 1476 * 1477 * Also advance to the next slot if packing is disabled. This makes sure 1478 * we don't assign varyings the same locations which is possible 1479 * because we still pack individual arrays, records and matrices even 1480 * when packing is disabled. Note we don't advance to the next slot if 1481 * we can pack varyings together that are only used for transform 1482 * feedback. 1483 */ 1484 if (var->data.must_be_shader_input || 1485 (vm->disable_xfb_packing && 1486 (previous_var_xfb || var->data.is_xfb)) || 1487 (vm->disable_varying_packing && 1488 !(previous_var_xfb_only && var->data.is_xfb_only)) || 1489 (previous_packing_class != vm->matches[i].packing_class) || 1490 (vm->matches[i].packing_order == PACKING_ORDER_VEC3 && 1491 dont_pack_vec3)) { 1492 *location = ALIGN(*location, 4); 1493 } 1494 1495 previous_var_xfb = var->data.is_xfb; 1496 previous_var_xfb_only = var->data.is_xfb_only; 1497 previous_packing_class = vm->matches[i].packing_class; 1498 1499 /* The number of components taken up by this variable. For vertex shader 1500 * inputs, we use the number of slots * 4, as they have different 1501 * counting rules. 1502 */ 1503 unsigned num_components = 0; 1504 if (is_vertex_input) { 1505 num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4; 1506 } else { 1507 if (is_packing_disabled(vm, type, var)) { 1508 num_components = glsl_count_attribute_slots(type, false) * 4; 1509 } else { 1510 num_components = glsl_get_component_slots_aligned(type, *location); 1511 } 1512 } 1513 1514 /* The last slot for this variable, inclusive. */ 1515 unsigned slot_end = *location + num_components - 1; 1516 1517 /* FIXME: We could be smarter in the below code and loop back over 1518 * trying to fill any locations that we skipped because we couldn't pack 1519 * the varying between an explicit location. For now just let the user 1520 * hit the linking error if we run out of room and suggest they use 1521 * explicit locations. 1522 */ 1523 while (slot_end < MAX_VARYING * 4u) { 1524 const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1; 1525 const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u); 1526 1527 assert(slots > 0); 1528 1529 if ((reserved_slots & slot_mask) == 0) { 1530 break; 1531 } 1532 1533 *location = ALIGN(*location + 1, 4); 1534 slot_end = *location + num_components - 1; 1535 } 1536 1537 if (!var->data.patch && slot_end >= MAX_VARYING * 4u) { 1538 linker_error(prog, "insufficient contiguous locations available for " 1539 "%s it is possible an array or struct could not be " 1540 "packed between varyings with explicit locations. Try " 1541 "using an explicit location for arrays and structs.", 1542 var->name); 1543 } 1544 1545 if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) { 1546 for (unsigned j = *location / 4u; j < slot_end / 4u; j++) 1547 components[j] = 4; 1548 components[slot_end / 4u] = (slot_end & 3) + 1; 1549 } 1550 1551 vm->matches[i].generic_location = *location; 1552 1553 *location = slot_end + 1; 1554 } 1555 1556 return (generic_location + 3) / 4; 1557} 1558 1559static void 1560varying_matches_assign_temp_locations(struct varying_matches *vm, 1561 struct gl_shader_program *prog, 1562 uint64_t reserved_slots) 1563{ 1564 unsigned tmp_loc = 0; 1565 for (unsigned i = 0; i < vm->num_matches; i++) { 1566 nir_variable *producer_var = vm->matches[i].producer_var; 1567 nir_variable *consumer_var = vm->matches[i].consumer_var; 1568 1569 while (tmp_loc < MAX_VARYINGS_INCL_PATCH) { 1570 if (reserved_slots & (UINT64_C(1) << tmp_loc)) 1571 tmp_loc++; 1572 else 1573 break; 1574 } 1575 1576 if (producer_var) { 1577 assert(producer_var->data.location == -1); 1578 producer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc; 1579 } 1580 1581 if (consumer_var) { 1582 assert(consumer_var->data.location == -1); 1583 consumer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc; 1584 } 1585 1586 tmp_loc++; 1587 } 1588} 1589 1590/** 1591 * Update the producer and consumer shaders to reflect the locations 1592 * assignments that were made by varying_matches_assign_locations(). 1593 */ 1594static void 1595varying_matches_store_locations(struct varying_matches *vm) 1596{ 1597 /* Check is location needs to be packed with lower_packed_varyings() or if 1598 * we can just use ARB_enhanced_layouts packing. 1599 */ 1600 bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0}; 1601 const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} }; 1602 1603 for (unsigned i = 0; i < vm->num_matches; i++) { 1604 nir_variable *producer_var = vm->matches[i].producer_var; 1605 nir_variable *consumer_var = vm->matches[i].consumer_var; 1606 unsigned generic_location = vm->matches[i].generic_location; 1607 unsigned slot = generic_location / 4; 1608 unsigned offset = generic_location % 4; 1609 1610 if (producer_var) { 1611 producer_var->data.location = VARYING_SLOT_VAR0 + slot; 1612 producer_var->data.location_frac = offset; 1613 } 1614 1615 if (consumer_var) { 1616 consumer_var->data.location = VARYING_SLOT_VAR0 + slot; 1617 consumer_var->data.location_frac = offset; 1618 } 1619 1620 /* Find locations suitable for native packing via 1621 * ARB_enhanced_layouts. 1622 */ 1623 if (vm->enhanced_layouts_enabled) { 1624 nir_variable *var = producer_var ? producer_var : consumer_var; 1625 unsigned stage = producer_var ? vm->producer_stage : vm->consumer_stage; 1626 const struct glsl_type *type = 1627 get_varying_type(var, stage); 1628 unsigned comp_slots = glsl_get_component_slots(type) + offset; 1629 unsigned slots = comp_slots / 4; 1630 if (comp_slots % 4) 1631 slots += 1; 1632 1633 if (producer_var && consumer_var) { 1634 if (glsl_type_is_array_or_matrix(type) || glsl_type_is_struct(type) || 1635 glsl_type_is_64bit(type)) { 1636 for (unsigned j = 0; j < slots; j++) { 1637 pack_loc[slot + j] = true; 1638 } 1639 } else if (offset + glsl_get_vector_elements(type) > 4) { 1640 pack_loc[slot] = true; 1641 pack_loc[slot + 1] = true; 1642 } else { 1643 loc_type[slot][offset] = type; 1644 } 1645 } else { 1646 for (unsigned j = 0; j < slots; j++) { 1647 pack_loc[slot + j] = true; 1648 } 1649 } 1650 } 1651 } 1652 1653 /* Attempt to use ARB_enhanced_layouts for more efficient packing if 1654 * suitable. 1655 */ 1656 if (vm->enhanced_layouts_enabled) { 1657 for (unsigned i = 0; i < vm->num_matches; i++) { 1658 nir_variable *producer_var = vm->matches[i].producer_var; 1659 nir_variable *consumer_var = vm->matches[i].consumer_var; 1660 if (!producer_var || !consumer_var) 1661 continue; 1662 1663 unsigned generic_location = vm->matches[i].generic_location; 1664 unsigned slot = generic_location / 4; 1665 if (pack_loc[slot]) 1666 continue; 1667 1668 const struct glsl_type *type = 1669 get_varying_type(producer_var, vm->producer_stage); 1670 bool type_match = true; 1671 for (unsigned j = 0; j < 4; j++) { 1672 if (loc_type[slot][j]) { 1673 if (glsl_get_base_type(type) != 1674 glsl_get_base_type(loc_type[slot][j])) 1675 type_match = false; 1676 } 1677 } 1678 1679 if (type_match) { 1680 producer_var->data.explicit_location = 1; 1681 consumer_var->data.explicit_location = 1; 1682 } 1683 } 1684 } 1685} 1686 1687/** 1688 * Is the given variable a varying variable to be counted against the 1689 * limit in ctx->Const.MaxVarying? 1690 * This includes variables such as texcoords, colors and generic 1691 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. 1692 */ 1693static bool 1694var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var) 1695{ 1696 /* Only fragment shaders will take a varying variable as an input */ 1697 if (stage == MESA_SHADER_FRAGMENT && 1698 var->data.mode == nir_var_shader_in) { 1699 switch (var->data.location) { 1700 case VARYING_SLOT_POS: 1701 case VARYING_SLOT_FACE: 1702 case VARYING_SLOT_PNTC: 1703 return false; 1704 default: 1705 return true; 1706 } 1707 } 1708 return false; 1709} 1710 1711struct tfeedback_candidate_generator_state { 1712 /** 1713 * Memory context used to allocate hash table keys and values. 1714 */ 1715 void *mem_ctx; 1716 1717 /** 1718 * Hash table in which tfeedback_candidate objects should be stored. 1719 */ 1720 struct hash_table *tfeedback_candidates; 1721 1722 gl_shader_stage stage; 1723 1724 /** 1725 * Pointer to the toplevel variable that is being traversed. 1726 */ 1727 nir_variable *toplevel_var; 1728 1729 /** 1730 * Total number of varying floats that have been visited so far. This is 1731 * used to determine the offset to each varying within the toplevel 1732 * variable. 1733 */ 1734 unsigned varying_floats; 1735 1736 /** 1737 * Offset within the xfb. Counted in floats. 1738 */ 1739 unsigned xfb_offset_floats; 1740}; 1741 1742/** 1743 * Generates tfeedback_candidate structs describing all possible targets of 1744 * transform feedback. 1745 * 1746 * tfeedback_candidate structs are stored in the hash table 1747 * tfeedback_candidates. This hash table maps varying names to instances of the 1748 * tfeedback_candidate struct. 1749 */ 1750static void 1751tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state, 1752 char **name, size_t name_length, 1753 const struct glsl_type *type, 1754 const struct glsl_struct_field *named_ifc_member) 1755{ 1756 switch (glsl_get_base_type(type)) { 1757 case GLSL_TYPE_INTERFACE: 1758 if (named_ifc_member) { 1759 ralloc_asprintf_rewrite_tail(name, &name_length, ".%s", 1760 named_ifc_member->name); 1761 tfeedback_candidate_generator(state, name, name_length, 1762 named_ifc_member->type, NULL); 1763 return; 1764 } 1765 FALLTHROUGH; 1766 case GLSL_TYPE_STRUCT: 1767 for (unsigned i = 0; i < glsl_get_length(type); i++) { 1768 size_t new_length = name_length; 1769 1770 /* Append '.field' to the current variable name. */ 1771 if (name) { 1772 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", 1773 glsl_get_struct_elem_name(type, i)); 1774 } 1775 1776 tfeedback_candidate_generator(state, name, new_length, 1777 glsl_get_struct_field(type, i), NULL); 1778 } 1779 1780 return; 1781 case GLSL_TYPE_ARRAY: 1782 if (glsl_type_is_struct(glsl_without_array(type)) || 1783 glsl_type_is_interface(glsl_without_array(type)) || 1784 glsl_type_is_array(glsl_get_array_element(type))) { 1785 1786 for (unsigned i = 0; i < glsl_get_length(type); i++) { 1787 size_t new_length = name_length; 1788 1789 /* Append the subscript to the current variable name */ 1790 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); 1791 1792 tfeedback_candidate_generator(state, name, new_length, 1793 glsl_get_array_element(type), 1794 named_ifc_member); 1795 } 1796 1797 return; 1798 } 1799 FALLTHROUGH; 1800 default: 1801 assert(!glsl_type_is_struct(glsl_without_array(type))); 1802 assert(!glsl_type_is_interface(glsl_without_array(type))); 1803 1804 struct tfeedback_candidate *candidate 1805 = rzalloc(state->mem_ctx, struct tfeedback_candidate); 1806 candidate->toplevel_var = state->toplevel_var; 1807 candidate->type = type; 1808 1809 if (glsl_type_is_64bit(glsl_without_array(type))) { 1810 /* From ARB_gpu_shader_fp64: 1811 * 1812 * If any variable captured in transform feedback has double-precision 1813 * components, the practical requirements for defined behavior are: 1814 * ... 1815 * (c) each double-precision variable captured must be aligned to a 1816 * multiple of eight bytes relative to the beginning of a vertex. 1817 */ 1818 state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2); 1819 /* 64-bit members of structs are also aligned. */ 1820 state->varying_floats = ALIGN(state->varying_floats, 2); 1821 } 1822 1823 candidate->xfb_offset_floats = state->xfb_offset_floats; 1824 candidate->struct_offset_floats = state->varying_floats; 1825 1826 _mesa_hash_table_insert(state->tfeedback_candidates, 1827 ralloc_strdup(state->mem_ctx, *name), 1828 candidate); 1829 1830 const unsigned component_slots = glsl_get_component_slots(type); 1831 1832 if (varying_has_user_specified_location(state->toplevel_var)) { 1833 state->varying_floats += glsl_count_attribute_slots(type, false) * 4; 1834 } else { 1835 state->varying_floats += component_slots; 1836 } 1837 1838 state->xfb_offset_floats += component_slots; 1839 } 1840} 1841 1842static void 1843populate_consumer_input_sets(void *mem_ctx, nir_shader *nir, 1844 struct hash_table *consumer_inputs, 1845 struct hash_table *consumer_interface_inputs, 1846 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 1847{ 1848 memset(consumer_inputs_with_locations, 0, 1849 sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX); 1850 1851 nir_foreach_shader_in_variable(input_var, nir) { 1852 /* All interface blocks should have been lowered by this point */ 1853 assert(!glsl_type_is_interface(input_var->type)); 1854 1855 if (input_var->data.explicit_location) { 1856 /* assign_varying_locations only cares about finding the 1857 * nir_variable at the start of a contiguous location block. 1858 * 1859 * - For !producer, consumer_inputs_with_locations isn't used. 1860 * 1861 * - For !consumer, consumer_inputs_with_locations is empty. 1862 * 1863 * For consumer && producer, if you were trying to set some 1864 * nir_variable to the middle of a location block on the other side 1865 * of producer/consumer, cross_validate_outputs_to_inputs() should 1866 * be link-erroring due to either type mismatch or location 1867 * overlaps. If the variables do match up, then they've got a 1868 * matching data.location and you only looked at 1869 * consumer_inputs_with_locations[var->data.location], not any 1870 * following entries for the array/structure. 1871 */ 1872 consumer_inputs_with_locations[input_var->data.location] = 1873 input_var; 1874 } else if (input_var->interface_type != NULL) { 1875 char *const iface_field_name = 1876 ralloc_asprintf(mem_ctx, "%s.%s", 1877 glsl_get_type_name(glsl_without_array(input_var->interface_type)), 1878 input_var->name); 1879 _mesa_hash_table_insert(consumer_interface_inputs, 1880 iface_field_name, input_var); 1881 } else { 1882 _mesa_hash_table_insert(consumer_inputs, 1883 ralloc_strdup(mem_ctx, input_var->name), 1884 input_var); 1885 } 1886 } 1887} 1888 1889/** 1890 * Find a variable from the consumer that "matches" the specified variable 1891 * 1892 * This function only finds inputs with names that match. There is no 1893 * validation (here) that the types, etc. are compatible. 1894 */ 1895static nir_variable * 1896get_matching_input(void *mem_ctx, 1897 const nir_variable *output_var, 1898 struct hash_table *consumer_inputs, 1899 struct hash_table *consumer_interface_inputs, 1900 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 1901{ 1902 nir_variable *input_var; 1903 1904 if (output_var->data.explicit_location) { 1905 input_var = consumer_inputs_with_locations[output_var->data.location]; 1906 } else if (output_var->interface_type != NULL) { 1907 char *const iface_field_name = 1908 ralloc_asprintf(mem_ctx, "%s.%s", 1909 glsl_get_type_name(glsl_without_array(output_var->interface_type)), 1910 output_var->name); 1911 struct hash_entry *entry = 1912 _mesa_hash_table_search(consumer_interface_inputs, iface_field_name); 1913 input_var = entry ? (nir_variable *) entry->data : NULL; 1914 } else { 1915 struct hash_entry *entry = 1916 _mesa_hash_table_search(consumer_inputs, output_var->name); 1917 input_var = entry ? (nir_variable *) entry->data : NULL; 1918 } 1919 1920 return (input_var == NULL || input_var->data.mode != nir_var_shader_in) 1921 ? NULL : input_var; 1922} 1923 1924static int 1925io_variable_cmp(const void *_a, const void *_b) 1926{ 1927 const nir_variable *const a = *(const nir_variable **) _a; 1928 const nir_variable *const b = *(const nir_variable **) _b; 1929 1930 if (a->data.explicit_location && b->data.explicit_location) 1931 return b->data.location - a->data.location; 1932 1933 if (a->data.explicit_location && !b->data.explicit_location) 1934 return 1; 1935 1936 if (!a->data.explicit_location && b->data.explicit_location) 1937 return -1; 1938 1939 return -strcmp(a->name, b->name); 1940} 1941 1942/** 1943 * Sort the shader IO variables into canonical order 1944 */ 1945static void 1946canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode) 1947{ 1948 nir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4]; 1949 unsigned num_variables = 0; 1950 1951 nir_foreach_variable_with_modes(var, nir, io_mode) { 1952 /* If we have already encountered more I/O variables that could 1953 * successfully link, bail. 1954 */ 1955 if (num_variables == ARRAY_SIZE(var_table)) 1956 return; 1957 1958 var_table[num_variables++] = var; 1959 } 1960 1961 if (num_variables == 0) 1962 return; 1963 1964 /* Sort the list in reverse order (io_variable_cmp handles this). Later 1965 * we're going to push the variables on to the IR list as a stack, so we 1966 * want the last variable (in canonical order) to be first in the list. 1967 */ 1968 qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp); 1969 1970 /* Remove the variable from it's current location in the varible list, and 1971 * put it at the front. 1972 */ 1973 for (unsigned i = 0; i < num_variables; i++) { 1974 exec_node_remove(&var_table[i]->node); 1975 exec_list_push_head(&nir->variables, &var_table[i]->node); 1976 } 1977} 1978 1979/** 1980 * Generate a bitfield map of the explicit locations for shader varyings. 1981 * 1982 * Note: For Tessellation shaders we are sitting right on the limits of the 1983 * 64 bit map. Per-vertex and per-patch both have separate location domains 1984 * with a max of MAX_VARYING. 1985 */ 1986static uint64_t 1987reserved_varying_slot(struct gl_linked_shader *sh, 1988 nir_variable_mode io_mode) 1989{ 1990 assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out); 1991 /* Avoid an overflow of the returned value */ 1992 assert(MAX_VARYINGS_INCL_PATCH <= 64); 1993 1994 uint64_t slots = 0; 1995 int var_slot; 1996 1997 if (!sh) 1998 return slots; 1999 2000 nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) { 2001 if (!var->data.explicit_location || 2002 var->data.location < VARYING_SLOT_VAR0) 2003 continue; 2004 2005 var_slot = var->data.location - VARYING_SLOT_VAR0; 2006 2007 bool is_gl_vertex_input = io_mode == nir_var_shader_in && 2008 sh->Stage == MESA_SHADER_VERTEX; 2009 unsigned num_elements = 2010 glsl_count_attribute_slots(get_varying_type(var, sh->Stage), 2011 is_gl_vertex_input); 2012 for (unsigned i = 0; i < num_elements; i++) { 2013 if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH) 2014 slots |= UINT64_C(1) << var_slot; 2015 var_slot += 1; 2016 } 2017 } 2018 2019 return slots; 2020} 2021 2022/** 2023 * Sets the bits in the inputs_read, or outputs_written 2024 * bitfield corresponding to this variable. 2025 */ 2026static void 2027set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage) 2028{ 2029 assert(var->data.mode == nir_var_shader_in || 2030 var->data.mode == nir_var_shader_out); 2031 assert(var->data.location >= VARYING_SLOT_VAR0); 2032 2033 const struct glsl_type *type = var->type; 2034 if (nir_is_arrayed_io(var, stage) || var->data.per_view) { 2035 assert(glsl_type_is_array(type)); 2036 type = glsl_get_array_element(type); 2037 } 2038 2039 unsigned location = var->data.location - VARYING_SLOT_VAR0; 2040 unsigned slots = glsl_count_attribute_slots(type, false); 2041 for (unsigned i = 0; i < slots; i++) { 2042 BITSET_SET(bits, location + i); 2043 } 2044} 2045 2046static uint8_t 2047get_num_components(nir_variable *var) 2048{ 2049 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 2050 return 4; 2051 2052 return glsl_get_vector_elements(glsl_without_array(var->type)); 2053} 2054 2055static void 2056tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read) 2057{ 2058 nir_foreach_function(function, shader) { 2059 if (!function->impl) 2060 continue; 2061 2062 nir_foreach_block(block, function->impl) { 2063 nir_foreach_instr(instr, block) { 2064 if (instr->type != nir_instr_type_intrinsic) 2065 continue; 2066 2067 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2068 if (intrin->intrinsic != nir_intrinsic_load_deref) 2069 continue; 2070 2071 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 2072 if (!nir_deref_mode_is(deref, nir_var_shader_out)) 2073 continue; 2074 2075 nir_variable *var = nir_deref_instr_get_variable(deref); 2076 for (unsigned i = 0; i < get_num_components(var); i++) { 2077 if (var->data.location < VARYING_SLOT_VAR0) 2078 continue; 2079 2080 unsigned comp = var->data.location_frac; 2081 set_variable_io_mask(read[comp + i], var, shader->info.stage); 2082 } 2083 } 2084 } 2085 } 2086} 2087 2088/* We need to replace any interp intrinsics with undefined (shader_temp) inputs 2089 * as no further NIR pass expects to see this. 2090 */ 2091static bool 2092replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr, 2093 void *data) 2094{ 2095 if (instr->type == nir_instr_type_intrinsic) { 2096 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 2097 2098 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid || 2099 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample || 2100 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) { 2101 nir_variable *var = nir_intrinsic_get_var(intrin, 0); 2102 if (var->data.mode == nir_var_shader_temp) { 2103 /* Create undef and rewrite the interp uses */ 2104 nir_ssa_def *undef = 2105 nir_ssa_undef(b, intrin->dest.ssa.num_components, 2106 intrin->dest.ssa.bit_size); 2107 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, undef); 2108 2109 nir_instr_remove(&intrin->instr); 2110 return true; 2111 } 2112 } 2113 } 2114 2115 return false; 2116} 2117 2118static void 2119fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode) 2120{ 2121 /* Remove all interpolate uses of the unset varying and replace with undef. */ 2122 if (mode == nir_var_shader_in && shader->info.stage == MESA_SHADER_FRAGMENT) { 2123 (void) nir_shader_instructions_pass(shader, 2124 replace_unused_interpolate_at_with_undef, 2125 nir_metadata_block_index | 2126 nir_metadata_dominance, 2127 NULL); 2128 } 2129 2130 nir_lower_global_vars_to_local(shader); 2131 nir_fixup_deref_modes(shader); 2132} 2133 2134/** 2135 * Helper for removing unused shader I/O variables, by demoting them to global 2136 * variables (which may then be dead code eliminated). 2137 * 2138 * Example usage is: 2139 * 2140 * progress = nir_remove_unused_io_vars(producer, consumer, nir_var_shader_out, 2141 * read, patches_read) || 2142 * progress; 2143 * 2144 * The "used" should be an array of 4 BITSET_WORDs representing each 2145 * .location_frac used. Note that for vector variables, only the first channel 2146 * (.location_frac) is examined for deciding if the variable is used! 2147 */ 2148static bool 2149remove_unused_io_vars(nir_shader *producer, nir_shader *consumer, 2150 struct gl_shader_program *prog, 2151 nir_variable_mode mode, 2152 BITSET_WORD **used_by_other_stage) 2153{ 2154 assert(mode == nir_var_shader_in || mode == nir_var_shader_out); 2155 2156 bool progress = false; 2157 nir_shader *shader = mode == nir_var_shader_out ? producer : consumer; 2158 2159 BITSET_WORD **used; 2160 nir_foreach_variable_with_modes_safe(var, shader, mode) { 2161 used = used_by_other_stage; 2162 2163 /* Skip builtins dead builtins are removed elsewhere */ 2164 if (is_gl_identifier(var->name)) 2165 continue; 2166 2167 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0) 2168 continue; 2169 2170 /* Skip xfb varyings and any other type we cannot remove */ 2171 if (var->data.always_active_io) 2172 continue; 2173 2174 if (var->data.explicit_xfb_buffer) 2175 continue; 2176 2177 BITSET_WORD *other_stage = used[var->data.location_frac]; 2178 2179 /* if location == -1 lower varying to global as it has no match and is not 2180 * a xfb varying, this must be done after skiping bultins as builtins 2181 * could be assigned a location of -1. 2182 * We also lower unused varyings with explicit locations. 2183 */ 2184 bool use_found = false; 2185 if (var->data.location >= 0) { 2186 unsigned location = var->data.location - VARYING_SLOT_VAR0; 2187 2188 const struct glsl_type *type = var->type; 2189 if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) { 2190 assert(glsl_type_is_array(type)); 2191 type = glsl_get_array_element(type); 2192 } 2193 2194 unsigned slots = glsl_count_attribute_slots(type, false); 2195 for (unsigned i = 0; i < slots; i++) { 2196 if (BITSET_TEST(other_stage, location + i)) { 2197 use_found = true; 2198 break; 2199 } 2200 } 2201 } 2202 2203 if (!use_found) { 2204 /* This one is invalid, make it a global variable instead */ 2205 var->data.location = 0; 2206 var->data.mode = nir_var_shader_temp; 2207 2208 progress = true; 2209 2210 if (mode == nir_var_shader_in) { 2211 if (!prog->IsES && prog->data->Version <= 120) { 2212 /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: 2213 * 2214 * Only those varying variables used (i.e. read) in 2215 * the fragment shader executable must be written to 2216 * by the vertex shader executable; declaring 2217 * superfluous varying variables in a vertex shader is 2218 * permissible. 2219 * 2220 * We interpret this text as meaning that the VS must 2221 * write the variable for the FS to read it. See 2222 * "glsl1-varying read but not written" in piglit. 2223 */ 2224 linker_error(prog, "%s shader varying %s not written " 2225 "by %s shader\n.", 2226 _mesa_shader_stage_to_string(consumer->info.stage), 2227 var->name, 2228 _mesa_shader_stage_to_string(producer->info.stage)); 2229 } else { 2230 linker_warning(prog, "%s shader varying %s not written " 2231 "by %s shader\n.", 2232 _mesa_shader_stage_to_string(consumer->info.stage), 2233 var->name, 2234 _mesa_shader_stage_to_string(producer->info.stage)); 2235 } 2236 } 2237 } 2238 } 2239 2240 if (progress) 2241 fixup_vars_lowered_to_temp(shader, mode); 2242 2243 return progress; 2244} 2245 2246static bool 2247remove_unused_varyings(nir_shader *producer, nir_shader *consumer, 2248 struct gl_shader_program *prog, void *mem_ctx) 2249{ 2250 assert(producer->info.stage != MESA_SHADER_FRAGMENT); 2251 assert(consumer->info.stage != MESA_SHADER_VERTEX); 2252 2253 int max_loc_out = 0; 2254 nir_foreach_shader_out_variable(var, producer) { 2255 if (var->data.location < VARYING_SLOT_VAR0) 2256 continue; 2257 2258 const struct glsl_type *type = var->type; 2259 if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) { 2260 assert(glsl_type_is_array(type)); 2261 type = glsl_get_array_element(type); 2262 } 2263 unsigned slots = glsl_count_attribute_slots(type, false); 2264 2265 max_loc_out = max_loc_out < (var->data.location - VARYING_SLOT_VAR0) + slots ? 2266 (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_out; 2267 } 2268 2269 int max_loc_in = 0; 2270 nir_foreach_shader_in_variable(var, consumer) { 2271 if (var->data.location < VARYING_SLOT_VAR0) 2272 continue; 2273 2274 const struct glsl_type *type = var->type; 2275 if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) { 2276 assert(glsl_type_is_array(type)); 2277 type = glsl_get_array_element(type); 2278 } 2279 unsigned slots = glsl_count_attribute_slots(type, false); 2280 2281 max_loc_in = max_loc_in < (var->data.location - VARYING_SLOT_VAR0) + slots ? 2282 (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_in; 2283 } 2284 2285 /* Old glsl shaders that don't use explicit locations can contain greater 2286 * than 64 varyings before unused varyings are removed so we must count them 2287 * and make use of the BITSET macros to keep track of used slots. Once we 2288 * have removed these excess varyings we can make use of further nir varying 2289 * linking optimimisation passes. 2290 */ 2291 BITSET_WORD *read[4]; 2292 BITSET_WORD *written[4]; 2293 int max_loc = MAX2(max_loc_in, max_loc_out); 2294 for (unsigned i = 0; i < 4; i++) { 2295 read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc)); 2296 written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc)); 2297 } 2298 2299 nir_foreach_shader_out_variable(var, producer) { 2300 if (var->data.location < VARYING_SLOT_VAR0) 2301 continue; 2302 2303 for (unsigned i = 0; i < get_num_components(var); i++) { 2304 unsigned comp = var->data.location_frac; 2305 set_variable_io_mask(written[comp + i], var, producer->info.stage); 2306 } 2307 } 2308 2309 nir_foreach_shader_in_variable(var, consumer) { 2310 if (var->data.location < VARYING_SLOT_VAR0) 2311 continue; 2312 2313 for (unsigned i = 0; i < get_num_components(var); i++) { 2314 unsigned comp = var->data.location_frac; 2315 set_variable_io_mask(read[comp + i], var, consumer->info.stage); 2316 } 2317 } 2318 2319 /* Each TCS invocation can read data written by other TCS invocations, 2320 * so even if the outputs are not used by the TES we must also make 2321 * sure they are not read by the TCS before demoting them to globals. 2322 */ 2323 if (producer->info.stage == MESA_SHADER_TESS_CTRL) 2324 tcs_add_output_reads(producer, read); 2325 2326 bool progress = false; 2327 progress = 2328 remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read); 2329 progress = 2330 remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress; 2331 2332 return progress; 2333} 2334 2335static bool 2336should_add_varying_match_record(nir_variable *const input_var, 2337 struct gl_shader_program *prog, 2338 struct gl_linked_shader *producer, 2339 struct gl_linked_shader *consumer) { 2340 2341 /* If a matching input variable was found, add this output (and the input) to 2342 * the set. If this is a separable program and there is no consumer stage, 2343 * add the output. 2344 * 2345 * Always add TCS outputs. They are shared by all invocations 2346 * within a patch and can be used as shared memory. 2347 */ 2348 return input_var || (prog->SeparateShader && consumer == NULL) || 2349 producer->Stage == MESA_SHADER_TESS_CTRL; 2350} 2351 2352/* This assigns some initial unoptimised varying locations so that our nir 2353 * optimisations can perform some initial optimisations and also does initial 2354 * processing of 2355 */ 2356static bool 2357assign_initial_varying_locations(const struct gl_constants *consts, 2358 const struct gl_extensions *exts, 2359 void *mem_ctx, 2360 struct gl_shader_program *prog, 2361 struct gl_linked_shader *producer, 2362 struct gl_linked_shader *consumer, 2363 unsigned num_xfb_decls, 2364 struct xfb_decl *xfb_decls, 2365 struct varying_matches *vm) 2366{ 2367 init_varying_matches(mem_ctx, vm, consts, exts, 2368 producer ? producer->Stage : MESA_SHADER_NONE, 2369 consumer ? consumer->Stage : MESA_SHADER_NONE, 2370 prog->SeparateShader); 2371 2372 struct hash_table *tfeedback_candidates = 2373 _mesa_hash_table_create(mem_ctx, _mesa_hash_string, 2374 _mesa_key_string_equal); 2375 struct hash_table *consumer_inputs = 2376 _mesa_hash_table_create(mem_ctx, _mesa_hash_string, 2377 _mesa_key_string_equal); 2378 struct hash_table *consumer_interface_inputs = 2379 _mesa_hash_table_create(mem_ctx, _mesa_hash_string, 2380 _mesa_key_string_equal); 2381 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { 2382 NULL, 2383 }; 2384 2385 if (consumer) 2386 populate_consumer_input_sets(mem_ctx, consumer->Program->nir, 2387 consumer_inputs, consumer_interface_inputs, 2388 consumer_inputs_with_locations); 2389 2390 if (producer) { 2391 nir_foreach_shader_out_variable(output_var, producer->Program->nir) { 2392 /* Only geometry shaders can use non-zero streams */ 2393 assert(output_var->data.stream == 0 || 2394 (output_var->data.stream < MAX_VERTEX_STREAMS && 2395 producer->Stage == MESA_SHADER_GEOMETRY)); 2396 2397 if (num_xfb_decls > 0) { 2398 /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1 2399 * ("Vertex Shader Variables / Output Variables") 2400 * 2401 * "Each program object can specify a set of output variables from 2402 * one shader to be recorded in transform feedback mode (see 2403 * section 13.3). The variables that can be recorded are those 2404 * emitted by the first active shader, in order, from the 2405 * following list: 2406 * 2407 * * geometry shader 2408 * * tessellation evaluation shader 2409 * * tessellation control shader 2410 * * vertex shader" 2411 * 2412 * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader 2413 * Variables / Output Variables") tessellation control shader is 2414 * not included in the stages list. 2415 */ 2416 if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) { 2417 2418 const struct glsl_type *type = output_var->data.from_named_ifc_block ? 2419 output_var->interface_type : output_var->type; 2420 if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) { 2421 assert(glsl_type_is_array(type)); 2422 type = glsl_get_array_element(type); 2423 } 2424 2425 const struct glsl_struct_field *ifc_member = NULL; 2426 if (output_var->data.from_named_ifc_block) { 2427 ifc_member = 2428 glsl_get_struct_field_data(glsl_without_array(type), 2429 glsl_get_field_index(glsl_without_array(type), output_var->name)); 2430 } 2431 2432 char *name; 2433 if (glsl_type_is_struct(glsl_without_array(type)) || 2434 (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) { 2435 type = output_var->type; 2436 name = ralloc_strdup(NULL, output_var->name); 2437 } else if (glsl_type_is_interface(glsl_without_array(type))) { 2438 name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type))); 2439 } else { 2440 name = ralloc_strdup(NULL, output_var->name); 2441 } 2442 2443 struct tfeedback_candidate_generator_state state; 2444 state.mem_ctx = mem_ctx; 2445 state.tfeedback_candidates = tfeedback_candidates; 2446 state.stage = producer->Stage; 2447 state.toplevel_var = output_var; 2448 state.varying_floats = 0; 2449 state.xfb_offset_floats = 0; 2450 2451 tfeedback_candidate_generator(&state, &name, strlen(name), type, 2452 ifc_member); 2453 ralloc_free(name); 2454 } 2455 } 2456 2457 nir_variable *const input_var = 2458 get_matching_input(mem_ctx, output_var, consumer_inputs, 2459 consumer_interface_inputs, 2460 consumer_inputs_with_locations); 2461 2462 if (should_add_varying_match_record(input_var, prog, producer, 2463 consumer)) { 2464 varying_matches_record(mem_ctx, vm, output_var, input_var); 2465 } 2466 2467 /* Only stream 0 outputs can be consumed in the next stage */ 2468 if (input_var && output_var->data.stream != 0) { 2469 linker_error(prog, "output %s is assigned to stream=%d but " 2470 "is linked to an input, which requires stream=0", 2471 output_var->name, output_var->data.stream); 2472 return false; 2473 } 2474 } 2475 } else { 2476 /* If there's no producer stage, then this must be a separable program. 2477 * For example, we may have a program that has just a fragment shader. 2478 * Later this program will be used with some arbitrary vertex (or 2479 * geometry) shader program. This means that locations must be assigned 2480 * for all the inputs. 2481 */ 2482 nir_foreach_shader_in_variable(input_var, consumer->Program->nir) { 2483 varying_matches_record(mem_ctx, vm, NULL, input_var); 2484 } 2485 } 2486 2487 for (unsigned i = 0; i < num_xfb_decls; ++i) { 2488 if (!xfb_decl_is_varying(&xfb_decls[i])) 2489 continue; 2490 2491 const struct tfeedback_candidate *matched_candidate 2492 = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates); 2493 2494 if (matched_candidate == NULL) 2495 return false; 2496 2497 /* There are two situations where a new output varying is needed: 2498 * 2499 * - If varying packing is disabled for xfb and the current declaration 2500 * is subscripting an array, whether the subscript is aligned or not. 2501 * to preserve the rest of the array for the consumer. 2502 * 2503 * - If a builtin variable needs to be copied to a new variable 2504 * before its content is modified by another lowering pass (e.g. 2505 * \c gl_Position is transformed by \c nir_lower_viewport_transform). 2506 */ 2507 const bool lowered = 2508 (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) || 2509 (matched_candidate->toplevel_var->data.explicit_location && 2510 matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 && 2511 (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) && 2512 (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb & 2513 BITFIELD_BIT(matched_candidate->toplevel_var->data.location))); 2514 2515 if (lowered) { 2516 nir_variable *new_var; 2517 struct tfeedback_candidate *new_candidate = NULL; 2518 2519 new_var = gl_nir_lower_xfb_varying(producer->Program->nir, 2520 xfb_decls[i].orig_name, 2521 matched_candidate->toplevel_var); 2522 if (new_var == NULL) 2523 return false; 2524 2525 /* Create new candidate and replace matched_candidate */ 2526 new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate); 2527 new_candidate->toplevel_var = new_var; 2528 new_candidate->type = new_var->type; 2529 new_candidate->struct_offset_floats = 0; 2530 new_candidate->xfb_offset_floats = 0; 2531 _mesa_hash_table_insert(tfeedback_candidates, 2532 ralloc_strdup(mem_ctx, new_var->name), 2533 new_candidate); 2534 2535 xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate); 2536 matched_candidate = new_candidate; 2537 } 2538 2539 /* Mark as xfb varying */ 2540 matched_candidate->toplevel_var->data.is_xfb = 1; 2541 2542 /* Mark xfb varyings as always active */ 2543 matched_candidate->toplevel_var->data.always_active_io = 1; 2544 2545 /* Mark any corresponding inputs as always active also. We must do this 2546 * because we have a NIR pass that lowers vectors to scalars and another 2547 * that removes unused varyings. 2548 * We don't split varyings marked as always active because there is no 2549 * point in doing so. This means we need to mark both sides of the 2550 * interface as always active otherwise we will have a mismatch and 2551 * start removing things we shouldn't. 2552 */ 2553 nir_variable *const input_var = 2554 get_matching_input(mem_ctx, matched_candidate->toplevel_var, 2555 consumer_inputs, consumer_interface_inputs, 2556 consumer_inputs_with_locations); 2557 if (input_var) { 2558 input_var->data.is_xfb = 1; 2559 input_var->data.always_active_io = 1; 2560 } 2561 2562 /* Add the xfb varying to varying matches if it wasn't already added */ 2563 if ((!should_add_varying_match_record(input_var, prog, producer, 2564 consumer) && 2565 !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) { 2566 matched_candidate->toplevel_var->data.is_xfb_only = 1; 2567 varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var, 2568 NULL); 2569 } 2570 } 2571 2572 uint64_t reserved_out_slots = 0; 2573 if (producer) 2574 reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out); 2575 2576 uint64_t reserved_in_slots = 0; 2577 if (consumer) 2578 reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in); 2579 2580 /* Assign temporary user varying locations. This is required for our NIR 2581 * varying optimisations to do their matching. 2582 */ 2583 const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots; 2584 varying_matches_assign_temp_locations(vm, prog, reserved_slots); 2585 2586 for (unsigned i = 0; i < num_xfb_decls; ++i) { 2587 if (!xfb_decl_is_varying(&xfb_decls[i])) 2588 continue; 2589 2590 xfb_decls[i].matched_candidate->initial_location = 2591 xfb_decls[i].matched_candidate->toplevel_var->data.location; 2592 xfb_decls[i].matched_candidate->initial_location_frac = 2593 xfb_decls[i].matched_candidate->toplevel_var->data.location_frac; 2594 } 2595 2596 return true; 2597} 2598 2599static void 2600link_shader_opts(struct varying_matches *vm, 2601 nir_shader *producer, nir_shader *consumer, 2602 struct gl_shader_program *prog, void *mem_ctx) 2603{ 2604 /* If we can't pack the stage using this pass then we can't lower io to 2605 * scalar just yet. Instead we leave it to a later NIR linking pass that uses 2606 * ARB_enhanced_layout style packing to pack things further. 2607 * 2608 * Otherwise we might end up causing linking errors and perf regressions 2609 * because the new scalars will be assigned individual slots and can overflow 2610 * the available slots. 2611 */ 2612 if (producer->options->lower_to_scalar && !vm->disable_varying_packing && 2613 !vm->disable_xfb_packing) { 2614 NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out); 2615 NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); 2616 } 2617 2618 gl_nir_opts(producer); 2619 gl_nir_opts(consumer); 2620 2621 if (nir_link_opt_varyings(producer, consumer)) 2622 gl_nir_opts(consumer); 2623 2624 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL); 2625 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); 2626 2627 if (remove_unused_varyings(producer, consumer, prog, mem_ctx)) { 2628 NIR_PASS_V(producer, nir_lower_global_vars_to_local); 2629 NIR_PASS_V(consumer, nir_lower_global_vars_to_local); 2630 2631 gl_nir_opts(producer); 2632 gl_nir_opts(consumer); 2633 2634 /* Optimizations can cause varyings to become unused. 2635 * nir_compact_varyings() depends on all dead varyings being removed so 2636 * we need to call nir_remove_dead_variables() again here. 2637 */ 2638 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, 2639 NULL); 2640 NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, 2641 NULL); 2642 } 2643 2644 nir_link_varying_precision(producer, consumer); 2645} 2646 2647/** 2648 * Assign locations for all variables that are produced in one pipeline stage 2649 * (the "producer") and consumed in the next stage (the "consumer"). 2650 * 2651 * Variables produced by the producer may also be consumed by transform 2652 * feedback. 2653 * 2654 * \param num_xfb_decls is the number of declarations indicating 2655 * variables that may be consumed by transform feedback. 2656 * 2657 * \param xfb_decls is a pointer to an array of xfb_decl objects 2658 * representing the result of parsing the strings passed to 2659 * glTransformFeedbackVaryings(). assign_location() will be called for 2660 * each of these objects that matches one of the outputs of the 2661 * producer. 2662 * 2663 * When num_xfb_decls is nonzero, it is permissible for the consumer to 2664 * be NULL. In this case, varying locations are assigned solely based on the 2665 * requirements of transform feedback. 2666 */ 2667static bool 2668assign_final_varying_locations(const struct gl_constants *consts, 2669 const struct gl_extensions *exts, 2670 void *mem_ctx, 2671 struct gl_shader_program *prog, 2672 struct gl_linked_shader *producer, 2673 struct gl_linked_shader *consumer, 2674 unsigned num_xfb_decls, 2675 struct xfb_decl *xfb_decls, 2676 const uint64_t reserved_slots, 2677 struct varying_matches *vm) 2678{ 2679 init_varying_matches(mem_ctx, vm, consts, exts, 2680 producer ? producer->Stage : MESA_SHADER_NONE, 2681 consumer ? consumer->Stage : MESA_SHADER_NONE, 2682 prog->SeparateShader); 2683 2684 /* Regather varying matches as we ran optimisations and the previous pointers 2685 * are no longer valid. 2686 */ 2687 if (producer) { 2688 nir_foreach_shader_out_variable(var_out, producer->Program->nir) { 2689 if (var_out->data.location < VARYING_SLOT_VAR0 || 2690 var_out->data.explicit_location) 2691 continue; 2692 2693 if (vm->num_matches == vm->matches_capacity) { 2694 vm->matches_capacity *= 2; 2695 vm->matches = (struct match *) 2696 reralloc(mem_ctx, vm->matches, struct match, 2697 vm->matches_capacity); 2698 } 2699 2700 vm->matches[vm->num_matches].packing_class 2701 = varying_matches_compute_packing_class(var_out); 2702 vm->matches[vm->num_matches].packing_order 2703 = varying_matches_compute_packing_order(var_out); 2704 2705 vm->matches[vm->num_matches].producer_var = var_out; 2706 vm->matches[vm->num_matches].consumer_var = NULL; 2707 vm->num_matches++; 2708 } 2709 2710 /* Regather xfb varyings too */ 2711 for (unsigned i = 0; i < num_xfb_decls; i++) { 2712 if (!xfb_decl_is_varying(&xfb_decls[i])) 2713 continue; 2714 2715 /* Varying pointer was already reset */ 2716 if (xfb_decls[i].matched_candidate->initial_location == -1) 2717 continue; 2718 2719 bool UNUSED is_reset = false; 2720 bool UNUSED no_outputs = true; 2721 nir_foreach_shader_out_variable(var_out, producer->Program->nir) { 2722 no_outputs = false; 2723 assert(var_out->data.location != -1); 2724 if (var_out->data.location == 2725 xfb_decls[i].matched_candidate->initial_location && 2726 var_out->data.location_frac == 2727 xfb_decls[i].matched_candidate->initial_location_frac) { 2728 xfb_decls[i].matched_candidate->toplevel_var = var_out; 2729 xfb_decls[i].matched_candidate->initial_location = -1; 2730 is_reset = true; 2731 break; 2732 } 2733 } 2734 assert(is_reset || no_outputs); 2735 } 2736 } 2737 2738 bool found_match = false; 2739 if (consumer) { 2740 nir_foreach_shader_in_variable(var_in, consumer->Program->nir) { 2741 if (var_in->data.location < VARYING_SLOT_VAR0 || 2742 var_in->data.explicit_location) 2743 continue; 2744 2745 found_match = false; 2746 for (unsigned i = 0; i < vm->num_matches; i++) { 2747 if (vm->matches[i].producer_var && 2748 (vm->matches[i].producer_var->data.location == var_in->data.location && 2749 vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) { 2750 2751 vm->matches[i].consumer_var = var_in; 2752 found_match = true; 2753 break; 2754 } 2755 } 2756 if (!found_match) { 2757 if (vm->num_matches == vm->matches_capacity) { 2758 vm->matches_capacity *= 2; 2759 vm->matches = (struct match *) 2760 reralloc(mem_ctx, vm->matches, struct match, 2761 vm->matches_capacity); 2762 } 2763 2764 vm->matches[vm->num_matches].packing_class 2765 = varying_matches_compute_packing_class(var_in); 2766 vm->matches[vm->num_matches].packing_order 2767 = varying_matches_compute_packing_order(var_in); 2768 2769 vm->matches[vm->num_matches].producer_var = NULL; 2770 vm->matches[vm->num_matches].consumer_var = var_in; 2771 vm->num_matches++; 2772 } 2773 } 2774 } 2775 2776 uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0}; 2777 const unsigned slots_used = 2778 varying_matches_assign_locations(vm, prog, components, reserved_slots); 2779 varying_matches_store_locations(vm); 2780 2781 for (unsigned i = 0; i < num_xfb_decls; ++i) { 2782 if (xfb_decl_is_varying(&xfb_decls[i])) { 2783 if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog)) 2784 return false; 2785 } 2786 } 2787 2788 if (producer) { 2789 gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components, 2790 nir_var_shader_out, 0, producer, 2791 vm->disable_varying_packing, 2792 vm->disable_xfb_packing, vm->xfb_enabled); 2793 nir_lower_pack(producer->Program->nir); 2794 } 2795 2796 if (consumer) { 2797 unsigned consumer_vertices = 0; 2798 if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY) 2799 consumer_vertices = prog->Geom.VerticesIn; 2800 2801 gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components, 2802 nir_var_shader_in, consumer_vertices, 2803 consumer, vm->disable_varying_packing, 2804 vm->disable_xfb_packing, vm->xfb_enabled); 2805 nir_lower_pack(consumer->Program->nir); 2806 } 2807 2808 return true; 2809} 2810 2811static bool 2812check_against_output_limit(const struct gl_constants *consts, gl_api api, 2813 struct gl_shader_program *prog, 2814 struct gl_linked_shader *producer, 2815 unsigned num_explicit_locations) 2816{ 2817 unsigned output_vectors = num_explicit_locations; 2818 nir_foreach_shader_out_variable(var, producer->Program->nir) { 2819 if (!var->data.explicit_location && 2820 var_counts_against_varying_limit(producer->Stage, var)) { 2821 /* outputs for fragment shader can't be doubles */ 2822 output_vectors += glsl_count_attribute_slots(var->type, false); 2823 } 2824 } 2825 2826 assert(producer->Stage != MESA_SHADER_FRAGMENT); 2827 unsigned max_output_components = 2828 consts->Program[producer->Stage].MaxOutputComponents; 2829 2830 const unsigned output_components = output_vectors * 4; 2831 if (output_components > max_output_components) { 2832 if (api == API_OPENGLES2 || prog->IsES) 2833 linker_error(prog, "%s shader uses too many output vectors " 2834 "(%u > %u)\n", 2835 _mesa_shader_stage_to_string(producer->Stage), 2836 output_vectors, 2837 max_output_components / 4); 2838 else 2839 linker_error(prog, "%s shader uses too many output components " 2840 "(%u > %u)\n", 2841 _mesa_shader_stage_to_string(producer->Stage), 2842 output_components, 2843 max_output_components); 2844 2845 return false; 2846 } 2847 2848 return true; 2849} 2850 2851static bool 2852check_against_input_limit(const struct gl_constants *consts, gl_api api, 2853 struct gl_shader_program *prog, 2854 struct gl_linked_shader *consumer, 2855 unsigned num_explicit_locations) 2856{ 2857 unsigned input_vectors = num_explicit_locations; 2858 2859 nir_foreach_shader_in_variable(var, consumer->Program->nir) { 2860 if (!var->data.explicit_location && 2861 var_counts_against_varying_limit(consumer->Stage, var)) { 2862 /* vertex inputs aren't varying counted */ 2863 input_vectors += glsl_count_attribute_slots(var->type, false); 2864 } 2865 } 2866 2867 assert(consumer->Stage != MESA_SHADER_VERTEX); 2868 unsigned max_input_components = 2869 consts->Program[consumer->Stage].MaxInputComponents; 2870 2871 const unsigned input_components = input_vectors * 4; 2872 if (input_components > max_input_components) { 2873 if (api == API_OPENGLES2 || prog->IsES) 2874 linker_error(prog, "%s shader uses too many input vectors " 2875 "(%u > %u)\n", 2876 _mesa_shader_stage_to_string(consumer->Stage), 2877 input_vectors, 2878 max_input_components / 4); 2879 else 2880 linker_error(prog, "%s shader uses too many input components " 2881 "(%u > %u)\n", 2882 _mesa_shader_stage_to_string(consumer->Stage), 2883 input_components, 2884 max_input_components); 2885 2886 return false; 2887 } 2888 2889 return true; 2890} 2891 2892/* Lower unset/unused inputs/outputs */ 2893static void 2894remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog, 2895 unsigned stage, nir_variable_mode mode) 2896{ 2897 bool progress = false; 2898 nir_shader *shader = prog->_LinkedShaders[stage]->Program->nir; 2899 2900 nir_foreach_variable_with_modes_safe(var, shader, mode) { 2901 if (!var->data.is_xfb_only && var->data.location == -1) { 2902 var->data.location = 0; 2903 var->data.mode = nir_var_shader_temp; 2904 progress = true; 2905 } 2906 } 2907 2908 if (progress) 2909 fixup_vars_lowered_to_temp(shader, mode); 2910} 2911 2912static bool 2913link_varyings(struct gl_shader_program *prog, unsigned first, 2914 unsigned last, const struct gl_constants *consts, 2915 const struct gl_extensions *exts, gl_api api, void *mem_ctx) 2916{ 2917 bool has_xfb_qualifiers = false; 2918 unsigned num_xfb_decls = 0; 2919 char **varying_names = NULL; 2920 struct xfb_decl *xfb_decls = NULL; 2921 2922 if (last > MESA_SHADER_FRAGMENT) 2923 return true; 2924 2925 /* From the ARB_enhanced_layouts spec: 2926 * 2927 * "If the shader used to record output variables for transform feedback 2928 * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout 2929 * qualifiers, the values specified by TransformFeedbackVaryings are 2930 * ignored, and the set of variables captured for transform feedback is 2931 * instead derived from the specified layout qualifiers." 2932 */ 2933 for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) { 2934 /* Find last stage before fragment shader */ 2935 if (prog->_LinkedShaders[i]) { 2936 has_xfb_qualifiers = 2937 process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i], 2938 prog, &num_xfb_decls, 2939 &varying_names); 2940 break; 2941 } 2942 } 2943 2944 if (!has_xfb_qualifiers) { 2945 num_xfb_decls = prog->TransformFeedback.NumVarying; 2946 varying_names = prog->TransformFeedback.VaryingNames; 2947 } 2948 2949 if (num_xfb_decls != 0) { 2950 /* From GL_EXT_transform_feedback: 2951 * A program will fail to link if: 2952 * 2953 * * the <count> specified by TransformFeedbackVaryingsEXT is 2954 * non-zero, but the program object has no vertex or geometry 2955 * shader; 2956 */ 2957 if (first >= MESA_SHADER_FRAGMENT) { 2958 linker_error(prog, "Transform feedback varyings specified, but " 2959 "no vertex, tessellation, or geometry shader is " 2960 "present.\n"); 2961 return false; 2962 } 2963 2964 xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl, 2965 num_xfb_decls); 2966 if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls, 2967 varying_names, xfb_decls)) 2968 return false; 2969 } 2970 2971 struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES]; 2972 unsigned num_shaders = 0; 2973 2974 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 2975 if (prog->_LinkedShaders[i]) 2976 linked_shader[num_shaders++] = prog->_LinkedShaders[i]; 2977 } 2978 2979 struct varying_matches vm; 2980 if (last < MESA_SHADER_FRAGMENT && 2981 (num_xfb_decls != 0 || prog->SeparateShader)) { 2982 struct gl_linked_shader *producer = prog->_LinkedShaders[last]; 2983 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, 2984 producer, NULL, num_xfb_decls, 2985 xfb_decls, &vm)) 2986 return false; 2987 } 2988 2989 if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) { 2990 remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in); 2991 remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out); 2992 } 2993 2994 if (prog->SeparateShader) { 2995 struct gl_linked_shader *consumer = linked_shader[0]; 2996 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL, 2997 consumer, 0, NULL, &vm)) 2998 return false; 2999 } 3000 3001 if (num_shaders == 1) { 3002 /* Linking shaders also optimizes them. Separate shaders, compute shaders 3003 * and shaders with a fixed-func VS or FS that don't need linking are 3004 * optimized here. 3005 */ 3006 gl_nir_opts(linked_shader[0]->Program->nir); 3007 } else { 3008 /* Linking the stages in the opposite order (from fragment to vertex) 3009 * ensures that inter-shader outputs written to in an earlier stage 3010 * are eliminated if they are (transitively) not used in a later 3011 * stage. 3012 */ 3013 for (int i = num_shaders - 2; i >= 0; i--) { 3014 unsigned stage_num_xfb_decls = 3015 linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ? 3016 num_xfb_decls : 0; 3017 3018 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, 3019 linked_shader[i], 3020 linked_shader[i + 1], 3021 stage_num_xfb_decls, xfb_decls, 3022 &vm)) 3023 return false; 3024 3025 /* Now that validation is done its safe to remove unused varyings. As 3026 * we have both a producer and consumer its safe to remove unused 3027 * varyings even if the program is a SSO because the stages are being 3028 * linked together i.e. we have a multi-stage SSO. 3029 */ 3030 link_shader_opts(&vm, linked_shader[i]->Program->nir, 3031 linked_shader[i + 1]->Program->nir, 3032 prog, mem_ctx); 3033 3034 remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage, 3035 nir_var_shader_out); 3036 remove_unused_shader_inputs_and_outputs(prog, 3037 linked_shader[i + 1]->Stage, 3038 nir_var_shader_in); 3039 } 3040 } 3041 3042 if (!prog->SeparateShader) { 3043 /* If not SSO remove unused varyings from the first/last stage */ 3044 NIR_PASS_V(prog->_LinkedShaders[first]->Program->nir, 3045 nir_remove_dead_variables, nir_var_shader_in, NULL); 3046 NIR_PASS_V(prog->_LinkedShaders[last]->Program->nir, 3047 nir_remove_dead_variables, nir_var_shader_out, NULL); 3048 } else { 3049 /* Sort inputs / outputs into a canonical order. This is necessary so 3050 * that inputs / outputs of separable shaders will be assigned 3051 * predictable locations regardless of the order in which declarations 3052 * appeared in the shader source. 3053 */ 3054 if (first != MESA_SHADER_VERTEX) { 3055 canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir, 3056 nir_var_shader_in); 3057 } 3058 3059 if (last != MESA_SHADER_FRAGMENT) { 3060 canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir, 3061 nir_var_shader_out); 3062 } 3063 } 3064 3065 /* If there is no fragment shader we need to set transform feedback. 3066 * 3067 * For SSO we also need to assign output locations. We assign them here 3068 * because we need to do it for both single stage programs and multi stage 3069 * programs. 3070 */ 3071 if (last < MESA_SHADER_FRAGMENT && 3072 (num_xfb_decls != 0 || prog->SeparateShader)) { 3073 const uint64_t reserved_out_slots = 3074 reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out); 3075 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, 3076 prog->_LinkedShaders[last], NULL, 3077 num_xfb_decls, xfb_decls, 3078 reserved_out_slots, &vm)) 3079 return false; 3080 } 3081 3082 if (prog->SeparateShader) { 3083 struct gl_linked_shader *const sh = prog->_LinkedShaders[first]; 3084 3085 const uint64_t reserved_slots = 3086 reserved_varying_slot(sh, nir_var_shader_in); 3087 3088 /* Assign input locations for SSO, output locations are already 3089 * assigned. 3090 */ 3091 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, 3092 NULL /* producer */, 3093 sh /* consumer */, 3094 0 /* num_xfb_decls */, 3095 NULL /* xfb_decls */, 3096 reserved_slots, &vm)) 3097 return false; 3098 } 3099 3100 if (num_shaders == 1) { 3101 gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0], 3102 0, NULL); 3103 gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL, 3104 num_xfb_decls, xfb_decls); 3105 } else { 3106 /* Linking the stages in the opposite order (from fragment to vertex) 3107 * ensures that inter-shader outputs written to in an earlier stage 3108 * are eliminated if they are (transitively) not used in a later 3109 * stage. 3110 */ 3111 int next = last; 3112 for (int i = next - 1; i >= 0; i--) { 3113 if (prog->_LinkedShaders[i] == NULL && i != 0) 3114 continue; 3115 3116 struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i]; 3117 struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next]; 3118 3119 gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next, 3120 next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0, 3121 xfb_decls); 3122 3123 const uint64_t reserved_out_slots = 3124 reserved_varying_slot(sh_i, nir_var_shader_out); 3125 const uint64_t reserved_in_slots = 3126 reserved_varying_slot(sh_next, nir_var_shader_in); 3127 3128 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i, 3129 sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0, 3130 xfb_decls, reserved_out_slots | reserved_in_slots, &vm)) 3131 return false; 3132 3133 /* This must be done after all dead varyings are eliminated. */ 3134 if (sh_i != NULL) { 3135 unsigned slots_used = util_bitcount64(reserved_out_slots); 3136 if (!check_against_output_limit(consts, api, prog, sh_i, slots_used)) 3137 return false; 3138 } 3139 3140 unsigned slots_used = util_bitcount64(reserved_in_slots); 3141 if (!check_against_input_limit(consts, api, prog, sh_next, slots_used)) 3142 return false; 3143 3144 next = i; 3145 } 3146 } 3147 3148 if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls, 3149 has_xfb_qualifiers, mem_ctx)) 3150 return false; 3151 3152 return true; 3153} 3154 3155bool 3156gl_nir_link_varyings(const struct gl_constants *consts, 3157 const struct gl_extensions *exts, 3158 gl_api api, struct gl_shader_program *prog) 3159{ 3160 void *mem_ctx = ralloc_context(NULL); 3161 3162 unsigned first, last; 3163 3164 first = MESA_SHADER_STAGES; 3165 last = 0; 3166 3167 /* We need to initialise the program resource list because the varying 3168 * packing pass my start inserting varyings onto the list. 3169 */ 3170 init_program_resource_list(prog); 3171 3172 /* Determine first and last stage. */ 3173 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 3174 if (!prog->_LinkedShaders[i]) 3175 continue; 3176 if (first == MESA_SHADER_STAGES) 3177 first = i; 3178 last = i; 3179 } 3180 3181 bool r = link_varyings(prog, first, last, consts, exts, api, mem_ctx); 3182 if (r) { 3183 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 3184 if (!prog->_LinkedShaders[i]) 3185 continue; 3186 3187 /* Check for transform feedback varyings specified via the API */ 3188 prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings = 3189 prog->TransformFeedback.NumVarying > 0; 3190 3191 /* Check for transform feedback varyings specified in the Shader */ 3192 if (prog->last_vert_prog) { 3193 prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings |= 3194 prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0; 3195 } 3196 } 3197 3198 /* Assign NIR XFB info to the last stage before the fragment shader */ 3199 for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) { 3200 struct gl_linked_shader *sh = prog->_LinkedShaders[stage]; 3201 if (sh && stage != MESA_SHADER_TESS_CTRL) { 3202 sh->Program->nir->xfb_info = 3203 gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback, 3204 sh->Program->nir); 3205 break; 3206 } 3207 } 3208 } 3209 3210 ralloc_free(mem_ctx); 3211 return r; 3212} 3213