/*
 * Copyright © 2012 Intel Corporation
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * Linker functions related specifically to linking varyings between shader
 * stages.
 */

#include "main/errors.h"
#include "main/macros.h"
#include "main/menums.h"
#include "main/mtypes.h"
#include "util/hash_table.h"
#include "util/u_math.h"

#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_link_varyings.h"
#include "gl_nir_linker.h"
#include "linker_util.h"
#include "nir_gl_types.h"


/**
 * Get the varying type stripped of the outermost array if we're processing
 * a stage whose varyings are arrays indexed by a vertex number (such as
 * geometry shader inputs).
 */
static const struct glsl_type *
get_varying_type(const nir_variable *var, gl_shader_stage stage)
{
   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   return type;
}
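
/* Illustrative example: a geometry shader input declared in GLSL as
 * "in vec4 color[];" shows up here with type vec4[N], where N is the
 * number of vertices in the input primitive; get_varying_type() strips
 * the outer array and returns vec4, the per-vertex type of the varying.
 */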

static bool
varying_has_user_specified_location(const nir_variable *var)
{
   return var->data.explicit_location &&
      var->data.location >= VARYING_SLOT_VAR0;
}

static void
create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name,
                         size_t name_length, unsigned *count,
                         const char *ifc_member_name,
                         const struct glsl_type *ifc_member_t,
                         char ***varying_names)
{
   if (glsl_type_is_interface(t)) {
      size_t new_length = name_length;

      assert(ifc_member_name && ifc_member_t);
      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);

      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
                               NULL, NULL, varying_names);
   } else if (glsl_type_is_struct(t)) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         const char *field = glsl_get_struct_elem_name(t, i);
         size_t new_length = name_length;

         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);

         create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, i), name,
                                  new_length, count, NULL, NULL,
                                  varying_names);
      }
   } else if (glsl_type_is_struct(glsl_without_array(t)) ||
              glsl_type_is_interface(glsl_without_array(t)) ||
              (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)))) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         size_t new_length = name_length;

         /* Append the subscript to the current variable name */
         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name,
                                  new_length, count, ifc_member_name,
                                  ifc_member_t, varying_names);
      }
   } else {
      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
   }
}
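
/* Illustrative example: for a captured output declared as
 * "struct S { float a; vec2 b; }; out S s[2];" the recursion above
 * appends "[i]" for each array element and ".field" for each struct
 * member, recording the four leaf names:
 *
 *    s[0].a, s[0].b, s[1].a, s[1].b
 */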

static bool
process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh,
                              struct gl_shader_program *prog,
                              unsigned *num_xfb_decls,
                              char ***varying_names)
{
   bool has_xfb_qualifiers = false;

   /* We still need to enable transform feedback mode even if xfb_stride is
    * only applied to a global out. We also don't bother to propagate
    * xfb_stride to interface block members, so this catches that case too.
    */
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
      if (prog->TransformFeedback.BufferStride[j]) {
         has_xfb_qualifiers = true;
         break;
      }
   }

   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      /* From the ARB_enhanced_layouts spec:
       *
       *    "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup.  This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
         has_xfb_qualifiers = true;
      }

      if (var->data.explicit_offset) {
         *num_xfb_decls += glsl_varying_count(var->type);
         has_xfb_qualifiers = true;
      }
   }

   if (*num_xfb_decls == 0)
      return has_xfb_qualifiers;

   unsigned i = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls);
   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      if (var->data.explicit_offset) {
         char *name;
         const struct glsl_type *type, *member_type;

         if (var->data.from_named_ifc_block) {
            type = var->interface_type;

            /* Find the member type before it was altered by lowering */
            const struct glsl_type *type_wa = glsl_without_array(type);
            member_type =
               glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name));
            name = ralloc_strdup(NULL, glsl_get_type_name(type_wa));
         } else {
            type = var->type;
            member_type = NULL;
            name = ralloc_strdup(NULL, var->name);
         }
         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                  var->name, member_type, varying_names);
         ralloc_free(name);
      }
   }

   assert(i == *num_xfb_decls);
   return has_xfb_qualifiers;
}
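
/* Illustrative example: a shader containing
 *
 *    layout(xfb_buffer = 0, xfb_offset = 0) out vec4 pos;
 *
 * returns true from process_xfb_layout_qualifiers() and counts one xfb
 * declaration named "pos", since the explicit offset selects that
 * output for capture.
 */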

/**
 * Initialize this struct based on a string that was passed to
 * glTransformFeedbackVaryings.
 *
 * If the input is mal-formed, this call still succeeds, but it sets
 * xfb_decl->var_name to the mal-formed input, so xfb_decl_find_output_var()
 * will fail to find any matching variable.
 */
static void
xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
              const struct gl_extensions *exts, const void *mem_ctx,
              const char *input)
{
   /* We don't have to be pedantic about what is a valid GLSL variable name,
    * because any variable with an invalid name can't exist in the IR anyway.
    */
   xfb_decl->location = -1;
   xfb_decl->orig_name = input;
   xfb_decl->lowered_builtin_array_variable = none;
   xfb_decl->skip_components = 0;
   xfb_decl->next_buffer_separator = false;
   xfb_decl->matched_candidate = NULL;
   xfb_decl->stream_id = 0;
   xfb_decl->buffer = 0;
   xfb_decl->offset = 0;

   if (exts->ARB_transform_feedback3) {
      /* Parse gl_NextBuffer. */
      if (strcmp(input, "gl_NextBuffer") == 0) {
         xfb_decl->next_buffer_separator = true;
         return;
      }

      /* Parse gl_SkipComponents. */
      if (strcmp(input, "gl_SkipComponents1") == 0)
         xfb_decl->skip_components = 1;
      else if (strcmp(input, "gl_SkipComponents2") == 0)
         xfb_decl->skip_components = 2;
      else if (strcmp(input, "gl_SkipComponents3") == 0)
         xfb_decl->skip_components = 3;
      else if (strcmp(input, "gl_SkipComponents4") == 0)
         xfb_decl->skip_components = 4;

      if (xfb_decl->skip_components)
         return;
   }

   /* Parse a declaration. */
   const char *base_name_end;
   long subscript = link_util_parse_program_resource_name(input, strlen(input),
                                                          &base_name_end);
   xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
   if (xfb_decl->var_name == NULL) {
      _mesa_error_no_memory(__func__);
      return;
   }

   if (subscript >= 0) {
      xfb_decl->array_subscript = subscript;
      xfb_decl->is_subscripted = true;
   } else {
      xfb_decl->is_subscripted = false;
   }

   /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
    * code must behave specially to account for the fact that gl_ClipDistance
    * is converted from a float[8] to a vec4[2].
    */
   if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
       strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0) {
      xfb_decl->lowered_builtin_array_variable = clip_distance;
   }
   if (consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
       strcmp(xfb_decl->var_name, "gl_CullDistance") == 0) {
      xfb_decl->lowered_builtin_array_variable = cull_distance;
   }

   if (consts->LowerTessLevel &&
       (strcmp(xfb_decl->var_name, "gl_TessLevelOuter") == 0))
      xfb_decl->lowered_builtin_array_variable = tess_level_outer;
   if (consts->LowerTessLevel &&
       (strcmp(xfb_decl->var_name, "gl_TessLevelInner") == 0))
      xfb_decl->lowered_builtin_array_variable = tess_level_inner;
}
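
/* Illustrative example: given the string "foo[2]" from
 * glTransformFeedbackVaryings(), link_util_parse_program_resource_name()
 * reports a subscript of 2, so the decl ends up with var_name = "foo",
 * is_subscripted = true and array_subscript = 2.  A plain "foo" yields
 * a negative subscript and is_subscripted = false.
 */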

/**
 * Determine whether two xfb_decl structs refer to the same variable and
 * array index (if applicable).
 */
static bool
xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y)
{
   assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y));

   if (strcmp(x->var_name, y->var_name) != 0)
      return false;
   if (x->is_subscripted != y->is_subscripted)
      return false;
   if (x->is_subscripted && x->array_subscript != y->array_subscript)
      return false;
   return true;
}

/**
 * The total number of varying components taken up by this variable.  Only
 * valid if xfb_decl_assign_location() has been called.
 */
static unsigned
xfb_decl_num_components(struct xfb_decl *xfb_decl)
{
   if (xfb_decl->lowered_builtin_array_variable)
      return xfb_decl->size;
   else
      return xfb_decl->vector_elements * xfb_decl->matrix_columns *
         xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
}
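
/* Worked example (illustrative): a captured dvec3[2] has
 * vector_elements = 3, matrix_columns = 1 and size = 2, and its GL type
 * is 64-bit, so this returns 3 * 1 * 2 * 2 = 12 float-sized components.
 */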

/**
 * Assign a location and stream ID for this xfb_decl object based on the
 * transform feedback candidate found by xfb_decl_find_candidate().
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_assign_location(struct xfb_decl *xfb_decl,
                         const struct gl_constants *consts,
                         struct gl_shader_program *prog)
{
   assert(xfb_decl_is_varying(xfb_decl));

   unsigned fine_location
      = xfb_decl->matched_candidate->toplevel_var->data.location * 4
      + xfb_decl->matched_candidate->toplevel_var->data.location_frac
      + xfb_decl->matched_candidate->struct_offset_floats;
   const unsigned dmul =
      glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1;

   if (glsl_type_is_array(xfb_decl->matched_candidate->type)) {
      /* Array variable */
      const struct glsl_type *element_type =
         glsl_get_array_element(xfb_decl->matched_candidate->type);
      const unsigned matrix_cols = glsl_get_matrix_columns(element_type);
      const unsigned vector_elements = glsl_get_vector_elements(element_type);
      unsigned actual_array_size;
      switch (xfb_decl->lowered_builtin_array_variable) {
      case clip_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.clip_distance_array_size : 0;
         break;
      case cull_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.cull_distance_array_size : 0;
         break;
      case tess_level_outer:
         actual_array_size = 4;
         break;
      case tess_level_inner:
         actual_array_size = 2;
         break;
      case none:
      default:
         actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type);
         break;
      }

      if (xfb_decl->is_subscripted) {
         /* Check array bounds. */
         if (xfb_decl->array_subscript >= actual_array_size) {
            linker_error(prog, "Transform feedback varying %s has index "
                         "%u, but the array size is %u.",
                         xfb_decl->orig_name, xfb_decl->array_subscript,
                         actual_array_size);
            return false;
         }
         unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ?
            1 : vector_elements * matrix_cols * dmul;
         fine_location += array_elem_size * xfb_decl->array_subscript;
         xfb_decl->size = 1;
      } else {
         xfb_decl->size = actual_array_size;
      }
      xfb_decl->vector_elements = vector_elements;
      xfb_decl->matrix_columns = matrix_cols;
      if (xfb_decl->lowered_builtin_array_variable)
         xfb_decl->type = GL_FLOAT;
      else
         xfb_decl->type = glsl_get_gl_type(element_type);
   } else {
      /* Regular variable (scalar, vector, or matrix) */
      if (xfb_decl->is_subscripted) {
         linker_error(prog, "Transform feedback varying %s requested, "
                      "but %s is not an array.",
                      xfb_decl->orig_name, xfb_decl->var_name);
         return false;
      }
      xfb_decl->size = 1;
      xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type);
      xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type);
      xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type);
   }
   xfb_decl->location = fine_location / 4;
   xfb_decl->location_frac = fine_location % 4;

   /* From GL_EXT_transform_feedback:
    *   A program will fail to link if:
    *
    *   * the total number of components to capture in any varying
    *     variable in <varyings> is greater than the constant
    *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
    *     buffer mode is SEPARATE_ATTRIBS_EXT;
    */
   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       xfb_decl_num_components(xfb_decl) >
       consts->MaxTransformFeedbackSeparateComponents) {
      linker_error(prog, "Transform feedback varying %s exceeds "
                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
                   xfb_decl->orig_name);
      return false;
   }

   /* Only transform feedback varyings can be assigned to non-zero streams,
    * so assign the stream id here.
    */
   xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream;

   unsigned array_offset = xfb_decl->array_subscript * 4 * dmul;
   unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4;
   xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer;
   xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset +
      array_offset + struct_offset;

   return true;
}
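
/* Worked example (illustrative numbers): assume the matched candidate
 * sits at slot L with location_frac 0 and no struct offset, so
 * fine_location starts at L * 4.  Capturing element 1 of a vec3 array
 * adds array_elem_size * array_subscript = 3 * 1 floats, giving
 * fine_location = L * 4 + 3, i.e. location = L and location_frac = 3.
 */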

static unsigned
xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl)
{
   if (!xfb_decl_is_varying(xfb_decl)) {
      return 0;
   }

   if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
      unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1;
      unsigned rows_per_element = DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4);
      return xfb_decl->size * xfb_decl->matrix_columns * rows_per_element;
   } else {
      return (xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac + 3) / 4;
   }
}
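
/* Worked example (illustrative): for a dvec3[2] with a user-specified
 * location, dmul = 2 and rows_per_element = DIV_ROUND_UP(3 * 2, 4) = 2,
 * so this returns size * matrix_columns * rows_per_element =
 * 2 * 1 * 2 = 4 slots, matching the dvec3[2] layout in the table inside
 * xfb_decl_store() below.
 */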

static bool
xfb_decl_is_varying_written(struct xfb_decl *xfb_decl)
{
   if (xfb_decl->next_buffer_separator || xfb_decl->skip_components)
      return false;

   return xfb_decl->matched_candidate->toplevel_var->data.assigned;
}

/**
 * Update gl_transform_feedback_info to reflect this xfb_decl.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
               struct gl_shader_program *prog,
               struct gl_transform_feedback_info *info,
               unsigned buffer, unsigned buffer_index,
               const unsigned max_outputs,
               BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
               bool *explicit_stride, unsigned *max_member_alignment,
               bool has_xfb_qualifiers, const void* mem_ctx)
{
   unsigned xfb_offset = 0;
   unsigned size = xfb_decl->size;
   /* Handle gl_SkipComponents. */
   if (xfb_decl->skip_components) {
      info->Buffers[buffer].Stride += xfb_decl->skip_components;
      size = xfb_decl->skip_components;
      goto store_varying;
   }

   if (xfb_decl->next_buffer_separator) {
      size = 0;
      goto store_varying;
   }

   if (has_xfb_qualifiers) {
      xfb_offset = xfb_decl->offset / 4;
   } else {
      xfb_offset = info->Buffers[buffer].Stride;
   }
   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;

   {
      unsigned location = xfb_decl->location;
      unsigned location_frac = xfb_decl->location_frac;
      unsigned num_components = xfb_decl_num_components(xfb_decl);

      /* From GL_EXT_transform_feedback:
       *
       *   " A program will fail to link if:
       *
       *       * the total number of components to capture is greater than the
       *         constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
       *         and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
       *
       * From GL_ARB_enhanced_layouts:
       *
       *   " The resulting stride (implicit or explicit) must be less than or
       *     equal to the implementation-dependent constant
       *     gl_MaxTransformFeedbackInterleavedComponents."
       */
      if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
           has_xfb_qualifiers) &&
          xfb_offset + num_components >
          consts->MaxTransformFeedbackInterleavedComponents) {
         linker_error(prog,
                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                      "limit has been exceeded.");
         return false;
      }

      /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
       * Page 76, (Transform Feedback Layout Qualifiers):
       *
       *   " No aliasing in output buffers is allowed: It is a compile-time or
       *     link-time error to specify variables with overlapping transform
       *     feedback offsets."
       */
      const unsigned max_components =
         consts->MaxTransformFeedbackInterleavedComponents;
      const unsigned first_component = xfb_offset;
      const unsigned last_component = xfb_offset + num_components - 1;
      const unsigned start_word = BITSET_BITWORD(first_component);
      const unsigned end_word = BITSET_BITWORD(last_component);
      BITSET_WORD *used;
      assert(last_component < max_components);

      if (!used_components[buffer]) {
         used_components[buffer] =
            rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
      }
      used = used_components[buffer];

      for (unsigned word = start_word; word <= end_word; word++) {
         unsigned start_range = 0;
         unsigned end_range = BITSET_WORDBITS - 1;

         if (word == start_word)
            start_range = first_component % BITSET_WORDBITS;

         if (word == end_word)
            end_range = last_component % BITSET_WORDBITS;

         if (used[word] & BITSET_RANGE(start_range, end_range)) {
            linker_error(prog,
                         "variable '%s', xfb_offset (%d) is causing aliasing.",
                         xfb_decl->orig_name, xfb_offset * 4);
            return false;
         }
         used[word] |= BITSET_RANGE(start_range, end_range);
      }

      const unsigned type_num_components =
         xfb_decl->vector_elements *
         (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
      unsigned current_type_components_left = type_num_components;

      while (num_components > 0) {
         unsigned output_size = 0;

         /*  From GL_ARB_enhanced_layouts:
          *
          * "When an attribute variable declared using an array type is bound to
          * generic attribute index <i>, the active array elements are assigned to
          * consecutive generic attributes beginning with generic attribute <i>.  The
          * number of attributes and components assigned to each element are
          * determined according to the data type of array elements and "component"
          * layout qualifier (if any) specified in the declaration of the array."
          *
          * "When an attribute variable declared using a matrix type is bound to a
          * generic attribute index <i>, its values are taken from consecutive generic
          * attributes beginning with generic attribute <i>.  Such matrices are
          * treated as an array of column vectors with values taken from the generic
          * attributes.
          * This means there may be gaps in the varyings we are taking values from."
          *
          * Examples:
          *
          * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
          * |                                |                               |
          * |        32b 32b 32b 32b         |        32b 32b 32b 32b        |
          * |      0  X   X   Y   Y          |      4  X   Y   0   0         |
          * |      1  Z   Z   0   0          |      5  X   Y   0   0         |
          * |      2  X   X   Y   Y          |      6  X   Y   0   0         |
          * |      3  Z   Z   0   0          |      7  X   Y   0   0         |
          *
          */
         if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
            output_size = MIN3(num_components, current_type_components_left, 4);
            current_type_components_left -= output_size;
            if (current_type_components_left == 0) {
               current_type_components_left = type_num_components;
            }
         } else {
            output_size = MIN2(num_components, 4 - location_frac);
         }

         assert((info->NumOutputs == 0 && max_outputs == 0) ||
                info->NumOutputs < max_outputs);

         /* From the ARB_enhanced_layouts spec:
          *
          *    "If such a block member or variable is not written during a shader
          *    invocation, the buffer contents at the assigned offset will be
          *    undefined.  Even if there are no static writes to a variable or
          *    member that is assigned a transform feedback offset, the space is
          *    still allocated in the buffer and still affects the stride."
          */
         if (xfb_decl_is_varying_written(xfb_decl)) {
            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
            info->Outputs[info->NumOutputs].OutputRegister = location;
            info->Outputs[info->NumOutputs].NumComponents = output_size;
            info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id;
            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
            ++info->NumOutputs;
         }
         info->Buffers[buffer].Stream = xfb_decl->stream_id;
         xfb_offset += output_size;

         num_components -= output_size;
         location++;
         location_frac = 0;
      }
   }

   if (explicit_stride && explicit_stride[buffer]) {
      if (_mesa_gl_datatype_is_64bit(xfb_decl->type) &&
          info->Buffers[buffer].Stride % 2) {
         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
                      "multiple of 8 as it's applied to a type that is or "
                      "contains a double.",
                      info->Buffers[buffer].Stride * 4);
         return false;
      }

      if (xfb_offset > info->Buffers[buffer].Stride) {
         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                      "buffer (%d)", xfb_offset * 4,
                      info->Buffers[buffer].Stride * 4, buffer);
         return false;
      }
   } else {
      if (max_member_alignment && has_xfb_qualifiers) {
         max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                             _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
                                              max_member_alignment[buffer]);
      } else {
         info->Buffers[buffer].Stride = xfb_offset;
      }
   }

 store_varying:
   info->Varyings[info->NumVarying].name.string =
      ralloc_strdup(prog, xfb_decl->orig_name);
   resource_name_updated(&info->Varyings[info->NumVarying].name);
   info->Varyings[info->NumVarying].Type = xfb_decl->type;
   info->Varyings[info->NumVarying].Size = size;
   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
   info->NumVarying++;
   info->Buffers[buffer].NumVaryings++;

   return true;
}
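
/* Illustrative example of the aliasing check above: a vec3 captured
 * with xfb_offset = 16 bytes occupies float components 4..6 of its
 * buffer.  A second varying captured at xfb_offset = 24 (component 6)
 * would overlap component 6 and fail the link, while xfb_offset = 28
 * (component 7) would not.
 */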

static const struct tfeedback_candidate *
xfb_decl_find_candidate(struct xfb_decl *xfb_decl,
                        struct gl_shader_program *prog,
                        struct hash_table *tfeedback_candidates)
{
   const char *name = xfb_decl->var_name;
   switch (xfb_decl->lowered_builtin_array_variable) {
   case none:
      name = xfb_decl->var_name;
      break;
   case clip_distance:
      name = "gl_ClipDistanceMESA";
      break;
   case cull_distance:
      name = "gl_CullDistanceMESA";
      break;
   case tess_level_outer:
      name = "gl_TessLevelOuterMESA";
      break;
   case tess_level_inner:
      name = "gl_TessLevelInnerMESA";
      break;
   }
   struct hash_entry *entry =
      _mesa_hash_table_search(tfeedback_candidates, name);

   xfb_decl->matched_candidate = entry ?
         (struct tfeedback_candidate *) entry->data : NULL;

   if (!xfb_decl->matched_candidate) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * any variable name specified in the <varyings> array is not
       *     declared as an output in the geometry shader (if present) or
       *     the vertex shader (if no geometry shader is present);
       */
      linker_error(prog, "Transform feedback varying %s undeclared.",
                   xfb_decl->orig_name);
   }

   return xfb_decl->matched_candidate;
}

/**
 * Force a candidate over the previously matched one. It happens when a new
 * varying needs to be created to match the xfb declaration, for example,
 * to fulfill an alignment criterion.
 */
static void
xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl,
                               struct tfeedback_candidate *candidate)
{
   xfb_decl->matched_candidate = candidate;

   /* The subscript part is no longer relevant */
   xfb_decl->is_subscripted = false;
   xfb_decl->array_subscript = 0;
}

/**
 * Parse all the transform feedback declarations that were passed to
 * glTransformFeedbackVaryings() and store them in xfb_decl objects.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
parse_xfb_decls(const struct gl_constants *consts,
                const struct gl_extensions *exts,
                struct gl_shader_program *prog,
                const void *mem_ctx, unsigned num_names,
                char **varying_names, struct xfb_decl *decls)
{
   for (unsigned i = 0; i < num_names; ++i) {
      xfb_decl_init(&decls[i], consts, exts, mem_ctx, varying_names[i]);

      if (!xfb_decl_is_varying(&decls[i]))
         continue;

      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * any two entries in the <varyings> array specify the same varying
       *     variable;
       *
       * We interpret this to mean "any two entries in the <varyings> array
       * specify the same varying variable and array index", since transform
       * feedback of arrays would be useless otherwise.
       */
      for (unsigned j = 0; j < i; ++j) {
         if (xfb_decl_is_varying(&decls[j])) {
            if (xfb_decl_is_same(&decls[i], &decls[j])) {
               linker_error(prog, "Transform feedback varying %s specified "
                            "more than once.", varying_names[i]);
               return false;
            }
         }
      }
   }
   return true;
}
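
/* Illustrative example: passing { "foo", "gl_NextBuffer", "bar[1]" } to
 * glTransformFeedbackVaryings() yields a varying decl for "foo", a
 * buffer separator, and a subscripted decl for element 1 of "bar".
 * Listing "foo" a second time would fail here with the "specified more
 * than once" error.
 */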

static int
cmp_xfb_offset(const void * x_generic, const void * y_generic)
{
   struct xfb_decl *x = (struct xfb_decl *) x_generic;
   struct xfb_decl *y = (struct xfb_decl *) y_generic;

   if (x->buffer != y->buffer)
      return x->buffer - y->buffer;
   return x->offset - y->offset;
}

/**
 * Store transform feedback location assignments into
 * prog->sh.LinkedTransformFeedback based on the data stored in
 * xfb_decls.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
store_tfeedback_info(const struct gl_constants *consts,
                     struct gl_shader_program *prog, unsigned num_xfb_decls,
                     struct xfb_decl *xfb_decls, bool has_xfb_qualifiers,
                     const void *mem_ctx)
{
   if (!prog->last_vert_prog)
      return true;

   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
    * tracking the number of buffers doesn't overflow.
    */
   assert(consts->MaxTransformFeedbackBuffers < 32);

   bool separate_attribs_mode =
      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;

   struct gl_program *xfb_prog = prog->last_vert_prog;
   xfb_prog->sh.LinkedTransformFeedback =
      rzalloc(xfb_prog, struct gl_transform_feedback_info);

   /* The xfb_offset qualifier does not have to be used in increasing order,
    * however some drivers expect to receive the list of transform feedback
    * declarations in order, so sort it now for convenience.
    */
   if (has_xfb_qualifiers) {
      qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls),
            cmp_xfb_offset);
   }

   xfb_prog->sh.LinkedTransformFeedback->Varyings =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
                    num_xfb_decls);

   unsigned num_outputs = 0;
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (xfb_decl_is_varying_written(&xfb_decls[i]))
         num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]);
   }

   xfb_prog->sh.LinkedTransformFeedback->Outputs =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
                    num_outputs);

   unsigned num_buffers = 0;
   unsigned buffers = 0;
   BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0};

   if (!has_xfb_qualifiers && separate_attribs_mode) {
      /* GL_SEPARATE_ATTRIBS */
      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             num_buffers, num_buffers, num_outputs,
                             used_components, NULL, NULL, has_xfb_qualifiers,
                             mem_ctx))
            return false;

         buffers |= 1 << num_buffers;
         num_buffers++;
      }
   }
   else {
      /* GL_INTERLEAVED_ATTRIBS */
      int buffer_stream_id = -1;
      unsigned buffer =
         num_xfb_decls ? xfb_decls[0].buffer : 0;
      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
      unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
      /* Apply any xfb_stride global qualifiers */
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
            }
         }
      }

      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (has_xfb_qualifiers &&
             buffer != xfb_decls[i].buffer) {
            /* we have moved to the next buffer so reset stream id */
            buffer_stream_id = -1;
            num_buffers++;
         }

         if (xfb_decls[i].next_buffer_separator) {
            if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                                xfb_prog->sh.LinkedTransformFeedback,
                                buffer, num_buffers, num_outputs,
                                used_components, explicit_stride,
                                max_member_alignment, has_xfb_qualifiers,
                                mem_ctx))
               return false;
            num_buffers++;
            buffer_stream_id = -1;
            continue;
         }

         if (has_xfb_qualifiers) {
            buffer = xfb_decls[i].buffer;
         } else {
            buffer = num_buffers;
         }

         if (xfb_decl_is_varying(&xfb_decls[i])) {
            if (buffer_stream_id == -1) {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) xfb_decls[i].stream_id;

               /* Only mark a buffer as active when there is a varying
                * attached to it. This behaviour is based on a revised version
                * of section 13.2.2 of the GL 4.6 spec.
                */
               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) xfb_decls[i].stream_id) {
               /* Varying writes to the same buffer from a different stream */
               linker_error(prog,
                            "Transform feedback can't capture varyings belonging "
                            "to different vertex streams in a single buffer. "
                            "Varying %s writes to buffer from stream %u, other "
                            "varyings in the same buffer write from stream %u.",
                            xfb_decls[i].orig_name,
                            xfb_decls[i].stream_id,
                            buffer_stream_id);
               return false;
            }
         }

         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             buffer, num_buffers, num_outputs, used_components,
                             explicit_stride, max_member_alignment,
                             has_xfb_qualifiers, mem_ctx))
            return false;
      }
   }
   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);

   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
   return true;
}

/**
 * Enum representing the order in which varyings are packed within a
 * packing class.
 *
 * Currently we pack vec4's first, then vec2's, then scalar values, then
 * vec3's.  This order ensures that the only vectors that are at risk of
 * having to be "double parked" (split between two adjacent varying slots)
 * are the vec3's.
 */
enum packing_order_enum {
   PACKING_ORDER_VEC4,
   PACKING_ORDER_VEC2,
   PACKING_ORDER_SCALAR,
   PACKING_ORDER_VEC3,
};
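
/* Illustrative example: within one packing class this order lets a vec4
 * claim a whole slot, two vec2s share a slot, and scalars fill leftover
 * single-component holes.  A vec3 placed last may still straddle a slot
 * boundary, e.g. one starting at component 3 takes component 3 of one
 * slot and components 0..1 of the next.
 */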

/**
 * Structure recording the relationship between a single producer output
 * and a single consumer input.
 */
struct match {
   /**
    * Packing class for this varying, computed by compute_packing_class().
    */
   unsigned packing_class;

   /**
    * Packing order for this varying, computed by compute_packing_order().
    */
   enum packing_order_enum packing_order;

   /**
    * The output variable in the producer stage.
    */
   nir_variable *producer_var;

   /**
    * The input variable in the consumer stage.
    */
   nir_variable *consumer_var;

   /**
    * The location which has been assigned for this varying.  This is
    * expressed in multiples of a float, with the first generic varying
    * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
    * value 0.
    */
   unsigned generic_location;
};

/**
 * Data structure recording the relationship between outputs of one shader
 * stage (the "producer") and inputs of another (the "consumer").
 */
struct varying_matches
{
   /**
    * If true, this driver disables varying packing, so all varyings need to
    * be aligned on slot boundaries, and take up a number of slots equal to
    * their number of matrix columns times their array size.
    *
    * Packing may also be disabled because our current packing method is not
    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
    * guaranteed to match across stages.
    */
   bool disable_varying_packing;

   /**
    * If true, this driver disables packing for varyings used by transform
    * feedback.
    */
   bool disable_xfb_packing;

   /**
    * If true, this driver has transform feedback enabled. The transform
    * feedback code usually requires at least some packing be done even
    * when varying packing is disabled; fortunately, where transform feedback
    * requires packing it's safe to override the disabled setting. See
    * is_varying_packing_safe().
    */
   bool xfb_enabled;

   bool enhanced_layouts_enabled;

   /**
    * If true, this driver prefers varyings to be aligned to power of two
    * in a slot.
    */
   bool prefer_pot_aligned_varyings;

   struct match *matches;

   /**
    * The number of elements in the \c matches array that are currently in
    * use.
    */
   unsigned num_matches;

   /**
    * The number of elements that were set aside for the \c matches array when
    * it was allocated.
    */
   unsigned matches_capacity;

   gl_shader_stage producer_stage;
   gl_shader_stage consumer_stage;
};

/**
 * Comparison function passed to qsort() to sort varyings by packing_class and
 * then by packing_order.
 */
static int
varying_matches_match_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;
   const struct match *y = (const struct match *) y_generic;

   if (x->packing_class != y->packing_class)
      return x->packing_class - y->packing_class;
   return x->packing_order - y->packing_order;
}

/**
 * Comparison function passed to qsort() to sort varyings used only by
 * transform feedback when packing of other varyings is disabled.
 */
static int
varying_matches_xfb_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;

   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
      return varying_matches_match_comparator(x_generic, y_generic);

   /* FIXME: When the comparator returns 0 it means the elements being
    * compared are equivalent. However the qsort documentation says:
    *
    *    "The order of equivalent elements is undefined."
    *
    * In practice the sort ends up reversing the order of the varyings which
    * means locations are also assigned in this reversed order and happens to
    * be what we want. This is also what's happening in
    * varying_matches_match_comparator().
    */
   return 0;
}

/**
 * Comparison function passed to qsort() to sort varyings NOT used by
 * transform feedback when packing of xfb varyings is disabled.
 */
static int
varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;

   if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
      return varying_matches_match_comparator(x_generic, y_generic);

   /* FIXME: When the comparator returns 0 it means the elements being
    * compared are equivalent. However the qsort documentation says:
    *
    *    "The order of equivalent elements is undefined."
    *
    * In practice the sort ends up reversing the order of the varyings which
    * means locations are also assigned in this reversed order and happens to
    * be what we want. This is also what's happening in
    * varying_matches_match_comparator().
    */
   return 0;
}

static bool
is_unpackable_tess(gl_shader_stage producer_stage,
                   gl_shader_stage consumer_stage)
{
   if (consumer_stage == MESA_SHADER_TESS_EVAL ||
       consumer_stage == MESA_SHADER_TESS_CTRL ||
       producer_stage == MESA_SHADER_TESS_CTRL)
      return true;

   return false;
}

static void
init_varying_matches(void *mem_ctx, struct varying_matches *vm,
                     const struct gl_constants *consts,
                     const struct gl_extensions *exts,
                     gl_shader_stage producer_stage,
                     gl_shader_stage consumer_stage,
                     bool sso)
{
   /* Tessellation shaders treat inputs and outputs as shared memory and can
    * access inputs and outputs of other invocations.
    * Therefore, they can't be lowered to temps easily (and definitely not
    * efficiently).
    */
   bool unpackable_tess =
      is_unpackable_tess(producer_stage, consumer_stage);

   /* Transform feedback code assumes varying arrays are packed, so if the
    * driver has disabled varying packing, make sure to at least enable
    * packing required by transform feedback. See below for exception.
    */
   bool xfb_enabled = exts->EXT_transform_feedback && !unpackable_tess;

   /* Some drivers actually require packing to be explicitly disabled
    * for varyings used by transform feedback.
    */
   bool disable_xfb_packing = consts->DisableTransformFeedbackPacking;

   /* Disable packing on outward facing interfaces for SSO because in ES we
    * need to retain the unpacked varying information for draw time
    * validation.
    *
    * Packing is still enabled on individual arrays, structs, and matrices as
    * these are required by the transform feedback code and it is still safe
    * to do so. We also enable packing when a varying is only used for
    * transform feedback and it's not an SSO.
    */
   bool disable_varying_packing =
      consts->DisableVaryingPacking || unpackable_tess;
   if (sso && (producer_stage == MESA_SHADER_NONE || consumer_stage == MESA_SHADER_NONE))
      disable_varying_packing = true;

   /* Note: this initial capacity is rather arbitrarily chosen to be large
    * enough for many cases without wasting an unreasonable amount of space.
    * varying_matches_record() will resize the array if there are more than
    * this number of varyings.
    */
   vm->matches_capacity = 8;
   vm->matches = (struct match *)
      ralloc_array(mem_ctx, struct match, vm->matches_capacity);
   vm->num_matches = 0;

   vm->disable_varying_packing = disable_varying_packing;
   vm->disable_xfb_packing = disable_xfb_packing;
   vm->xfb_enabled = xfb_enabled;
   vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts;
   vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings;
   vm->producer_stage = producer_stage;
   vm->consumer_stage = consumer_stage;
}

/**
 * Packing is always safe on individual arrays, structures, and matrices. It
 * is also safe if the varying is only used for transform feedback.
 */
static bool
is_varying_packing_safe(struct varying_matches *vm,
                        const struct glsl_type *type, const nir_variable *var)
{
   if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage))
      return false;

   return vm->xfb_enabled && (glsl_type_is_array_or_matrix(type) ||
                              glsl_type_is_struct(type) ||
                              var->data.is_xfb_only);
}

static bool
is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type,
                    const nir_variable *var)
{
   return (vm->disable_varying_packing && !is_varying_packing_safe(vm, type, var)) ||
      (vm->disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) ||
         glsl_type_is_matrix(type))) || var->data.must_be_shader_input;
}

/**
 * Compute the "packing class" of the given varying.  This is an unsigned
 * integer with the property that two variables in the same packing class can
 * be safely packed into the same vec4.
 */
static unsigned
varying_matches_compute_packing_class(const nir_variable *var)
{
   /* Without help from the back-end, there is no way to pack together
    * variables with different interpolation types, because
    * lower_packed_varyings must choose exactly one interpolation type for
    * each packed varying it creates.
    *
    * However, we can safely pack together floats, ints, and uints, because:
    *
    * - varyings of base type "int" and "uint" must use the "flat"
    *   interpolation type, which can only occur in GLSL 1.30 and above.
    *
    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
    *   can store flat floats as ints without losing any information (using
    *   the ir_unop_bitcast_* opcodes).
    *
    * Therefore, the packing class depends only on the interpolation type.
    */
   bool is_interpolation_flat = var->data.interpolation == INTERP_MODE_FLAT ||
      glsl_contains_integer(var->type) || glsl_contains_double(var->type);

   const unsigned interp = is_interpolation_flat
      ? (unsigned) INTERP_MODE_FLAT : var->data.interpolation;

   assert(interp < (1 << 3));

   const unsigned packing_class = (interp << 0) |
                                  (var->data.centroid << 3) |
                                  (var->data.sample << 4) |
                                  (var->data.patch << 5) |
                                  (var->data.must_be_shader_input << 6);

   return packing_class;
}
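
/* Worked example (illustrative, assuming Mesa's enum values where
 * INTERP_MODE_FLAT == 2): a "flat centroid" varying gets
 * packing_class = (2 << 0) | (1 << 3) = 0xa, while a plain smooth
 * varying gets a different class, so the two are never packed into the
 * same vec4.
 */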

/**
 * Compute the "packing order" of the given varying.  This is a sort key we
 * use to determine when to attempt to pack the given varying relative to
 * other varyings in the same packing class.
 */
static enum packing_order_enum
varying_matches_compute_packing_order(const nir_variable *var)
{
   const struct glsl_type *element_type = glsl_without_array(var->type);

   switch (glsl_get_component_slots(element_type) % 4) {
   case 1: return PACKING_ORDER_SCALAR;
   case 2: return PACKING_ORDER_VEC2;
   case 3: return PACKING_ORDER_VEC3;
   case 0: return PACKING_ORDER_VEC4;
   default:
      assert(!"Unexpected value of vector_elements");
      return PACKING_ORDER_VEC4;
   }
}
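
/* Worked example (illustrative): glsl_get_component_slots() yields 6
 * for a mat2x3 (two vec3 columns), and 6 % 4 == 2 maps it to
 * PACKING_ORDER_VEC2; a vec4[3] strips to vec4, and 4 % 4 == 0 maps it
 * to PACKING_ORDER_VEC4.
 */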

/**
 * Built-in / reserved GL variable names start with "gl_"
 */
static bool
is_gl_identifier(const char *s)
{
   return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
}

/**
 * Record the given producer/consumer variable pair in the list of variables
 * that should later be assigned locations.
 *
 * It is permissible for \c consumer_var to be NULL (this happens if a
 * variable is output by the producer and consumed by transform feedback, but
 * not consumed by the consumer).
 *
 * If \c producer_var has already been paired up with a consumer_var, or
 * producer_var is part of fixed pipeline functionality (and hence already has
 * a location assigned), this function has no effect.
 *
 * Note: as a side effect this function may change the interpolation type of
 * \c producer_var, but only when the change couldn't possibly affect
 * rendering.
 */
static void
varying_matches_record(void *mem_ctx, struct varying_matches *vm,
                       nir_variable *producer_var, nir_variable *consumer_var)
{
   assert(producer_var != NULL || consumer_var != NULL);

   if ((producer_var &&
       (producer_var->data.explicit_location || producer_var->data.location != -1)) ||
       (consumer_var &&
        (consumer_var->data.explicit_location || consumer_var->data.location != -1))) {
      /* Either a location already exists for this variable (since it is part
       * of fixed functionality), or it has already been assigned explicitly.
       */
      return;
   }

   /* The varyings should not have been matched and assigned previously */
   assert((producer_var == NULL || producer_var->data.location == -1) &&
          (consumer_var == NULL || consumer_var->data.location == -1));

   bool needs_flat_qualifier = consumer_var == NULL &&
      (glsl_contains_integer(producer_var->type) ||
       glsl_contains_double(producer_var->type));

   if (!vm->disable_varying_packing &&
       (!vm->disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
       (needs_flat_qualifier ||
        (vm->consumer_stage != MESA_SHADER_NONE && vm->consumer_stage != MESA_SHADER_FRAGMENT))) {
      /* Since this varying is not being consumed by the fragment shader, its
       * interpolation type cannot possibly affect rendering.
       * Alternatively, this variable is non-flat and is (or contains) an
       * integer or a double.
       * If the consumer stage is unknown, don't modify the interpolation
       * type as it could affect rendering later with separate shaders.
       *
       * lower_packed_varyings requires all integer varyings to be flat,
       * regardless of where they appear.  We can trivially satisfy that
       * requirement by changing the interpolation type to flat here.
       */
      if (producer_var) {
         producer_var->data.centroid = false;
         producer_var->data.sample = false;
         producer_var->data.interpolation = INTERP_MODE_FLAT;
      }

      if (consumer_var) {
         consumer_var->data.centroid = false;
         consumer_var->data.sample = false;
         consumer_var->data.interpolation = INTERP_MODE_FLAT;
      }
   }

   if (vm->num_matches == vm->matches_capacity) {
      vm->matches_capacity *= 2;
      vm->matches = (struct match *)
         reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity);
   }

   /* We must use the consumer to compute the packing class because in GL4.4+
    * there is no guarantee interpolation qualifiers will match across stages.
    *
    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
    *
    *    "The type and presence of interpolation qualifiers of variables with
    *    the same name declared in all linked shaders for the same cross-stage
    *    interface must match, otherwise the link command will fail.
    *
    *    When comparing an output from one stage to an input of a subsequent
    *    stage, the input and output don't match if their interpolation
    *    qualifiers (or lack thereof) are not the same."
    *
    * This text was also in at least revision 7 of the 4.40 spec but is no
    * longer in revision 9 and not in the 4.50 spec.
    */
   const nir_variable *const var = (consumer_var != NULL)
      ? consumer_var : producer_var;

   if (producer_var && consumer_var &&
       consumer_var->data.must_be_shader_input) {
      producer_var->data.must_be_shader_input = 1;
   }

   vm->matches[vm->num_matches].packing_class
      = varying_matches_compute_packing_class(var);
   vm->matches[vm->num_matches].packing_order
      = varying_matches_compute_packing_order(var);

   vm->matches[vm->num_matches].producer_var = producer_var;
   vm->matches[vm->num_matches].consumer_var = consumer_var;
   vm->num_matches++;
}

/**
 * Choose locations for all of the variable matches that were previously
 * passed to varying_matches_record().
 * \param components  returns array[slot] of number of components used
 *                    per slot (1, 2, 3 or 4)
 * \param reserved_slots  bitmask indicating which varying slots are already
 *                        allocated
 * \return number of slots (4-element vectors) allocated
 */
static unsigned
varying_matches_assign_locations(struct varying_matches *vm,
                                 struct gl_shader_program *prog,
                                 uint8_t components[], uint64_t reserved_slots)
{
1399   /* If packing has been disabled then we cannot safely sort the varyings by
1400    * class as it may mean we are using a version of OpenGL where
1401    * interpolation qualifiers are not guaranteed to be matching across
1402    * shaders, sorting in this case could result in mismatching shader
1403    * interfaces.
1404    * When packing is disabled the sort orders varyings used by transform
1405    * feedback first, but also depends on *undefined behaviour* of qsort to
1406    * reverse the order of the varyings. See: xfb_comparator().
1407    *
1408    * If packing is only disabled for xfb varyings (mutually exclusive with
1409    * disable_varying_packing), we then group varyings depending on if they
1410    * are captured for transform feedback. The same *undefined behaviour* is
1411    * taken advantage of.
1412    */
1413   if (vm->disable_varying_packing) {
1414      /* Only sort varyings that are only used by transform feedback. */
1415      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
1416            &varying_matches_xfb_comparator);
1417   } else if (vm->disable_xfb_packing) {
1418      /* Only sort varyings that are NOT used by transform feedback. */
1419      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
1420            &varying_matches_not_xfb_comparator);
1421   } else {
1422      /* Sort varying matches into an order that makes them easy to pack. */
1423      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
1424            &varying_matches_match_comparator);
1425   }
1426
1427   unsigned generic_location = 0;
1428   unsigned generic_patch_location = MAX_VARYING*4;
1429   bool previous_var_xfb = false;
1430   bool previous_var_xfb_only = false;
1431   unsigned previous_packing_class = ~0u;
1432
1433   /* For tranform feedback separate mode, we know the number of attributes
1434    * is <= the number of buffers.  So packing isn't critical.  In fact,
1435    * packing vec3 attributes can cause trouble because splitting a vec3
1436    * effectively creates an additional transform feedback output.  The
1437    * extra TFB output may exceed device driver limits.
1438    *
1439    * Also don't pack vec3 if the driver prefers power of two aligned
1440    * varyings. Packing order guarantees that vec4, vec2 and vec1 will be
1441    * pot-aligned, we only need to take care of vec3s
1442    */
1443   const bool dont_pack_vec3 =
1444      (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1445       prog->TransformFeedback.NumVarying > 0) ||
1446      vm->prefer_pot_aligned_varyings;
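
   /* Worked example (hypothetical case): if a vec3 were packed at component
    * offset 2, it would straddle a slot boundary and have to be captured as
    * two separate transform feedback outputs, one for each slot it touches.
    */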

   for (unsigned i = 0; i < vm->num_matches; i++) {
      unsigned *location = &generic_location;
      const nir_variable *var;
      const struct glsl_type *type;
      bool is_vertex_input = false;

      if (vm->matches[i].consumer_var) {
         var = vm->matches[i].consumer_var;
         type = get_varying_type(var, vm->consumer_stage);
         if (vm->consumer_stage == MESA_SHADER_VERTEX)
            is_vertex_input = true;
      } else {
         if (!vm->matches[i].producer_var)
            continue; /* The varying was optimised away */

         var = vm->matches[i].producer_var;
         type = get_varying_type(var, vm->producer_stage);
      }

      if (var->data.patch)
         location = &generic_patch_location;

      /* Advance to the next slot if this varying has a different packing
       * class than the previous one, and we're not already on a slot
       * boundary.
       *
       * Also advance if varying packing is disabled for transform feedback,
       * and the previous or current varying is used for transform feedback.
       *
       * Also advance to the next slot if packing is disabled. This makes sure
       * we don't assign varyings the same locations, which is possible
       * because we still pack individual arrays, records and matrices even
       * when packing is disabled. Note we don't advance to the next slot if
       * we can pack varyings together that are only used for transform
       * feedback.
       */
      if (var->data.must_be_shader_input ||
          (vm->disable_xfb_packing &&
           (previous_var_xfb || var->data.is_xfb)) ||
          (vm->disable_varying_packing &&
           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
          (previous_packing_class != vm->matches[i].packing_class) ||
          (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
           dont_pack_vec3)) {
         *location = ALIGN(*location, 4);
      }

      previous_var_xfb = var->data.is_xfb;
      previous_var_xfb_only = var->data.is_xfb_only;
      previous_packing_class = vm->matches[i].packing_class;

      /* The number of components taken up by this variable. For vertex shader
       * inputs, we use the number of slots * 4, as they have different
       * counting rules.
       */
      unsigned num_components = 0;
      if (is_vertex_input) {
         num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4;
      } else {
         if (is_packing_disabled(vm, type, var)) {
            num_components = glsl_count_attribute_slots(type, false) * 4;
         } else {
            num_components = glsl_get_component_slots_aligned(type, *location);
         }
      }

      /* The last slot for this variable, inclusive. */
      unsigned slot_end = *location + num_components - 1;

      /* FIXME: We could be smarter in the code below and loop back over,
       * trying to fill any locations that we skipped because we couldn't
       * pack the varying around varyings with explicit locations. For now
       * just let the user hit the linking error if we run out of room and
       * suggest they use explicit locations.
       */
      while (slot_end < MAX_VARYING * 4u) {
         const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
         const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
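
         /* Worked example (assumed values): with *location == 6 and
          * num_components == 5, slot_end is 10, so slots = (10 / 4) -
          * (6 / 4) + 1 = 2 and slot_mask covers bits 1 and 2, i.e. the
          * two vec4 slots this varying would occupy.
          */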

         assert(slots > 0);

         if ((reserved_slots & slot_mask) == 0) {
            break;
         }

         *location = ALIGN(*location + 1, 4);
         slot_end = *location + num_components - 1;
      }

      if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
         linker_error(prog, "insufficient contiguous locations available for "
                      "%s; it is possible an array or struct could not be "
                      "packed between varyings with explicit locations. Try "
                      "using an explicit location for arrays and structs.",
                      var->name);
      }

      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
            components[j] = 4;
         components[slot_end / 4u] = (slot_end & 3) + 1;
      }

      vm->matches[i].generic_location = *location;

      *location = slot_end + 1;
   }

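   /* Convert the next free component location into a count of vec4 slots,
    * rounding up.
    */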
   return (generic_location + 3) / 4;
}

static void
varying_matches_assign_temp_locations(struct varying_matches *vm,
                                      struct gl_shader_program *prog,
                                      uint64_t reserved_slots)
{
   unsigned tmp_loc = 0;
   for (unsigned i = 0; i < vm->num_matches; i++) {
      nir_variable *producer_var = vm->matches[i].producer_var;
      nir_variable *consumer_var = vm->matches[i].consumer_var;

      while (tmp_loc < MAX_VARYINGS_INCL_PATCH) {
         if (reserved_slots & (UINT64_C(1) << tmp_loc))
            tmp_loc++;
         else
            break;
      }

      if (producer_var) {
         assert(producer_var->data.location == -1);
         producer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
      }

      if (consumer_var) {
         assert(consumer_var->data.location == -1);
         consumer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
      }

      tmp_loc++;
   }
}

/**
 * Update the producer and consumer shaders to reflect the location
 * assignments that were made by varying_matches_assign_locations().
 */
static void
varying_matches_store_locations(struct varying_matches *vm)
{
   /* Check if the location needs to be packed with lower_packed_varyings()
    * or if we can just use ARB_enhanced_layouts packing.
    */
   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0};
   const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };

   for (unsigned i = 0; i < vm->num_matches; i++) {
      nir_variable *producer_var = vm->matches[i].producer_var;
      nir_variable *consumer_var = vm->matches[i].consumer_var;
      unsigned generic_location = vm->matches[i].generic_location;
      unsigned slot = generic_location / 4;
      unsigned offset = generic_location % 4;
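
      /* e.g. (illustrative values) generic_location == 13 decomposes to
       * slot 3 (VARYING_SLOT_VAR3) with component offset 1.
       */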

      if (producer_var) {
         producer_var->data.location = VARYING_SLOT_VAR0 + slot;
         producer_var->data.location_frac = offset;
      }

      if (consumer_var) {
         consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
         consumer_var->data.location_frac = offset;
      }

      /* Find locations suitable for native packing via
       * ARB_enhanced_layouts.
       */
      if (vm->enhanced_layouts_enabled) {
         nir_variable *var = producer_var ? producer_var : consumer_var;
         unsigned stage = producer_var ? vm->producer_stage : vm->consumer_stage;
         const struct glsl_type *type =
            get_varying_type(var, stage);
         unsigned comp_slots = glsl_get_component_slots(type) + offset;
         unsigned slots = comp_slots / 4;
         if (comp_slots % 4)
            slots += 1;
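
         /* i.e. slots is comp_slots / 4 rounded up; e.g. a vec3 at
          * offset 2 gives comp_slots == 5 and therefore spans 2 slots.
          */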

         if (producer_var && consumer_var) {
            if (glsl_type_is_array_or_matrix(type) || glsl_type_is_struct(type) ||
                glsl_type_is_64bit(type)) {
               for (unsigned j = 0; j < slots; j++) {
                  pack_loc[slot + j] = true;
               }
            } else if (offset + glsl_get_vector_elements(type) > 4) {
               pack_loc[slot] = true;
               pack_loc[slot + 1] = true;
            } else {
               loc_type[slot][offset] = type;
            }
         } else {
            for (unsigned j = 0; j < slots; j++) {
               pack_loc[slot + j] = true;
            }
         }
      }
   }

   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
    * suitable.
    */
   if (vm->enhanced_layouts_enabled) {
      for (unsigned i = 0; i < vm->num_matches; i++) {
         nir_variable *producer_var = vm->matches[i].producer_var;
         nir_variable *consumer_var = vm->matches[i].consumer_var;
         if (!producer_var || !consumer_var)
            continue;

         unsigned generic_location = vm->matches[i].generic_location;
         unsigned slot = generic_location / 4;
         if (pack_loc[slot])
            continue;

         const struct glsl_type *type =
            get_varying_type(producer_var, vm->producer_stage);
         bool type_match = true;
         for (unsigned j = 0; j < 4; j++) {
            if (loc_type[slot][j]) {
               if (glsl_get_base_type(type) !=
                   glsl_get_base_type(loc_type[slot][j]))
                  type_match = false;
            }
         }

         if (type_match) {
            producer_var->data.explicit_location = 1;
            consumer_var->data.explicit_location = 1;
         }
      }
   }
}

/**
 * Is the given variable a varying variable to be counted against the
 * limit in ctx->Const.MaxVarying?
 * This includes variables such as texcoords, colors and generic
 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
 */
static bool
var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var)
{
   /* Only fragment shaders will take a varying variable as an input */
   if (stage == MESA_SHADER_FRAGMENT &&
       var->data.mode == nir_var_shader_in) {
      switch (var->data.location) {
      case VARYING_SLOT_POS:
      case VARYING_SLOT_FACE:
      case VARYING_SLOT_PNTC:
         return false;
      default:
         return true;
      }
   }
   return false;
}

struct tfeedback_candidate_generator_state {
   /**
    * Memory context used to allocate hash table keys and values.
    */
   void *mem_ctx;

   /**
    * Hash table in which tfeedback_candidate objects should be stored.
    */
   struct hash_table *tfeedback_candidates;

   gl_shader_stage stage;

   /**
    * Pointer to the toplevel variable that is being traversed.
    */
   nir_variable *toplevel_var;

   /**
    * Total number of varying floats that have been visited so far.  This is
    * used to determine the offset to each varying within the toplevel
    * variable.
    */
   unsigned varying_floats;

   /**
    * Offset within the xfb. Counted in floats.
    */
   unsigned xfb_offset_floats;
};

/**
 * Generates tfeedback_candidate structs describing all possible targets of
 * transform feedback.
 *
 * tfeedback_candidate structs are stored in the hash table
 * tfeedback_candidates.  This hash table maps varying names to instances of the
 * tfeedback_candidate struct.
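 *
 * For example (hypothetical declaration), given "struct S { vec4 a;
 * float b[2]; };" and an output "out S s;", the generated candidates are
 * "s.a" and "s.b"; arrays of basic types are kept as a single candidate
 * rather than being split per element.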
 */
static void
tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
                              char **name, size_t name_length,
                              const struct glsl_type *type,
                              const struct glsl_struct_field *named_ifc_member)
{
   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_INTERFACE:
      if (named_ifc_member) {
         ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
                                      named_ifc_member->name);
         tfeedback_candidate_generator(state, name, name_length,
                                       named_ifc_member->type, NULL);
         return;
      }
      FALLTHROUGH;
   case GLSL_TYPE_STRUCT:
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         size_t new_length = name_length;

         /* Append '.field' to the current variable name. */
         if (name) {
            ralloc_asprintf_rewrite_tail(name, &new_length, ".%s",
                                         glsl_get_struct_elem_name(type, i));
         }

         tfeedback_candidate_generator(state, name, new_length,
                                       glsl_get_struct_field(type, i), NULL);
      }

      return;
   case GLSL_TYPE_ARRAY:
      if (glsl_type_is_struct(glsl_without_array(type)) ||
          glsl_type_is_interface(glsl_without_array(type)) ||
          glsl_type_is_array(glsl_get_array_element(type))) {

         for (unsigned i = 0; i < glsl_get_length(type); i++) {
            size_t new_length = name_length;

            /* Append the subscript to the current variable name */
            ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

            tfeedback_candidate_generator(state, name, new_length,
                                          glsl_get_array_element(type),
                                          named_ifc_member);
         }

         return;
      }
      FALLTHROUGH;
   default:
      assert(!glsl_type_is_struct(glsl_without_array(type)));
      assert(!glsl_type_is_interface(glsl_without_array(type)));

      struct tfeedback_candidate *candidate
         = rzalloc(state->mem_ctx, struct tfeedback_candidate);
      candidate->toplevel_var = state->toplevel_var;
      candidate->type = type;

      if (glsl_type_is_64bit(glsl_without_array(type))) {
         /* From ARB_gpu_shader_fp64:
          *
          * If any variable captured in transform feedback has double-precision
          * components, the practical requirements for defined behavior are:
          *     ...
          * (c) each double-precision variable captured must be aligned to a
          *     multiple of eight bytes relative to the beginning of a vertex.
          */
         state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
         /* 64-bit members of structs are also aligned. */
         state->varying_floats = ALIGN(state->varying_floats, 2);
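
         /* Worked example: an offset of 3 floats would be rounded up to 4
          * here, so each double lands on an 8-byte boundary.
          */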
      }

      candidate->xfb_offset_floats = state->xfb_offset_floats;
      candidate->struct_offset_floats = state->varying_floats;

      _mesa_hash_table_insert(state->tfeedback_candidates,
                              ralloc_strdup(state->mem_ctx, *name),
                              candidate);

      const unsigned component_slots = glsl_get_component_slots(type);

      if (varying_has_user_specified_location(state->toplevel_var)) {
         state->varying_floats += glsl_count_attribute_slots(type, false) * 4;
      } else {
         state->varying_floats += component_slots;
      }

      state->xfb_offset_floats += component_slots;
   }
}

static void
populate_consumer_input_sets(void *mem_ctx, nir_shader *nir,
                             struct hash_table *consumer_inputs,
                             struct hash_table *consumer_interface_inputs,
                             nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
{
   memset(consumer_inputs_with_locations, 0,
          sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);

   nir_foreach_shader_in_variable(input_var, nir) {
      /* All interface blocks should have been lowered by this point */
      assert(!glsl_type_is_interface(input_var->type));

      if (input_var->data.explicit_location) {
         /* assign_varying_locations only cares about finding the
          * nir_variable at the start of a contiguous location block.
          *
          *     - For !producer, consumer_inputs_with_locations isn't used.
          *
          *     - For !consumer, consumer_inputs_with_locations is empty.
          *
          * For consumer && producer, if you were trying to set some
          * nir_variable to the middle of a location block on the other side
          * of producer/consumer, cross_validate_outputs_to_inputs() should
          * be link-erroring due to either type mismatch or location
          * overlaps.  If the variables do match up, then they've got a
          * matching data.location and you only looked at
          * consumer_inputs_with_locations[var->data.location], not any
          * following entries for the array/structure.
          */
         consumer_inputs_with_locations[input_var->data.location] =
            input_var;
      } else if (input_var->interface_type != NULL) {
         char *const iface_field_name =
            ralloc_asprintf(mem_ctx, "%s.%s",
               glsl_get_type_name(glsl_without_array(input_var->interface_type)),
               input_var->name);
         _mesa_hash_table_insert(consumer_interface_inputs,
                                 iface_field_name, input_var);
      } else {
         _mesa_hash_table_insert(consumer_inputs,
                                 ralloc_strdup(mem_ctx, input_var->name),
                                 input_var);
      }
   }
}

/**
 * Find a variable from the consumer that "matches" the specified variable
 *
 * This function only finds inputs with names that match.  There is no
 * validation (here) that the types, etc. are compatible.
 */
static nir_variable *
get_matching_input(void *mem_ctx,
                   const nir_variable *output_var,
                   struct hash_table *consumer_inputs,
                   struct hash_table *consumer_interface_inputs,
                   nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
{
   nir_variable *input_var;

   if (output_var->data.explicit_location) {
      input_var = consumer_inputs_with_locations[output_var->data.location];
   } else if (output_var->interface_type != NULL) {
      char *const iface_field_name =
         ralloc_asprintf(mem_ctx, "%s.%s",
            glsl_get_type_name(glsl_without_array(output_var->interface_type)),
            output_var->name);
      struct hash_entry *entry =
         _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
      input_var = entry ? (nir_variable *) entry->data : NULL;
   } else {
      struct hash_entry *entry =
         _mesa_hash_table_search(consumer_inputs, output_var->name);
      input_var = entry ? (nir_variable *) entry->data : NULL;
   }

   return (input_var == NULL || input_var->data.mode != nir_var_shader_in)
      ? NULL : input_var;
}

static int
io_variable_cmp(const void *_a, const void *_b)
{
   const nir_variable *const a = *(const nir_variable **) _a;
   const nir_variable *const b = *(const nir_variable **) _b;

   if (a->data.explicit_location && b->data.explicit_location)
      return b->data.location - a->data.location;

   if (a->data.explicit_location && !b->data.explicit_location)
      return 1;

   if (!a->data.explicit_location && b->data.explicit_location)
      return -1;

   return -strcmp(a->name, b->name);
}

/**
 * Sort the shader IO variables into canonical order
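 *
 * Canonical order here places variables with explicit locations first, in
 * ascending location order, followed by the remaining variables sorted by
 * name; io_variable_cmp() produces the reverse of this, which the
 * push-to-head loop below then inverts.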
 */
static void
canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode)
{
   nir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
   unsigned num_variables = 0;

   nir_foreach_variable_with_modes(var, nir, io_mode) {
      /* If we have already encountered more I/O variables than could
       * successfully link, bail.
       */
      if (num_variables == ARRAY_SIZE(var_table))
         return;

      var_table[num_variables++] = var;
   }

   if (num_variables == 0)
      return;

   /* Sort the list in reverse order (io_variable_cmp handles this).  Later
    * we're going to push the variables on to the IR list as a stack, so we
    * want the last variable (in canonical order) to be first in the list.
    */
   qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);

   /* Remove the variable from its current location in the variable list, and
    * put it at the front.
    */
   for (unsigned i = 0; i < num_variables; i++) {
      exec_node_remove(&var_table[i]->node);
      exec_list_push_head(&nir->variables, &var_table[i]->node);
   }
}

/**
 * Generate a bitfield map of the explicit locations for shader varyings.
 *
 * Note: For Tessellation shaders we are sitting right on the limits of the
 * 64-bit map. Per-vertex and per-patch both have separate location domains
 * with a max of MAX_VARYING.
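 *
 * For example (hypothetical), a variable with an explicit location of
 * VARYING_SLOT_VAR3 whose type occupies two slots sets bits 3 and 4 in
 * the returned mask.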
 */
static uint64_t
reserved_varying_slot(struct gl_linked_shader *sh,
                      nir_variable_mode io_mode)
{
   assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out);
   /* Avoid an overflow of the returned value */
   assert(MAX_VARYINGS_INCL_PATCH <= 64);

   uint64_t slots = 0;
   int var_slot;

   if (!sh)
      return slots;

   nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) {
      if (!var->data.explicit_location ||
          var->data.location < VARYING_SLOT_VAR0)
         continue;

      var_slot = var->data.location - VARYING_SLOT_VAR0;

      bool is_gl_vertex_input = io_mode == nir_var_shader_in &&
                                sh->Stage == MESA_SHADER_VERTEX;
      unsigned num_elements =
         glsl_count_attribute_slots(get_varying_type(var, sh->Stage),
                                    is_gl_vertex_input);
      for (unsigned i = 0; i < num_elements; i++) {
         if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
            slots |= UINT64_C(1) << var_slot;
         var_slot += 1;
      }
   }

   return slots;
}

/**
 * Sets the bits in the inputs_read, or outputs_written
 * bitfield corresponding to this variable.
 */
static void
set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage)
{
   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out);
   assert(var->data.location >= VARYING_SLOT_VAR0);

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned location = var->data.location - VARYING_SLOT_VAR0;
   unsigned slots = glsl_count_attribute_slots(type, false);
   for (unsigned i = 0; i < slots; i++) {
      BITSET_SET(bits, location + i);
   }
}

static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

static void
tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.location < VARYING_SLOT_VAR0)
                  continue;

               unsigned comp = var->data.location_frac;
               set_variable_io_mask(read[comp + i], var, shader->info.stage);
            }
         }
      }
   }
}

/* We need to replace any interp intrinsics that reference undefined
 * (shader_temp) inputs, as no later NIR pass expects to see them.
 */
static bool
replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
                                         void *data)
{
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
          intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
          intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) {
         nir_variable *var = nir_intrinsic_get_var(intrin, 0);
         if (var->data.mode == nir_var_shader_temp) {
            /* Create undef and rewrite the interp uses */
            nir_ssa_def *undef =
               nir_ssa_undef(b, intrin->dest.ssa.num_components,
                             intrin->dest.ssa.bit_size);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, undef);

            nir_instr_remove(&intrin->instr);
            return true;
         }
      }
   }

   return false;
}

static void
fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode)
{
   /* Remove all interpolate uses of the unset varying and replace with undef. */
   if (mode == nir_var_shader_in && shader->info.stage == MESA_SHADER_FRAGMENT) {
      (void) nir_shader_instructions_pass(shader,
                                          replace_unused_interpolate_at_with_undef,
                                          nir_metadata_block_index |
                                          nir_metadata_dominance,
                                          NULL);
   }

   nir_lower_global_vars_to_local(shader);
   nir_fixup_deref_modes(shader);
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = remove_unused_io_vars(producer, consumer, prog,
 *                                  nir_var_shader_out, read) ||
 *            progress;
 *
 * The "used_by_other_stage" argument should be an array of 4 BITSET_WORD
 * arrays, one per possible .location_frac.  Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
static bool
remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
                      struct gl_shader_program *prog,
                      nir_variable_mode mode,
                      BITSET_WORD **used_by_other_stage)
{
   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   bool progress = false;
   nir_shader *shader = mode == nir_var_shader_out ? producer : consumer;

   BITSET_WORD **used;
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      used = used_by_other_stage;

      /* Skip builtins; dead builtins are removed elsewhere. */
      if (is_gl_identifier(var->name))
         continue;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      /* Skip xfb varyings and any other type we cannot remove */
      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      BITSET_WORD *other_stage = used[var->data.location_frac];

      /* If location == -1, lower the varying to a global as it has no match
       * and is not an xfb varying.  This must be done after skipping builtins
       * as builtins could be assigned a location of -1.
       * We also lower unused varyings with explicit locations.
       */
      bool use_found = false;
      if (var->data.location >= 0) {
         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned slots = glsl_count_attribute_slots(type, false);
         for (unsigned i = 0; i < slots; i++) {
            if (BITSET_TEST(other_stage, location + i)) {
               use_found = true;
               break;
            }
         }
      }

      if (!use_found) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         progress = true;

         if (mode == nir_var_shader_in) {
            if (!prog->IsES && prog->data->Version <= 120) {
               /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
                *
                *     Only those varying variables used (i.e. read) in
                *     the fragment shader executable must be written to
                *     by the vertex shader executable; declaring
                *     superfluous varying variables in a vertex shader is
                *     permissible.
                *
                * We interpret this text as meaning that the VS must
                * write the variable for the FS to read it.  See
                * "glsl1-varying read but not written" in piglit.
                */
               linker_error(prog, "%s shader varying %s not written "
                            "by %s shader.\n",
                            _mesa_shader_stage_to_string(consumer->info.stage),
                            var->name,
                            _mesa_shader_stage_to_string(producer->info.stage));
            } else {
               linker_warning(prog, "%s shader varying %s not written "
                              "by %s shader.\n",
                              _mesa_shader_stage_to_string(consumer->info.stage),
                              var->name,
                              _mesa_shader_stage_to_string(producer->info.stage));
            }
         }
      }
   }

   if (progress)
      fixup_vars_lowered_to_temp(shader, mode);

   return progress;
}

static bool
remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
                       struct gl_shader_program *prog, void *mem_ctx)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   int max_loc_out = 0;
   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location < VARYING_SLOT_VAR0)
         continue;

      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }
      unsigned slots = glsl_count_attribute_slots(type, false);

      max_loc_out = MAX2(max_loc_out,
                         (var->data.location - VARYING_SLOT_VAR0) + (int) slots);
   }

   int max_loc_in = 0;
   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location < VARYING_SLOT_VAR0)
         continue;

      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }
      unsigned slots = glsl_count_attribute_slots(type, false);

      max_loc_in = MAX2(max_loc_in,
                        (var->data.location - VARYING_SLOT_VAR0) + (int) slots);
   }

   /* Old GLSL shaders that don't use explicit locations can contain more
    * than 64 varyings before unused varyings are removed, so we must count
    * them and make use of the BITSET macros to keep track of used slots.
    * Once we have removed these excess varyings we can make use of further
    * NIR varying linking optimisation passes.
    */
   BITSET_WORD *read[4];
   BITSET_WORD *written[4];
   int max_loc = MAX2(max_loc_in, max_loc_out);
   for (unsigned i = 0; i < 4; i++) {
      read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
      written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
   }

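   /* Tracking layout example: a vec2 output at VARYING_SLOT_VAR2 with
    * location_frac == 1 sets bit 2 in both written[1] and written[2],
    * i.e. one bitset per component the variable covers.
    */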
   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location < VARYING_SLOT_VAR0)
         continue;

      for (unsigned i = 0; i < get_num_components(var); i++) {
         unsigned comp = var->data.location_frac;
         set_variable_io_mask(written[comp + i], var, producer->info.stage);
      }
   }

   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location < VARYING_SLOT_VAR0)
         continue;

      for (unsigned i = 0; i < get_num_components(var); i++) {
         unsigned comp = var->data.location_frac;
         set_variable_io_mask(read[comp + i], var, consumer->info.stage);
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read);

   bool progress = false;
   progress =
      remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read);
   progress =
      remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress;

   return progress;
}

static bool
should_add_varying_match_record(nir_variable *const input_var,
                                struct gl_shader_program *prog,
                                struct gl_linked_shader *producer,
                                struct gl_linked_shader *consumer) {

   /* If a matching input variable was found, add this output (and the input) to
    * the set.  If this is a separable program and there is no consumer stage,
    * add the output.
    *
    * Always add TCS outputs. They are shared by all invocations
    * within a patch and can be used as shared memory.
    */
   return input_var || (prog->SeparateShader && consumer == NULL) ||
             producer->Stage == MESA_SHADER_TESS_CTRL;
}

/* This assigns some initial unoptimised varying locations so that our NIR
 * optimisations can perform some initial matching and optimisation, and it
 * also does the initial processing of the transform feedback declarations.
 */
static bool
assign_initial_varying_locations(const struct gl_constants *consts,
                                 const struct gl_extensions *exts,
                                 void *mem_ctx,
                                 struct gl_shader_program *prog,
                                 struct gl_linked_shader *producer,
                                 struct gl_linked_shader *consumer,
                                 unsigned num_xfb_decls,
                                 struct xfb_decl *xfb_decls,
                                 struct varying_matches *vm)
{
   init_varying_matches(mem_ctx, vm, consts, exts,
                        producer ? producer->Stage : MESA_SHADER_NONE,
                        consumer ? consumer->Stage : MESA_SHADER_NONE,
                        prog->SeparateShader);

   struct hash_table *tfeedback_candidates =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   struct hash_table *consumer_inputs =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   struct hash_table *consumer_interface_inputs =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
      NULL,
   };

   if (consumer)
      populate_consumer_input_sets(mem_ctx, consumer->Program->nir,
                                   consumer_inputs, consumer_interface_inputs,
                                   consumer_inputs_with_locations);

   if (producer) {
      nir_foreach_shader_out_variable(output_var, producer->Program->nir) {
         /* Only geometry shaders can use non-zero streams */
         assert(output_var->data.stream == 0 ||
                (output_var->data.stream < MAX_VERTEX_STREAMS &&
                 producer->Stage == MESA_SHADER_GEOMETRY));

         if (num_xfb_decls > 0) {
            /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
             * ("Vertex Shader Variables / Output Variables")
             *
             * "Each program object can specify a set of output variables from
             * one shader to be recorded in transform feedback mode (see
             * section 13.3). The variables that can be recorded are those
             * emitted by the first active shader, in order, from the
             * following list:
             *
             *  * geometry shader
             *  * tessellation evaluation shader
             *  * tessellation control shader
             *  * vertex shader"
             *
             * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
             * Variables / Output Variables") the tessellation control shader
             * is not included in the stages list.
             */
            if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {

               const struct glsl_type *type = output_var->data.from_named_ifc_block ?
                  output_var->interface_type : output_var->type;
               if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               const struct glsl_struct_field *ifc_member = NULL;
               if (output_var->data.from_named_ifc_block) {
                  ifc_member =
                     glsl_get_struct_field_data(glsl_without_array(type),
                        glsl_get_field_index(glsl_without_array(type), output_var->name));
               }

               char *name;
               if (glsl_type_is_struct(glsl_without_array(type)) ||
                   (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) {
                  type = output_var->type;
                  name = ralloc_strdup(NULL, output_var->name);
               } else if (glsl_type_is_interface(glsl_without_array(type))) {
                  name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type)));
               } else {
                  name = ralloc_strdup(NULL, output_var->name);
               }

               struct tfeedback_candidate_generator_state state;
               state.mem_ctx = mem_ctx;
               state.tfeedback_candidates = tfeedback_candidates;
               state.stage = producer->Stage;
               state.toplevel_var = output_var;
               state.varying_floats = 0;
               state.xfb_offset_floats = 0;

               tfeedback_candidate_generator(&state, &name, strlen(name), type,
                                             ifc_member);
               ralloc_free(name);
            }
         }

         nir_variable *const input_var =
            get_matching_input(mem_ctx, output_var, consumer_inputs,
                               consumer_interface_inputs,
                               consumer_inputs_with_locations);

         if (should_add_varying_match_record(input_var, prog, producer,
                                             consumer)) {
            varying_matches_record(mem_ctx, vm, output_var, input_var);
         }

         /* Only stream 0 outputs can be consumed in the next stage */
         if (input_var && output_var->data.stream != 0) {
            linker_error(prog, "output %s is assigned to stream=%d but "
                         "is linked to an input, which requires stream=0",
                         output_var->name, output_var->data.stream);
            return false;
         }
      }
   } else {
      /* If there's no producer stage, then this must be a separable program.
       * For example, we may have a program that has just a fragment shader.
       * Later this program will be used with some arbitrary vertex (or
       * geometry) shader program.  This means that locations must be assigned
       * for all the inputs.
       */
      nir_foreach_shader_in_variable(input_var, consumer->Program->nir) {
         varying_matches_record(mem_ctx, vm, NULL, input_var);
      }
   }

   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (!xfb_decl_is_varying(&xfb_decls[i]))
         continue;

      const struct tfeedback_candidate *matched_candidate
         = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates);

      if (matched_candidate == NULL)
         return false;

      /* There are two situations where a new output varying is needed:
       *
       *  - If varying packing is disabled for xfb and the current declaration
       *    is subscripting an array, whether the subscript is aligned or not,
       *    in order to preserve the rest of the array for the consumer.
       *
       *  - If a builtin variable needs to be copied to a new variable
       *    before its content is modified by another lowering pass (e.g.
       *    \c gl_Position is transformed by \c nir_lower_viewport_transform).
       */
      const bool lowered =
         (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) ||
         (matched_candidate->toplevel_var->data.explicit_location &&
          matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
          (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
          (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
              BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));

      if (lowered) {
         nir_variable *new_var;
         struct tfeedback_candidate *new_candidate = NULL;

         new_var = gl_nir_lower_xfb_varying(producer->Program->nir,
                                            xfb_decls[i].orig_name,
                                            matched_candidate->toplevel_var);
         if (new_var == NULL)
            return false;

         /* Create new candidate and replace matched_candidate */
         new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate);
         new_candidate->toplevel_var = new_var;
         new_candidate->type = new_var->type;
         new_candidate->struct_offset_floats = 0;
         new_candidate->xfb_offset_floats = 0;
         _mesa_hash_table_insert(tfeedback_candidates,
                                 ralloc_strdup(mem_ctx, new_var->name),
                                 new_candidate);

         xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate);
         matched_candidate = new_candidate;
      }

      /* Mark as xfb varying */
      matched_candidate->toplevel_var->data.is_xfb = 1;

      /* Mark xfb varyings as always active */
      matched_candidate->toplevel_var->data.always_active_io = 1;

      /* Mark any corresponding inputs as always active also. We must do this
       * because we have a NIR pass that lowers vectors to scalars and another
       * that removes unused varyings.
       * We don't split varyings marked as always active because there is no
       * point in doing so. This means we need to mark both sides of the
       * interface as always active otherwise we will have a mismatch and
       * start removing things we shouldn't.
       */
      nir_variable *const input_var =
         get_matching_input(mem_ctx, matched_candidate->toplevel_var,
                            consumer_inputs, consumer_interface_inputs,
                            consumer_inputs_with_locations);
      if (input_var) {
         input_var->data.is_xfb = 1;
         input_var->data.always_active_io = 1;
      }

      /* Add the xfb varying to varying matches if it wasn't already added */
      if ((!should_add_varying_match_record(input_var, prog, producer,
                                            consumer) &&
           !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) {
         matched_candidate->toplevel_var->data.is_xfb_only = 1;
         varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var,
                                NULL);
      }
   }

   uint64_t reserved_out_slots = 0;
   if (producer)
      reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out);

   uint64_t reserved_in_slots = 0;
   if (consumer)
      reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in);

   /* Assign temporary user varying locations. This is required for our NIR
    * varying optimisations to do their matching.
    */
   const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots;
   varying_matches_assign_temp_locations(vm, prog, reserved_slots);

   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (!xfb_decl_is_varying(&xfb_decls[i]))
         continue;

      xfb_decls[i].matched_candidate->initial_location =
         xfb_decls[i].matched_candidate->toplevel_var->data.location;
      xfb_decls[i].matched_candidate->initial_location_frac =
         xfb_decls[i].matched_candidate->toplevel_var->data.location_frac;
   }

   return true;
}

static void
link_shader_opts(struct varying_matches *vm,
                 nir_shader *producer, nir_shader *consumer,
                 struct gl_shader_program *prog, void *mem_ctx)
{
   /* If we can't pack the stage using this pass then we can't lower io to
    * scalar just yet. Instead we leave it to a later NIR linking pass that
    * uses ARB_enhanced_layouts style packing to pack things further.
    *
    * Otherwise we might end up causing linking errors and perf regressions
    * because the new scalars will be assigned individual slots and can
    * overflow the available slots.
    */
   if (producer->options->lower_to_scalar && !vm->disable_varying_packing &&
      !vm->disable_xfb_packing) {
      NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
      NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
   }

   gl_nir_opts(producer);
   gl_nir_opts(consumer);

   if (nir_link_opt_varyings(producer, consumer))
      gl_nir_opts(consumer);

   NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
   NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);

   if (remove_unused_varyings(producer, consumer, prog, mem_ctx)) {
      NIR_PASS_V(producer, nir_lower_global_vars_to_local);
      NIR_PASS_V(consumer, nir_lower_global_vars_to_local);

      gl_nir_opts(producer);
      gl_nir_opts(consumer);

      /* Optimizations can cause varyings to become unused.
       * nir_compact_varyings() depends on all dead varyings being removed so
       * we need to call nir_remove_dead_variables() again here.
       */
      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out,
                 NULL);
      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in,
                 NULL);
   }

   nir_link_varying_precision(producer, consumer);
}

2647/**
2648 * Assign locations for all variables that are produced in one pipeline stage
2649 * (the "producer") and consumed in the next stage (the "consumer").
2650 *
2651 * Variables produced by the producer may also be consumed by transform
2652 * feedback.
2653 *
2654 * \param num_xfb_decls is the number of declarations indicating
2655 *        variables that may be consumed by transform feedback.
2656 *
2657 * \param xfb_decls is a pointer to an array of xfb_decl objects
2658 *        representing the result of parsing the strings passed to
2659 *        glTransformFeedbackVaryings().  assign_location() will be called for
2660 *        each of these objects that matches one of the outputs of the
2661 *        producer.
2662 *
2663 * When num_xfb_decls is nonzero, it is permissible for the consumer to
2664 * be NULL.  In this case, varying locations are assigned solely based on the
2665 * requirements of transform feedback.
2666 */
2667static bool
2668assign_final_varying_locations(const struct gl_constants *consts,
2669                               const struct gl_extensions *exts,
2670                               void *mem_ctx,
2671                               struct gl_shader_program *prog,
2672                               struct gl_linked_shader *producer,
2673                               struct gl_linked_shader *consumer,
2674                               unsigned num_xfb_decls,
2675                               struct xfb_decl *xfb_decls,
2676                               const uint64_t reserved_slots,
2677                               struct varying_matches *vm)
2678{
2679   init_varying_matches(mem_ctx, vm, consts, exts,
2680                        producer ? producer->Stage : MESA_SHADER_NONE,
2681                        consumer ? consumer->Stage : MESA_SHADER_NONE,
2682                        prog->SeparateShader);
2683
2684   /* Regather varying matches as we ran optimisations and the previous pointers
2685    * are no longer valid.
2686    */
2687   if (producer) {
2688      nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
2689         if (var_out->data.location < VARYING_SLOT_VAR0 ||
2690             var_out->data.explicit_location)
2691            continue;
2692
2693         if (vm->num_matches == vm->matches_capacity) {
2694            vm->matches_capacity *= 2;
2695            vm->matches = (struct match *)
2696               reralloc(mem_ctx, vm->matches, struct match,
2697                        vm->matches_capacity);
2698         }
2699
2700         vm->matches[vm->num_matches].packing_class
2701            = varying_matches_compute_packing_class(var_out);
2702         vm->matches[vm->num_matches].packing_order
2703            = varying_matches_compute_packing_order(var_out);
2704
2705         vm->matches[vm->num_matches].producer_var = var_out;
2706         vm->matches[vm->num_matches].consumer_var = NULL;
2707         vm->num_matches++;
2708      }

      /* Regather xfb varyings too */
      for (unsigned i = 0; i < num_xfb_decls; i++) {
         if (!xfb_decl_is_varying(&xfb_decls[i]))
            continue;

         /* Varying pointer was already reset */
         if (xfb_decls[i].matched_candidate->initial_location == -1)
            continue;

         bool UNUSED is_reset = false;
         bool UNUSED no_outputs = true;
         nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
            no_outputs = false;
            assert(var_out->data.location != -1);
            if (var_out->data.location ==
                xfb_decls[i].matched_candidate->initial_location &&
                var_out->data.location_frac ==
                xfb_decls[i].matched_candidate->initial_location_frac) {
               xfb_decls[i].matched_candidate->toplevel_var = var_out;
               xfb_decls[i].matched_candidate->initial_location = -1;
               is_reset = true;
               break;
            }
         }
         assert(is_reset || no_outputs);
      }
   }

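   /* Pair each consumer input with the producer output that shares its
    * (location, location_frac); inputs without a producer match still get
    * their own match entry so they are assigned a final location.
    */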
   bool found_match = false;
   if (consumer) {
      nir_foreach_shader_in_variable(var_in, consumer->Program->nir) {
         if (var_in->data.location < VARYING_SLOT_VAR0 ||
             var_in->data.explicit_location)
            continue;

         found_match = false;
         for (unsigned i = 0; i < vm->num_matches; i++) {
            if (vm->matches[i].producer_var &&
                (vm->matches[i].producer_var->data.location == var_in->data.location &&
                 vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) {

               vm->matches[i].consumer_var = var_in;
               found_match = true;
               break;
            }
         }
         if (!found_match) {
            if (vm->num_matches == vm->matches_capacity) {
               vm->matches_capacity *= 2;
               vm->matches = (struct match *)
                  reralloc(mem_ctx, vm->matches, struct match,
                           vm->matches_capacity);
            }

            vm->matches[vm->num_matches].packing_class
               = varying_matches_compute_packing_class(var_in);
            vm->matches[vm->num_matches].packing_order
               = varying_matches_compute_packing_order(var_in);

            vm->matches[vm->num_matches].producer_var = NULL;
            vm->matches[vm->num_matches].consumer_var = var_in;
            vm->num_matches++;
         }
      }
   }

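   /* components[] records, per varying slot, how many components the
    * assignment pass placed there; it is handed to the packed-varying
    * lowering below so packing stays consistent across stages.
    */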
   uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
   const unsigned slots_used =
      varying_matches_assign_locations(vm, prog, components, reserved_slots);
   varying_matches_store_locations(vm);

   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (xfb_decl_is_varying(&xfb_decls[i])) {
         if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog))
            return false;
      }
   }

   if (producer) {
      gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
                                   nir_var_shader_out, 0, producer,
                                   vm->disable_varying_packing,
                                   vm->disable_xfb_packing, vm->xfb_enabled);
      nir_lower_pack(producer->Program->nir);
   }

   if (consumer) {
      unsigned consumer_vertices = 0;
      if (consumer->Stage == MESA_SHADER_GEOMETRY)
         consumer_vertices = prog->Geom.VerticesIn;

      gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
                                   nir_var_shader_in, consumer_vertices,
                                   consumer, vm->disable_varying_packing,
                                   vm->disable_xfb_packing, vm->xfb_enabled);
      nir_lower_pack(consumer->Program->nir);
   }

   return true;
}

static bool
check_against_output_limit(const struct gl_constants *consts, gl_api api,
                           struct gl_shader_program *prog,
                           struct gl_linked_shader *producer,
                           unsigned num_explicit_locations)
{
   unsigned output_vectors = num_explicit_locations;
   nir_foreach_shader_out_variable(var, producer->Program->nir) {
      if (!var->data.explicit_location &&
          var_counts_against_varying_limit(producer->Stage, var)) {
         /* Outputs are never vertex inputs, so pass false for
          * is_gl_vertex_input.
          */
         output_vectors += glsl_count_attribute_slots(var->type, false);
      }
   }
   assert(producer->Stage != MESA_SHADER_FRAGMENT);
   unsigned max_output_components =
      consts->Program[producer->Stage].MaxOutputComponents;

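   /* Each varying slot holds up to four components, so e.g. 20 occupied
    * vec4-sized slots amount to 80 components.
    */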
   const unsigned output_components = output_vectors * 4;
   if (output_components > max_output_components) {
      if (api == API_OPENGLES2 || prog->IsES)
         linker_error(prog, "%s shader uses too many output vectors "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(producer->Stage),
                      output_vectors,
                      max_output_components / 4);
      else
         linker_error(prog, "%s shader uses too many output components "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(producer->Stage),
                      output_components,
                      max_output_components);

      return false;
   }

   return true;
}

static bool
check_against_input_limit(const struct gl_constants *consts, gl_api api,
                          struct gl_shader_program *prog,
                          struct gl_linked_shader *consumer,
                          unsigned num_explicit_locations)
{
   unsigned input_vectors = num_explicit_locations;

   nir_foreach_shader_in_variable(var, consumer->Program->nir) {
      if (!var->data.explicit_location &&
          var_counts_against_varying_limit(consumer->Stage, var)) {
         /* This stage is never the vertex shader (see the assert below),
          * so pass false for is_gl_vertex_input.
          */
         input_vectors += glsl_count_attribute_slots(var->type, false);
      }
   }

   assert(consumer->Stage != MESA_SHADER_VERTEX);
   unsigned max_input_components =
      consts->Program[consumer->Stage].MaxInputComponents;

   const unsigned input_components = input_vectors * 4;
   if (input_components > max_input_components) {
      if (api == API_OPENGLES2 || prog->IsES)
         linker_error(prog, "%s shader uses too many input vectors "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(consumer->Stage),
                      input_vectors,
                      max_input_components / 4);
      else
         linker_error(prog, "%s shader uses too many input components "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(consumer->Stage),
                      input_components,
                      max_input_components);

      return false;
   }

   return true;
}

/* Lower unset/unused inputs/outputs to shader temporaries so they can be
 * removed.
 */
static void
remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog,
                                        unsigned stage, nir_variable_mode mode)
{
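   /* A location of -1 means the variable was never assigned a slot.  Demote
    * such variables (except those kept only for transform feedback) to
    * shader temporaries so later dead-code elimination can delete them.
    */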
   bool progress = false;
   nir_shader *shader = prog->_LinkedShaders[stage]->Program->nir;

   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      if (!var->data.is_xfb_only && var->data.location == -1) {
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;
         progress = true;
      }
   }

   if (progress)
      fixup_vars_lowered_to_temp(shader, mode);
}

static bool
link_varyings(struct gl_shader_program *prog, unsigned first,
              unsigned last, const struct gl_constants *consts,
              const struct gl_extensions *exts, gl_api api, void *mem_ctx)
{
   bool has_xfb_qualifiers = false;
   unsigned num_xfb_decls = 0;
   char **varying_names = NULL;
   struct xfb_decl *xfb_decls = NULL;

   if (last > MESA_SHADER_FRAGMENT)
      return true;

   /* From the ARB_enhanced_layouts spec:
    *
    *    "If the shader used to record output variables for transform feedback
    *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
    *    qualifiers, the values specified by TransformFeedbackVaryings are
    *    ignored, and the set of variables captured for transform feedback is
    *    instead derived from the specified layout qualifiers."
    */
   for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
      /* Find the last stage before the fragment shader. */
      if (prog->_LinkedShaders[i]) {
         has_xfb_qualifiers =
            process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
                                          prog, &num_xfb_decls,
                                          &varying_names);
         break;
      }
   }

   if (!has_xfb_qualifiers) {
      num_xfb_decls = prog->TransformFeedback.NumVarying;
      varying_names = prog->TransformFeedback.VaryingNames;
   }
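
   /* Illustrative example (hypothetical names): the strings stored above
    * are whatever the application passed to the API, e.g.
    *
    *    static const char *names[] = { "gl_Position", "out_color" };
    *    glTransformFeedbackVaryings(program, 2, names,
    *                                GL_INTERLEAVED_ATTRIBS);
    *
    * would arrive here as num_xfb_decls == 2 with those two strings in
    * varying_names.
    */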

   if (num_xfb_decls != 0) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * the <count> specified by TransformFeedbackVaryingsEXT is
       *     non-zero, but the program object has no vertex or geometry
       *     shader;
       */
      if (first >= MESA_SHADER_FRAGMENT) {
         linker_error(prog, "Transform feedback varyings specified, but "
                      "no vertex, tessellation, or geometry shader is "
                      "present.\n");
         return false;
      }

      xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl, num_xfb_decls);
      if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls,
                           varying_names, xfb_decls))
         return false;
   }

   struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shader[num_shaders++] = prog->_LinkedShaders[i];
   }

   struct varying_matches vm;
   if (last < MESA_SHADER_FRAGMENT &&
       (num_xfb_decls != 0 || prog->SeparateShader)) {
      struct gl_linked_shader *producer = prog->_LinkedShaders[last];
      if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                            producer, NULL, num_xfb_decls,
                                            xfb_decls, &vm))
         return false;
   }

   if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) {
      remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in);
      remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out);
   }

   if (prog->SeparateShader) {
      struct gl_linked_shader *consumer = linked_shader[0];
      if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL,
                                            consumer, 0, NULL, &vm))
         return false;
   }

   if (num_shaders == 1) {
      /* Linking shaders also optimizes them.  Separate shaders, compute
       * shaders, and shaders with a fixed-func VS or FS that don't need
       * linking are optimized here.
       */
      gl_nir_opts(linked_shader[0]->Program->nir);
   } else {
      /* Linking the stages in the opposite order (from fragment to vertex)
       * ensures that inter-shader outputs written to in an earlier stage
       * are eliminated if they are (transitively) not used in a later
       * stage.
       */
      for (int i = num_shaders - 2; i >= 0; i--) {
         unsigned stage_num_xfb_decls =
            linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ?
            num_xfb_decls : 0;

         if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                               linked_shader[i],
                                               linked_shader[i + 1],
                                               stage_num_xfb_decls, xfb_decls,
                                               &vm))
            return false;

         /* Now that validation is done, it's safe to remove unused varyings.
          * As we have both a producer and a consumer, it's safe to remove
          * unused varyings even if the program is an SSO, because the stages
          * are being linked together, i.e. we have a multi-stage SSO.
          */
         link_shader_opts(&vm, linked_shader[i]->Program->nir,
                          linked_shader[i + 1]->Program->nir,
                          prog, mem_ctx);

         remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage,
                                                 nir_var_shader_out);
         remove_unused_shader_inputs_and_outputs(prog,
                                                 linked_shader[i + 1]->Stage,
                                                 nir_var_shader_in);
      }
   }

   if (!prog->SeparateShader) {
      /* If not an SSO, remove unused varyings from the first/last stage. */
      NIR_PASS_V(prog->_LinkedShaders[first]->Program->nir,
                 nir_remove_dead_variables, nir_var_shader_in, NULL);
      NIR_PASS_V(prog->_LinkedShaders[last]->Program->nir,
                 nir_remove_dead_variables, nir_var_shader_out, NULL);
   } else {
      /* Sort inputs / outputs into a canonical order.  This is necessary so
       * that inputs / outputs of separable shaders will be assigned
       * predictable locations regardless of the order in which declarations
       * appeared in the shader source.
       */
      if (first != MESA_SHADER_VERTEX) {
         canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir,
                                nir_var_shader_in);
      }

      if (last != MESA_SHADER_FRAGMENT) {
         canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir,
                                nir_var_shader_out);
      }
   }

   /* If there is no fragment shader, we still need to set up transform
    * feedback.
    *
    * For SSO we also need to assign output locations.  We assign them here
    * because we need to do it for both single-stage and multi-stage
    * programs.
    */
   if (last < MESA_SHADER_FRAGMENT &&
       (num_xfb_decls != 0 || prog->SeparateShader)) {
      const uint64_t reserved_out_slots =
         reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out);
      if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                          prog->_LinkedShaders[last], NULL,
                                          num_xfb_decls, xfb_decls,
                                          reserved_out_slots, &vm))
         return false;
   }

   if (prog->SeparateShader) {
      struct gl_linked_shader *const sh = prog->_LinkedShaders[first];

      const uint64_t reserved_slots =
         reserved_varying_slot(sh, nir_var_shader_in);

      /* Assign input locations for SSO; output locations are already
       * assigned.
       */
      if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                          NULL /* producer */,
                                          sh /* consumer */,
                                          0 /* num_xfb_decls */,
                                          NULL /* xfb_decls */,
                                          reserved_slots, &vm))
         return false;
   }

   if (num_shaders == 1) {
      gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0],
                                       0, NULL);
      gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL,
                                       num_xfb_decls, xfb_decls);
   } else {
      /* Linking the stages in the opposite order (from fragment to vertex)
       * ensures that inter-shader outputs written to in an earlier stage
       * are eliminated if they are (transitively) not used in a later
       * stage.
       */
      int next = last;
      for (int i = next - 1; i >= 0; i--) {
         if (prog->_LinkedShaders[i] == NULL && i != 0)
            continue;

         struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
         struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next];

         gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next,
                                          next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                                          xfb_decls);

         const uint64_t reserved_out_slots =
            reserved_varying_slot(sh_i, nir_var_shader_out);
         const uint64_t reserved_in_slots =
            reserved_varying_slot(sh_next, nir_var_shader_in);

         if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i,
                   sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                   xfb_decls, reserved_out_slots | reserved_in_slots, &vm))
            return false;

         /* This must be done after all dead varyings are eliminated. */
         if (sh_i != NULL) {
            unsigned slots_used = util_bitcount64(reserved_out_slots);
            if (!check_against_output_limit(consts, api, prog, sh_i, slots_used))
               return false;
         }

         unsigned slots_used = util_bitcount64(reserved_in_slots);
         if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
            return false;

         next = i;
      }
   }

   if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls,
                             has_xfb_qualifiers, mem_ctx))
      return false;

   return true;
}

bool
gl_nir_link_varyings(const struct gl_constants *consts,
                     const struct gl_extensions *exts,
                     gl_api api, struct gl_shader_program *prog)
{
   void *mem_ctx = ralloc_context(NULL);

   unsigned first, last;

   first = MESA_SHADER_STAGES;
   last = 0;

   /* We need to initialize the program resource list because the varying
    * packing pass may start inserting varyings onto the list.
    */
   init_program_resource_list(prog);

   /* Determine first and last stage. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (!prog->_LinkedShaders[i])
         continue;
      if (first == MESA_SHADER_STAGES)
         first = i;
      last = i;
   }
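   /* e.g. for a program containing only a vertex and a fragment shader,
    * first == MESA_SHADER_VERTEX and last == MESA_SHADER_FRAGMENT.
    */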

   bool r = link_varyings(prog, first, last, consts, exts, api, mem_ctx);
   if (r) {
      for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
         if (!prog->_LinkedShaders[i])
            continue;

         /* Check for transform feedback varyings specified via the API. */
         prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings =
            prog->TransformFeedback.NumVarying > 0;

         /* Check for transform feedback varyings specified in the shader. */
         if (prog->last_vert_prog) {
            prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings |=
               prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
         }
      }

      /* Assign NIR XFB info to the last stage before the fragment shader. */
      for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) {
         struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
         if (sh && stage != MESA_SHADER_TESS_CTRL) {
            sh->Program->nir->xfb_info =
               gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback,
                                  sh->Program->nir);
            break;
         }
      }
   }

   ralloc_free(mem_ctx);
   return r;
}