/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR.  Eventually, we'll probably want a full-blown varying packing
 * implementation in here.  Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, or outputs_written
 * bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

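   /* Per-patch varyings are tracked in a separate bitfield, so rebase patch
    * locations to bit 0 by subtracting VARYING_SLOT_PATCH0.
    */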
   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out);
   assert(var->data.location >= 0);
   assert(location < 64);

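   /* Arrayed I/O (e.g. per-vertex TCS/TES/GS I/O) and per-view variables wrap
    * the real type in an extra array dimension that does not consume varying
    * slots, so strip it before counting slots.
    */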
   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return BITFIELD64_MASK(slots) << location;
}

static bool
is_non_generic_patch_var(nir_variable *var)
{
   return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX1;
}

static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

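/* Accumulates, into the generic and patch read bitmasks, the TCS outputs
 * that are read from within the TCS itself, since one invocation may read
 * outputs written by another invocation.
 */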
static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.patch) {
                  if (is_non_generic_patch_var(var))
                     continue;

                  patches_read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               } else {
                  read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               }
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 * representing each .location_frac used.  Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader,
                          nir_variable_mode mode,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

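      /* Skip builtins: they are not candidates for removal here. The one
       * exception is the mesh shader PRIMITIVE_ID output, which is handled
       * like an ordinary varying.
       */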
      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         if (shader->info.stage != MESA_SHADER_MESH || var->data.location != VARYING_SLOT_PRIMITIVE_ID)
            continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is unused, make it a global variable instead */
         if (shader->info.stage == MESA_SHADER_MESH &&
               (shader->info.outputs_read & BITFIELD64_BIT(var->data.location)))
            var->data.mode = nir_var_mem_shared;
         else
            var->data.mode = nir_var_shader_temp;
         var->data.location = 0;

         progress = true;
      }
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_dominance |
                            nir_metadata_block_index);
      nir_fixup_deref_modes(shader);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_shader_out_variable(var, producer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_shader_in_variable(var, consumer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
                                        patches_written) || progress;

   return progress;
}

static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (var->data.per_primitive)
      return INTERP_MODE_NONE;
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32 bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_mediump;
   bool is_per_primitive;
};

/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(nir_shader *shader,
                                nir_variable_mode mode,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type) &&
             !var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
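         /* A dual-slot (64-bit) varying spans two consecutive slots: the
          * first slot holds up to four components starting at location_frac,
          * and the spill-over (comps_slot2) lands at the start of the second
          * slot.
          */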
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
            comps[location + i].is_mediump =
               var->data.precision == GLSL_PRECISION_MEDIUM ||
               var->data.precision == GLSL_PRECISION_LOW;
            comps[location + i].is_per_primitive = var->data.per_primitive;
         }
      }
   }
}

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   const gl_shader_stage stage = shader->info.stage;
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

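         /* A remap location of 0 means no new location was assigned, since
          * assigned locations always start at VARYING_SLOT_VAR0.
          */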
         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these so we need to copy the existing mask
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool is_per_primitive;
   bool is_mediump;
   bool is_intra_stage_only;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* Sort per-primitive outputs after per-vertex ones to allow
    * better compaction when they are mixed in the shader's source.
    */
   if (comp1->is_per_primitive != comp2->is_per_primitive)
      return comp1->is_per_primitive ? 1 : -1;

   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
      return comp1->is_intra_stage_only ? 1 : -1;

   /* Group mediump varyings together. */
   if (comp1->is_mediump != comp2->is_mediump)
      return comp1->is_mediump ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   const struct nir_variable_data *const data1 = &comp1->var->data;
   const struct nir_variable_data *const data2 = &comp2->var->data;
   if (data1->location != data2->location)
      return data1->location - data2->location;
   return (int)data1->location_frac - (int)data2->location_frac;
}

static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_shader_out_variable(var, producer) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
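         /* Store a 1-based index so that 0 can mean "not packable" when we
          * look the component up later.
          */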
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(deref, nir_var_shader_in))
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx-1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
                in_var->data.per_view) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
            vc_info->is_per_primitive = in_var->data.per_primitive;
            vc_info->is_mediump = !producer->options->linker_ignore_precision &&
               (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
                in_var->data.precision == GLSL_PRECISION_LOW);
            vc_info->is_intra_stage_only = false;
            vc_info->initialised = true;
         }
      }
   }

   /* Walk over the shader and populate the varying component info array
    * for varyings which are read by other TCS invocations but are not
    * consumed by the TES.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      impl = nir_shader_get_entrypoint(producer);

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            /* We only remap things that aren't builtins. */
            nir_variable *out_var = nir_deref_instr_get_variable(deref);
            if (out_var->data.location < VARYING_SLOT_VAR0)
               continue;

            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
            if (location >= MAX_VARYINGS_INCL_PATCH)
               continue;

            unsigned var_info_idx =
               store_varying_info_idx[location][out_var->data.location_frac];
            if (!var_info_idx) {
               /* Something went wrong, the shader interfaces didn't match, so
                * abandon packing. This can happen for example when the
                * inputs are scalars but the outputs are struct members.
                */
               *varying_comp_info_size = 0;
               break;
            }

            struct varying_component *vc_info =
               &(*varying_comp_info)[var_info_idx-1];

            if (!vc_info->initialised) {
               const struct glsl_type *type = out_var->type;
               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               vc_info->var = out_var;
               vc_info->interp_type =
                  get_interp_type(out_var, type, default_to_smooth_interp);
               vc_info->interp_loc = get_interp_loc(out_var);
               vc_info->is_32bit = glsl_type_is_32bit(type);
               vc_info->is_patch = out_var->data.patch;
               vc_info->is_per_primitive = out_var->data.per_primitive;
               vc_info->is_mediump = !producer->options->linker_ignore_precision &&
                  (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
                   out_var->data.precision == GLSL_PRECISION_LOW);
               vc_info->is_intra_stage_only = true;
               vc_info->initialised = true;
            }
         }
      }
   }

   for (unsigned i = 0; i < *varying_comp_info_size; i++) {
      struct varying_component *vc_info = &(*varying_comp_info)[i];
      if (!vc_info->initialised) {
         /* Something went wrong, the shader interfaces didn't match, so
          * abandon packing. This can happen for example when the outputs are
          * scalars but the inputs are struct members.
          */
         *varying_comp_info_size = 0;
         break;
      }
   }
}

static bool
allow_pack_interp_type(nir_pack_varying_options options, int type)
{
   int sel;

   switch (type) {
   case INTERP_MODE_NONE:
      sel = nir_pack_varying_interp_mode_none;
      break;
   case INTERP_MODE_SMOOTH:
      sel = nir_pack_varying_interp_mode_smooth;
      break;
   case INTERP_MODE_FLAT:
      sel = nir_pack_varying_interp_mode_flat;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      sel = nir_pack_varying_interp_mode_noperspective;
      break;
   default:
      return false;
   }

   return options & sel;
}

static bool
allow_pack_interp_loc(nir_pack_varying_options options, int loc)
{
   int sel;

   switch (loc) {
   case INTERPOLATE_LOC_SAMPLE:
      sel = nir_pack_varying_interp_loc_sample;
      break;
   case INTERPOLATE_LOC_CENTROID:
      sel = nir_pack_varying_interp_loc_centroid;
      break;
   case INTERPOLATE_LOC_CENTER:
      sel = nir_pack_varying_interp_loc_center;
      break;
   default:
      return false;
   }

   return options & sel;
}

static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location,
                       nir_pack_varying_options options)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

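   /* Scan forward from the cursor for the first location/component pair that
    * satisfies all of the packing constraints checked below.
    */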
   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* Don't pack per-primitive and per-vertex varyings together. */
         if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching precision. */
         if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
            tmp_comp = 0;
            continue;
         }

         /* Varyings with mismatched interpolation types can only be packed
          * together if the driver supports it.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
             (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
              !allow_pack_interp_type(options, info->interp_type))) {
            tmp_comp = 0;
            continue;
         }

         /* Varyings with mismatched interpolation locations can only be
          * packed together if the driver supports it.
          */
         if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
             (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
              !allow_pack_interp_loc(options, info->interp_loc))) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

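         /* Find the first free component in this slot. */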
         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
      assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot, compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(producer, consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   nir_pack_varying_options options = consumer->options->pack_varying_options;

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs.  When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
                                options);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING,
                                options);

         /* Check if we failed to assign a remap location. This can happen if
          * for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING,
                                   options);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(consumer, nir_var_shader_in, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(producer, nir_var_shader_out, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings.  At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this.  Therefore, the total set of valid slots is the OR of the two
 * sets of varyings;  this accounts for varyings which one side may need
 * to read/write even if the other doesn't.  This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};

   get_unmoveable_components_masks(producer, nir_var_shader_out,
                                   assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(consumer, nir_var_shader_in,
                                   assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING][4] = { 0 };

   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location][var->data.location_frac] = var;
      }
   }

   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location][var->data.location_frac]) {
            input_vars[location][var->data.location_frac]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_shader_in_variable(var, consumer) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

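/* Replaces every load of the consumer input matching the stored output with
 * the constant value that the producer writes.
 */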
static bool
replace_varying_input_by_constant_load(nir_shader *shader,
                                       nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);

         progress = true;
      }
   }

   return progress;
}

static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);

         progress = true;
      }
   }

   return progress;
}

static bool
is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
{
   /* def is sure to be scalar, as can_replace_varying() filters out the
    * vector case.
    */
   assert(def->num_components == 1);

   /* A uniform load may hide behind a move instruction that converts a
    * vector to a scalar:
    *
    *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
    *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
    *     vec1 32 ssa_3 = mov ssa_2.x
    *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
    *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
    */
   *s = nir_ssa_scalar_resolved(def, 0);

   nir_ssa_def *ssa = s->def;
   if (ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   /* TODO: support nir_var_mem_ubo. */
   if (!nir_deref_mode_is(deref, nir_var_uniform))
      return false;

   /* Indirect uniform loads are not supported. */
   return !nir_deref_instr_has_indirect(deref);
}

static nir_variable *
get_uniform_var_in_consumer(nir_shader *consumer,
                            nir_variable *var_in_producer)
{
   /* Find if the uniform already exists in the consumer. */
   nir_variable *new_var = NULL;
   nir_foreach_uniform_variable(v, consumer) {
      if (!strcmp(var_in_producer->name, v->name)) {
         new_var = v;
         break;
      }
   }

   /* Create the variable if it doesn't exist. */
   if (!new_var) {
      new_var = nir_variable_clone(var_in_producer, consumer);
      nir_shader_add_variable(consumer, new_var);
   }

   return new_var;
}

static nir_deref_instr *
clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var)
      return nir_build_deref_var(b, var);

   nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
   nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);

   /* Build array and struct deref instructions.
    * The "deref" instr is sure to be direct (see is_direct_uniform_load()).
    */
   switch (deref->deref_type) {
   case nir_deref_type_array: {
      nir_load_const_instr *index =
         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
      return nir_build_deref_array_imm(b, parent, index->value->i64);
   }
   case nir_deref_type_ptr_as_array: {
      nir_load_const_instr *index =
         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
      nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
                                        parent->dest.ssa.bit_size);
      return nir_build_deref_ptr_as_array(b, parent, ssa);
   }
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, parent, deref->strct.index);
   default:
      unreachable("invalid type");
      return NULL;
   }
}

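/* Replaces every load of the consumer input matching the stored output with a
 * load of the uniform that the producer sources its value from, cloning the
 * uniform into the consumer if needed.
 */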
static bool
replace_varying_input_by_uniform_load(nir_shader *shader,
                                      nir_intrinsic_instr *store_intr,
                                      nir_ssa_scalar *scalar)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_variable *uni_var = nir_deref_instr_get_variable(deref);
   uni_var = get_uniform_var_in_consumer(shader, uni_var);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         /* Clone the deref chain, from the variable deref down to the load. */
         nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
         nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);

         /* Add a vector-to-scalar move if the uniform is a vector. */
         if (uni_def->num_components > 1) {
            nir_alu_src src = {0};
            src.src = nir_src_for_ssa(uni_def);
            src.swizzle[0] = scalar->comp;
            uni_def = nir_mov_alu(&b, src, 1);
         }

         /* Replace the input load with the uniform load. */
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);

         progress = true;
      }
   }

   return progress;
}

/* The GLSL ES 3.20 spec says:
 *
 * "The precision of a vertex output does not need to match the precision of
 * the corresponding fragment input. The minimum precision at which vertex
 * outputs are interpolated is the minimum of the vertex output precision and
 * the fragment input precision, with the exception that for highp,
 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
 * Output Matching by Name in Linked Programs")
 *
 * To implement this, when linking shaders we will take the minimum precision
 * qualifier (allowing drivers to interpolate at lower precision). For
 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
 * requires we use the *last* specified precision if there is a conflict.
 *
 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
 * NONE, we'll return the other precision, since there is no conflict.
 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
 * "backwards". For non-fragment stages, we'll pick the latter precision to
 * comply with the spec. (Note that the order matters.)
 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them for the
 * transform feedback memory store, but not for the output store.
 */

static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
{
   if (producer == GLSL_PRECISION_NONE)
      return consumer;
   else if (consumer == GLSL_PRECISION_NONE)
      return producer;
   else
      return fs ? MAX2(producer, consumer) : consumer;
}

void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
{
   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;

   nir_foreach_shader_out_variable(producer_var, producer) {
      /* Skip if the slot is not assigned */
      if (producer_var->data.location < 0)
         continue;

      nir_variable *consumer_var = nir_find_variable_with_location(consumer,
            nir_var_shader_in, producer_var->data.location);

      /* Skip if the variable will be eliminated */
      if (!consumer_var)
         continue;

      /* Now we have a pair of variables. Let's pick the smaller precision. */
      unsigned precision_1 = producer_var->data.precision;
      unsigned precision_2 = consumer_var->data.precision;
      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);

      /* Propagate the new precision */
      producer_var->data.precision = consumer_var->data.precision = minimum;
   }
}

bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

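      /* If the stored value is a compile-time constant, the consumer can load
       * the constant directly instead of reading the varying.
       */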
1388      nir_ssa_def *ssa = intr->src[1].ssa;
1389      if (ssa->parent_instr->type == nir_instr_type_load_const) {
1390         progress |= replace_varying_input_by_constant_load(consumer, intr);
1391         continue;
1392      }
1393
1394      nir_ssa_scalar uni_scalar;
1395      if (is_direct_uniform_load(ssa, &uni_scalar)) {
1396         if (consumer->options->lower_varying_from_uniform) {
1397            progress |= replace_varying_input_by_uniform_load(consumer, intr,
1398                                                              &uni_scalar);
1399            continue;
1400         } else {
1401            nir_variable *in_var = get_matching_input_var(consumer, out_var);
1402            /* The varying is loaded from same uniform, so no need to do any
1403             * interpolation. Mark it as flat explicitly.
1404             */
1405            if (!consumer->options->no_integers &&
1406                in_var && in_var->data.interpolation <= INTERP_MODE_NOPERSPECTIVE) {
1407               in_var->data.interpolation = INTERP_MODE_FLAT;
1408               out_var->data.interpolation = INTERP_MODE_FLAT;
1409            }
1410         }
1411      }
1412
1413      struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
1414      if (entry) {
1415         progress |= replace_duplicate_input(consumer,
1416                                             (nir_variable *) entry->data,
1417                                             intr);
1418      } else {
1419         nir_variable *in_var = get_matching_input_var(consumer, out_var);
1420         if (in_var) {
1421            _mesa_hash_table_insert(varying_values, ssa, in_var);
1422         }
1423      }
1424   }
1425
1426   _mesa_hash_table_destroy(varying_values, NULL);
1427
1428   return progress;
1429}
1430
1431/* TODO any better helper somewhere to sort a list? */
1432
1433static void
1434insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1435{
1436   nir_foreach_variable_in_list(var, var_list) {
1437      /* Use the `per_primitive` bool to sort per-primitive variables
1438       * to the end of the list, so they get the last driver locations
1439       * by nir_assign_io_var_locations.
1440       *
1441       * This is done because AMD HW requires that per-primitive outputs
1442       * are the last params.
1443       * In the future we can add an option for this, if needed by other HW.
1444       */
1445      if (new_var->data.per_primitive < var->data.per_primitive ||
1446          (new_var->data.per_primitive == var->data.per_primitive &&
1447           (var->data.location > new_var->data.location ||
1448            (var->data.location == new_var->data.location &&
1449             var->data.location_frac > new_var->data.location_frac)))) {
1450         exec_node_insert_node_before(&var->node, &new_var->node);
1451         return;
1452      }
1453   }
1454   exec_list_push_tail(var_list, &new_var->node);
1455}
1456
1457static void
1458sort_varyings(nir_shader *shader, nir_variable_mode mode,
1459              struct exec_list *sorted_list)
1460{
1461   exec_list_make_empty(sorted_list);
1462   nir_foreach_variable_with_modes_safe(var, shader, mode) {
1463      exec_node_remove(&var->node);
1464      insert_sorted(sorted_list, var);
1465   }
1466}
1467
void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
{
   unsigned location = 0;
   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
   uint64_t processed_locs[2] = {0};

   struct exec_list io_vars;
   sort_varyings(shader, mode, &io_vars);

   int ASSERTED last_loc = 0;
   bool ASSERTED last_per_prim = false;
   bool last_partial = false;
   nir_foreach_variable_in_list(var, &io_vars) {
      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

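      /* Find the first user-defined (non-builtin) location for this
       * stage/mode combination.  Only variables at or above this base take
       * part in the component-packing bookkeeping below, since builtins
       * never share a slot.
       */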
      int base;
      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
         base = VERT_ATTRIB_GENERIC0;
      else if (var->data.mode == nir_var_shader_out &&
               stage == MESA_SHADER_FRAGMENT)
         base = FRAG_RESULT_DATA0;
      else
         base = VARYING_SLOT_VAR0;

      unsigned var_size, driver_size;
      if (var->data.compact) {
         /* If the previous variable only partially filled its last slot,
          * don't allow another compact variable that starts at component 0
          * to share that slot.
          */
         if (last_partial && var->data.location_frac == 0) {
            location++;
         }

         /* compact variables must be arrays of scalars */
         assert(!var->data.per_view);
         assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
         unsigned start = 4 * location + var->data.location_frac;
         unsigned end = start + glsl_get_length(type);
         var_size = driver_size = end / 4 - location;
         last_partial = end % 4 != 0;
      } else {
         /* Compact variables bypass the normal varying compacting pass,
          * which means they cannot be in the same vec4 slot as a normal
          * variable. If part of the current slot is taken up by a compact
          * variable, we need to go to the next one.
          */
         if (last_partial) {
            location++;
            last_partial = false;
         }

         /* Per-view variables have an extra array dimension, which is
          * ignored when counting user-facing slots (var->data.location),
          * but *not* when counting driver slots
          * (var->data.driver_location). That is, each user slot maps to
          * multiple driver slots.
          */
         driver_size = glsl_count_attribute_slots(type, false);
         if (var->data.per_view) {
            assert(glsl_type_is_array(type));
            var_size =
               glsl_count_attribute_slots(glsl_get_array_element(type), false);
         } else {
            var_size = driver_size;
         }
      }

      /* Builtins don't allow component packing, so we only need to worry
       * about user-defined varyings sharing the same location.
       */
      bool processed = false;
      if (var->data.location >= base) {
         unsigned glsl_location = var->data.location - base;

         for (unsigned i = 0; i < var_size; i++) {
            if (processed_locs[var->data.index] &
                ((uint64_t)1 << (glsl_location + i)))
               processed = true;
            else
               processed_locs[var->data.index] |=
                  ((uint64_t)1 << (glsl_location + i));
         }
      }

      /* Because component packing allows varyings to share the same
       * location, we may have already processed this location.
       */
      if (processed) {
         /* TODO handle overlapping per-view variables */
         assert(!var->data.per_view);
         unsigned driver_location = assigned_locations[var->data.location];
         var->data.driver_location = driver_location;

         /* An array may be packed such that it crosses multiple other
          * arrays or variables, so we need to make sure we have allocated
          * the elements consecutively if the previously processed var was
          * shorter than the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in
          * ascending location order, but per-vertex/per-primitive outputs
          * may be grouped separately.
          */
         assert(last_loc <= var->data.location ||
                last_per_prim != var->data.per_primitive);
         last_loc = var->data.location;
         last_per_prim = var->data.per_primitive;
         unsigned last_slot_location = driver_location + var_size;
         if (last_slot_location > location) {
            unsigned num_unallocated_slots = last_slot_location - location;
            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
               assigned_locations[var->data.location + i] = location;
               location++;
            }
         }
         continue;
      }

      for (unsigned i = 0; i < var_size; i++) {
         assigned_locations[var->data.location + i] = location + i;
      }

      var->data.driver_location = location;
      location += driver_size;
   }

   if (last_partial)
      location++;

   exec_list_append(&shader->variables, &io_vars);
   *size = location;
}

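/* Map a variable's location to a dense bit index for the linked-I/O masks
 * below.  Non-patch locations are used as-is, while patch locations are
 * remapped so the special patch slots occupy bits 0-3 and generic patch
 * slots follow them.
 */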
static uint64_t
get_linked_variable_location(unsigned location, bool patch)
{
   if (!patch)
      return location;

   /* Reserve locations 0...3 for special patch variables
    * like tess factors and bounding boxes, and the generic patch
    * variables will come after them.
    */
   if (location >= VARYING_SLOT_PATCH0)
      return location - VARYING_SLOT_PATCH0 + 4;
   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
            location <= VARYING_SLOT_BOUNDING_BOX1)
      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
   else
      unreachable("Unsupported variable in get_linked_variable_location.");
}

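/* Return a mask with one bit set per slot the variable occupies, anchored
 * at bit 0; callers shift it up to the variable's (remapped) location.
 */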
static uint64_t
get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
{
   const struct glsl_type *type = variable->type;

   if (nir_is_arrayed_io(variable, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   if (variable->data.compact) {
      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
      slots = DIV_ROUND_UP(component_count, 4);
   }

   return u_bit_consecutive64(0, slots);
}

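/**
 * Assign driver locations to the producer's outputs and the consumer's
 * inputs from one combined mask, so that a variable occupying a given
 * location gets the same driver_location on both sides of the interface.
 * Patch I/O is numbered separately from per-vertex I/O.
 *
 * Typical usage is a sketch like the following (drivers differ in the
 * surrounding details):
 *
 *    nir_linked_io_var_info io =
 *       nir_assign_linked_io_var_locations(producer, consumer);
 *    // io.num_linked_io_vars and io.num_linked_patch_io_vars give the
 *    // total number of linked slots of each kind.
 */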
nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
{
   assert(producer);
   assert(consumer);

   uint64_t producer_output_mask = 0;
   uint64_t producer_patch_output_mask = 0;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         producer_patch_output_mask |= mask << loc;
      else
         producer_output_mask |= mask << loc;
   }

   uint64_t consumer_input_mask = 0;
   uint64_t consumer_patch_input_mask = 0;

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         consumer_patch_input_mask |= mask << loc;
      else
         consumer_input_mask |= mask << loc;
   }

   uint64_t io_mask = producer_output_mask | consumer_input_mask;
   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_linked_io_var_info result = {
      .num_linked_io_vars = util_bitcount64(io_mask),
      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
   };

   return result;
}