1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2021 Advanced Micro Devices, Inc.
3bf215546Sopenharmony_ci * All Rights Reserved.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "si_pipe.h"
26bf215546Sopenharmony_ci#include "util/mesa-sha1.h"
27bf215546Sopenharmony_ci#include "util/u_prim.h"
28bf215546Sopenharmony_ci#include "sid.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cistruct si_shader_profile {
32bf215546Sopenharmony_ci   uint32_t sha1[SHA1_DIGEST_LENGTH32];
33bf215546Sopenharmony_ci   uint32_t options;
34bf215546Sopenharmony_ci};
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_cistatic struct si_shader_profile profiles[] =
37bf215546Sopenharmony_ci{
38bf215546Sopenharmony_ci   {
39bf215546Sopenharmony_ci      /* Plot3D */
40bf215546Sopenharmony_ci      {0x485320cd, 0x87a9ba05, 0x24a60e4f, 0x25aa19f7, 0xf5287451},
41bf215546Sopenharmony_ci      SI_PROFILE_VS_NO_BINNING,
42bf215546Sopenharmony_ci   },
43bf215546Sopenharmony_ci   {
44bf215546Sopenharmony_ci      /* Viewperf/Energy isn't affected by the discard bug. */
45bf215546Sopenharmony_ci      {0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0},
46bf215546Sopenharmony_ci      SI_PROFILE_IGNORE_LLVM13_DISCARD_BUG,
47bf215546Sopenharmony_ci   },
48bf215546Sopenharmony_ci   {
49bf215546Sopenharmony_ci      /* Viewperf/Medical */
50bf215546Sopenharmony_ci      {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0},
51bf215546Sopenharmony_ci      SI_PROFILE_PS_NO_BINNING,
52bf215546Sopenharmony_ci   },
53bf215546Sopenharmony_ci   {
54bf215546Sopenharmony_ci      /* Viewperf/Medical, a shader with a divergent loop doesn't benefit from Wave32,
55bf215546Sopenharmony_ci       * probably due to interpolation performance.
56bf215546Sopenharmony_ci       */
57bf215546Sopenharmony_ci      {0x29f0f4a0, 0x0672258d, 0x47ccdcfd, 0x31e67dcc, 0xdcb1fda8},
58bf215546Sopenharmony_ci      SI_PROFILE_WAVE64,
59bf215546Sopenharmony_ci   },
60bf215546Sopenharmony_ci   {
61bf215546Sopenharmony_ci      /* Viewperf/Creo */
62bf215546Sopenharmony_ci      {0x1f288a73, 0xba46cce5, 0xbf68e6c6, 0x58543651, 0xca3c8efd},
63bf215546Sopenharmony_ci      SI_PROFILE_CLAMP_DIV_BY_ZERO,
64bf215546Sopenharmony_ci   },
65bf215546Sopenharmony_ci};
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_cistatic unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
68bf215546Sopenharmony_ci{
69bf215546Sopenharmony_ci   if (intrin->intrinsic != nir_intrinsic_store_output)
70bf215546Sopenharmony_ci      return 0;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   unsigned writemask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);
73bf215546Sopenharmony_ci   unsigned location = nir_intrinsic_io_semantics(intrin).location;
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
76bf215546Sopenharmony_ci      return writemask << 4;
77bf215546Sopenharmony_ci   else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
78bf215546Sopenharmony_ci      return writemask;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   return 0;
81bf215546Sopenharmony_ci}
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_cistatic void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
84bf215546Sopenharmony_ci                           unsigned *cond_block_tf_writemask,
85bf215546Sopenharmony_ci                           bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
86bf215546Sopenharmony_ci{
87bf215546Sopenharmony_ci   switch (cf_node->type) {
88bf215546Sopenharmony_ci   case nir_cf_node_block: {
89bf215546Sopenharmony_ci      nir_block *block = nir_cf_node_as_block(cf_node);
90bf215546Sopenharmony_ci      nir_foreach_instr (instr, block) {
91bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
92bf215546Sopenharmony_ci            continue;
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
95bf215546Sopenharmony_ci         if (intrin->intrinsic == nir_intrinsic_control_barrier) {
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci            /* If we find a barrier in nested control flow put this in the
98bf215546Sopenharmony_ci             * too hard basket. In GLSL this is not possible but it is in
99bf215546Sopenharmony_ci             * SPIR-V.
100bf215546Sopenharmony_ci             */
101bf215546Sopenharmony_ci            if (is_nested_cf) {
102bf215546Sopenharmony_ci               *tessfactors_are_def_in_all_invocs = false;
103bf215546Sopenharmony_ci               return;
104bf215546Sopenharmony_ci            }
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci            /* The following case must be prevented:
107bf215546Sopenharmony_ci             *    gl_TessLevelInner = ...;
108bf215546Sopenharmony_ci             *    barrier();
109bf215546Sopenharmony_ci             *    if (gl_InvocationID == 1)
110bf215546Sopenharmony_ci             *       gl_TessLevelInner = ...;
111bf215546Sopenharmony_ci             *
112bf215546Sopenharmony_ci             * If you consider disjoint code segments separated by barriers, each
113bf215546Sopenharmony_ci             * such segment that writes tess factor channels should write the same
114bf215546Sopenharmony_ci             * channels in all codepaths within that segment.
115bf215546Sopenharmony_ci             */
116bf215546Sopenharmony_ci            if (*upper_block_tf_writemask || *cond_block_tf_writemask) {
117bf215546Sopenharmony_ci               /* Accumulate the result: */
118bf215546Sopenharmony_ci               *tessfactors_are_def_in_all_invocs &=
119bf215546Sopenharmony_ci                  !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci               /* Analyze the next code segment from scratch. */
122bf215546Sopenharmony_ci               *upper_block_tf_writemask = 0;
123bf215546Sopenharmony_ci               *cond_block_tf_writemask = 0;
124bf215546Sopenharmony_ci            }
125bf215546Sopenharmony_ci         } else
126bf215546Sopenharmony_ci            *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
127bf215546Sopenharmony_ci      }
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci      break;
130bf215546Sopenharmony_ci   }
131bf215546Sopenharmony_ci   case nir_cf_node_if: {
132bf215546Sopenharmony_ci      unsigned then_tessfactor_writemask = 0;
133bf215546Sopenharmony_ci      unsigned else_tessfactor_writemask = 0;
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
136bf215546Sopenharmony_ci      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
137bf215546Sopenharmony_ci      {
138bf215546Sopenharmony_ci         scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask,
139bf215546Sopenharmony_ci                        tessfactors_are_def_in_all_invocs, true);
140bf215546Sopenharmony_ci      }
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
143bf215546Sopenharmony_ci      {
144bf215546Sopenharmony_ci         scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask,
145bf215546Sopenharmony_ci                        tessfactors_are_def_in_all_invocs, true);
146bf215546Sopenharmony_ci      }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci      if (then_tessfactor_writemask || else_tessfactor_writemask) {
149bf215546Sopenharmony_ci         /* If both statements write the same tess factor channels,
150bf215546Sopenharmony_ci          * we can say that the upper block writes them too.
151bf215546Sopenharmony_ci          */
152bf215546Sopenharmony_ci         *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask;
153bf215546Sopenharmony_ci         *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask;
154bf215546Sopenharmony_ci      }
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci      break;
157bf215546Sopenharmony_ci   }
158bf215546Sopenharmony_ci   case nir_cf_node_loop: {
159bf215546Sopenharmony_ci      nir_loop *loop = nir_cf_node_as_loop(cf_node);
160bf215546Sopenharmony_ci      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
161bf215546Sopenharmony_ci      {
162bf215546Sopenharmony_ci         scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask,
163bf215546Sopenharmony_ci                        tessfactors_are_def_in_all_invocs, true);
164bf215546Sopenharmony_ci      }
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci      break;
167bf215546Sopenharmony_ci   }
168bf215546Sopenharmony_ci   default:
169bf215546Sopenharmony_ci      unreachable("unknown cf node type");
170bf215546Sopenharmony_ci   }
171bf215546Sopenharmony_ci}
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_cistatic bool are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
174bf215546Sopenharmony_ci{
175bf215546Sopenharmony_ci   assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci   /* The pass works as follows:
178bf215546Sopenharmony_ci    * If all codepaths write tess factors, we can say that all
179bf215546Sopenharmony_ci    * invocations define tess factors.
180bf215546Sopenharmony_ci    *
181bf215546Sopenharmony_ci    * Each tess factor channel is tracked separately.
182bf215546Sopenharmony_ci    */
183bf215546Sopenharmony_ci   unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
184bf215546Sopenharmony_ci   unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   /* Initial value = true. Here the pass will accumulate results from
187bf215546Sopenharmony_ci    * multiple segments surrounded by barriers. If tess factors aren't
188bf215546Sopenharmony_ci    * written at all, it's a shader bug and we don't care if this will be
189bf215546Sopenharmony_ci    * true.
190bf215546Sopenharmony_ci    */
191bf215546Sopenharmony_ci   bool tessfactors_are_def_in_all_invocs = true;
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci   nir_foreach_function (function, nir) {
194bf215546Sopenharmony_ci      if (function->impl) {
195bf215546Sopenharmony_ci         foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
196bf215546Sopenharmony_ci         {
197bf215546Sopenharmony_ci            scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
198bf215546Sopenharmony_ci                           &tessfactors_are_def_in_all_invocs, false);
199bf215546Sopenharmony_ci         }
200bf215546Sopenharmony_ci      }
201bf215546Sopenharmony_ci   }
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   /* Accumulate the result for the last code segment separated by a
204bf215546Sopenharmony_ci    * barrier.
205bf215546Sopenharmony_ci    */
206bf215546Sopenharmony_ci   if (main_block_tf_writemask || cond_block_tf_writemask) {
207bf215546Sopenharmony_ci      tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask);
208bf215546Sopenharmony_ci   }
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   return tessfactors_are_def_in_all_invocs;
211bf215546Sopenharmony_ci}
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_cistatic const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type type)
214bf215546Sopenharmony_ci{
215bf215546Sopenharmony_ci   for (unsigned i = 0; i < instr->num_srcs; i++) {
216bf215546Sopenharmony_ci      if (instr->src[i].src_type == type)
217bf215546Sopenharmony_ci         return &instr->src[i].src;
218bf215546Sopenharmony_ci   }
219bf215546Sopenharmony_ci   return NULL;
220bf215546Sopenharmony_ci}
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_cistatic void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
223bf215546Sopenharmony_ci                          nir_intrinsic_instr *intr, bool is_input)
224bf215546Sopenharmony_ci{
225bf215546Sopenharmony_ci   unsigned interp = INTERP_MODE_FLAT; /* load_input uses flat shading */
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
228bf215546Sopenharmony_ci      nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci      if (baryc) {
231bf215546Sopenharmony_ci         if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0)
232bf215546Sopenharmony_ci            interp = nir_intrinsic_interp_mode(baryc);
233bf215546Sopenharmony_ci         else
234bf215546Sopenharmony_ci            unreachable("unknown barycentric intrinsic");
235bf215546Sopenharmony_ci      } else {
236bf215546Sopenharmony_ci         unreachable("unknown barycentric expression");
237bf215546Sopenharmony_ci      }
238bf215546Sopenharmony_ci   }
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   unsigned mask, bit_size;
241bf215546Sopenharmony_ci   bool is_output_load;
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   if (nir_intrinsic_has_write_mask(intr)) {
244bf215546Sopenharmony_ci      mask = nir_intrinsic_write_mask(intr); /* store */
245bf215546Sopenharmony_ci      bit_size = nir_src_bit_size(intr->src[0]);
246bf215546Sopenharmony_ci      is_output_load = false;
247bf215546Sopenharmony_ci   } else {
248bf215546Sopenharmony_ci      mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */
249bf215546Sopenharmony_ci      bit_size = intr->dest.ssa.bit_size;
250bf215546Sopenharmony_ci      is_output_load = !is_input;
251bf215546Sopenharmony_ci   }
252bf215546Sopenharmony_ci   assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs
255bf215546Sopenharmony_ci    * where the mask is untyped.
256bf215546Sopenharmony_ci    */
257bf215546Sopenharmony_ci   if (bit_size == 16 && !is_input) {
258bf215546Sopenharmony_ci      unsigned new_mask = 0;
259bf215546Sopenharmony_ci      for (unsigned i = 0; i < 4; i++) {
260bf215546Sopenharmony_ci         if (mask & (1 << i))
261bf215546Sopenharmony_ci            new_mask |= 0x1 << (i / 2);
262bf215546Sopenharmony_ci      }
263bf215546Sopenharmony_ci      mask = new_mask;
264bf215546Sopenharmony_ci   }
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   mask <<= nir_intrinsic_component(intr);
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci   nir_src offset = *nir_get_io_offset_src(intr);
269bf215546Sopenharmony_ci   bool indirect = !nir_src_is_const(offset);
270bf215546Sopenharmony_ci   if (!indirect)
271bf215546Sopenharmony_ci      assert(nir_src_as_uint(offset) == 0);
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   unsigned semantic = 0;
274bf215546Sopenharmony_ci   /* VS doesn't have semantics. */
275bf215546Sopenharmony_ci   if (nir->info.stage != MESA_SHADER_VERTEX || !is_input)
276bf215546Sopenharmony_ci      semantic = nir_intrinsic_io_semantics(intr).location;
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
279bf215546Sopenharmony_ci      /* Never use FRAG_RESULT_COLOR directly. */
280bf215546Sopenharmony_ci      if (semantic == FRAG_RESULT_COLOR)
281bf215546Sopenharmony_ci         semantic = FRAG_RESULT_DATA0;
282bf215546Sopenharmony_ci      semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index;
283bf215546Sopenharmony_ci   }
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   unsigned driver_location = nir_intrinsic_base(intr);
286bf215546Sopenharmony_ci   unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   if (is_input) {
289bf215546Sopenharmony_ci      assert(driver_location + num_slots <= ARRAY_SIZE(info->input));
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_slots; i++) {
292bf215546Sopenharmony_ci         unsigned loc = driver_location + i;
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci         info->input[loc].semantic = semantic + i;
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci         if (semantic == VARYING_SLOT_PRIMITIVE_ID)
297bf215546Sopenharmony_ci            info->input[loc].interpolate = INTERP_MODE_FLAT;
298bf215546Sopenharmony_ci         else
299bf215546Sopenharmony_ci            info->input[loc].interpolate = interp;
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci         if (mask) {
302bf215546Sopenharmony_ci            info->input[loc].usage_mask |= mask;
303bf215546Sopenharmony_ci            if (bit_size == 16) {
304bf215546Sopenharmony_ci               if (nir_intrinsic_io_semantics(intr).high_16bits)
305bf215546Sopenharmony_ci                  info->input[loc].fp16_lo_hi_valid |= 0x2;
306bf215546Sopenharmony_ci               else
307bf215546Sopenharmony_ci                  info->input[loc].fp16_lo_hi_valid |= 0x1;
308bf215546Sopenharmony_ci            }
309bf215546Sopenharmony_ci            info->num_inputs = MAX2(info->num_inputs, loc + 1);
310bf215546Sopenharmony_ci         }
311bf215546Sopenharmony_ci      }
312bf215546Sopenharmony_ci   } else {
313bf215546Sopenharmony_ci      /* Outputs. */
314bf215546Sopenharmony_ci      assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci      for (unsigned i = 0; i < num_slots; i++) {
317bf215546Sopenharmony_ci         unsigned loc = driver_location + i;
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci         info->output_semantic[loc] = semantic + i;
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci         if (is_output_load) {
322bf215546Sopenharmony_ci            /* Output loads have only a few things that we need to track. */
323bf215546Sopenharmony_ci            info->output_readmask[loc] |= mask;
324bf215546Sopenharmony_ci         } else if (mask) {
325bf215546Sopenharmony_ci            /* Output stores. */
326bf215546Sopenharmony_ci            unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
327bf215546Sopenharmony_ci                                  (nir_intrinsic_component(intr) * 2);
328bf215546Sopenharmony_ci            unsigned new_mask = mask & ~info->output_usagemask[loc];
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci            /* Iterate over all components. */
331bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++) {
332bf215546Sopenharmony_ci               unsigned stream = (gs_streams >> (i * 2)) & 0x3;
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci               if (new_mask & (1 << i)) {
335bf215546Sopenharmony_ci                  info->output_streams[loc] |= stream << (i * 2);
336bf215546Sopenharmony_ci                  info->num_stream_output_components[stream]++;
337bf215546Sopenharmony_ci               }
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci               if (nir_intrinsic_has_io_xfb(intr)) {
340bf215546Sopenharmony_ci                  nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(intr) :
341bf215546Sopenharmony_ci                                           nir_intrinsic_io_xfb2(intr);
342bf215546Sopenharmony_ci                  if (xfb.out[i % 2].num_components) {
343bf215546Sopenharmony_ci                     unsigned stream = (gs_streams >> (i * 2)) & 0x3;
344bf215546Sopenharmony_ci                     info->enabled_streamout_buffer_mask |=
345bf215546Sopenharmony_ci                        BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer);
346bf215546Sopenharmony_ci                  }
347bf215546Sopenharmony_ci               }
348bf215546Sopenharmony_ci            }
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci            if (nir_intrinsic_has_src_type(intr))
351bf215546Sopenharmony_ci               info->output_type[loc] = nir_intrinsic_src_type(intr);
352bf215546Sopenharmony_ci            else if (nir_intrinsic_has_dest_type(intr))
353bf215546Sopenharmony_ci               info->output_type[loc] = nir_intrinsic_dest_type(intr);
354bf215546Sopenharmony_ci            else
355bf215546Sopenharmony_ci               info->output_type[loc] = nir_type_float32;
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci            info->output_usagemask[loc] |= mask;
358bf215546Sopenharmony_ci            info->num_outputs = MAX2(info->num_outputs, loc + 1);
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci            if (nir->info.stage == MESA_SHADER_FRAGMENT &&
361bf215546Sopenharmony_ci                semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
362bf215546Sopenharmony_ci               unsigned index = semantic - FRAG_RESULT_DATA0;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci               if (nir_intrinsic_src_type(intr) == nir_type_float16)
365bf215546Sopenharmony_ci                  info->output_color_types |= SI_TYPE_FLOAT16 << (index * 2);
366bf215546Sopenharmony_ci               else if (nir_intrinsic_src_type(intr) == nir_type_int16)
367bf215546Sopenharmony_ci                  info->output_color_types |= SI_TYPE_INT16 << (index * 2);
368bf215546Sopenharmony_ci               else if (nir_intrinsic_src_type(intr) == nir_type_uint16)
369bf215546Sopenharmony_ci                  info->output_color_types |= SI_TYPE_UINT16 << (index * 2);
370bf215546Sopenharmony_ci            }
371bf215546Sopenharmony_ci         }
372bf215546Sopenharmony_ci      }
373bf215546Sopenharmony_ci   }
374bf215546Sopenharmony_ci}
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_cistatic bool is_bindless_handle_indirect(nir_instr *src)
377bf215546Sopenharmony_ci{
378bf215546Sopenharmony_ci   /* Check if the bindless handle comes from indirect load_ubo. */
379bf215546Sopenharmony_ci   if (src->type == nir_instr_type_intrinsic &&
380bf215546Sopenharmony_ci       nir_instr_as_intrinsic(src)->intrinsic == nir_intrinsic_load_ubo) {
381bf215546Sopenharmony_ci      if (!nir_src_is_const(nir_instr_as_intrinsic(src)->src[0]))
382bf215546Sopenharmony_ci         return true;
383bf215546Sopenharmony_ci   } else {
384bf215546Sopenharmony_ci      /* Some other instruction. Return the worst-case result. */
385bf215546Sopenharmony_ci      return true;
386bf215546Sopenharmony_ci   }
387bf215546Sopenharmony_ci   return false;
388bf215546Sopenharmony_ci}
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci/* TODO: convert to nir_shader_instructions_pass */
391bf215546Sopenharmony_cistatic void scan_instruction(const struct nir_shader *nir, struct si_shader_info *info,
392bf215546Sopenharmony_ci                             nir_instr *instr)
393bf215546Sopenharmony_ci{
394bf215546Sopenharmony_ci   if (instr->type == nir_instr_type_tex) {
395bf215546Sopenharmony_ci      nir_tex_instr *tex = nir_instr_as_tex(instr);
396bf215546Sopenharmony_ci      const nir_src *handle = get_texture_src(tex, nir_tex_src_texture_handle);
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci      /* Gather the types of used VMEM instructions that return something. */
399bf215546Sopenharmony_ci      switch (tex->op) {
400bf215546Sopenharmony_ci      case nir_texop_tex:
401bf215546Sopenharmony_ci      case nir_texop_txb:
402bf215546Sopenharmony_ci      case nir_texop_txl:
403bf215546Sopenharmony_ci      case nir_texop_txd:
404bf215546Sopenharmony_ci      case nir_texop_lod:
405bf215546Sopenharmony_ci      case nir_texop_tg4:
406bf215546Sopenharmony_ci         info->uses_vmem_sampler_or_bvh = true;
407bf215546Sopenharmony_ci         break;
408bf215546Sopenharmony_ci      default:
409bf215546Sopenharmony_ci         info->uses_vmem_load_other = true;
410bf215546Sopenharmony_ci         break;
411bf215546Sopenharmony_ci      }
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci      if (handle) {
414bf215546Sopenharmony_ci         info->uses_bindless_samplers = true;
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci         if (is_bindless_handle_indirect(handle->ssa->parent_instr))
417bf215546Sopenharmony_ci            info->uses_indirect_descriptor = true;
418bf215546Sopenharmony_ci      } else {
419bf215546Sopenharmony_ci         const nir_src *deref = get_texture_src(tex, nir_tex_src_texture_deref);
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci         if (nir_deref_instr_has_indirect(nir_src_as_deref(*deref)))
422bf215546Sopenharmony_ci            info->uses_indirect_descriptor = true;
423bf215546Sopenharmony_ci      }
424bf215546Sopenharmony_ci   } else if (instr->type == nir_instr_type_intrinsic) {
425bf215546Sopenharmony_ci      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
426bf215546Sopenharmony_ci      const char *intr_name = nir_intrinsic_infos[intr->intrinsic].name;
427bf215546Sopenharmony_ci      bool is_ssbo = strstr(intr_name, "ssbo");
428bf215546Sopenharmony_ci      bool is_image = strstr(intr_name, "image") == intr_name;
429bf215546Sopenharmony_ci      bool is_bindless_image = strstr(intr_name, "bindless_image") == intr_name;
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci      /* Gather the types of used VMEM instructions that return something. */
432bf215546Sopenharmony_ci      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
433bf215546Sopenharmony_ci         switch (intr->intrinsic) {
434bf215546Sopenharmony_ci         case nir_intrinsic_load_ubo:
435bf215546Sopenharmony_ci            if (!nir_src_is_const(intr->src[1]))
436bf215546Sopenharmony_ci               info->uses_vmem_load_other = true;
437bf215546Sopenharmony_ci            break;
438bf215546Sopenharmony_ci
439bf215546Sopenharmony_ci         case nir_intrinsic_load_input:
440bf215546Sopenharmony_ci         case nir_intrinsic_load_input_vertex:
441bf215546Sopenharmony_ci         case nir_intrinsic_load_per_vertex_input:
442bf215546Sopenharmony_ci            if (nir->info.stage == MESA_SHADER_VERTEX ||
443bf215546Sopenharmony_ci                nir->info.stage == MESA_SHADER_TESS_EVAL)
444bf215546Sopenharmony_ci               info->uses_vmem_load_other = true;
445bf215546Sopenharmony_ci            break;
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci         case nir_intrinsic_load_constant:
448bf215546Sopenharmony_ci         case nir_intrinsic_load_barycentric_at_sample: /* This loads sample positions. */
449bf215546Sopenharmony_ci         case nir_intrinsic_load_buffer_amd:
450bf215546Sopenharmony_ci            info->uses_vmem_load_other = true;
451bf215546Sopenharmony_ci            break;
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci         default:
454bf215546Sopenharmony_ci            if (is_image ||
455bf215546Sopenharmony_ci                is_bindless_image ||
456bf215546Sopenharmony_ci                is_ssbo ||
457bf215546Sopenharmony_ci                (strstr(intr_name, "global") == intr_name ||
458bf215546Sopenharmony_ci                 intr->intrinsic == nir_intrinsic_load_global ||
459bf215546Sopenharmony_ci                 intr->intrinsic == nir_intrinsic_store_global) ||
460bf215546Sopenharmony_ci                strstr(intr_name, "scratch"))
461bf215546Sopenharmony_ci               info->uses_vmem_load_other = true;
462bf215546Sopenharmony_ci            break;
463bf215546Sopenharmony_ci         }
464bf215546Sopenharmony_ci      }
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci      if (is_bindless_image)
467bf215546Sopenharmony_ci         info->uses_bindless_images = true;
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci      if (nir_intrinsic_writes_external_memory(intr))
470bf215546Sopenharmony_ci         info->num_memory_stores++;
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci      if (is_image && nir_deref_instr_has_indirect(nir_src_as_deref(intr->src[0])))
473bf215546Sopenharmony_ci         info->uses_indirect_descriptor = true;
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci      if (is_bindless_image && is_bindless_handle_indirect(intr->src[0].ssa->parent_instr))
476bf215546Sopenharmony_ci         info->uses_indirect_descriptor = true;
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci      if (intr->intrinsic != nir_intrinsic_store_ssbo && is_ssbo &&
479bf215546Sopenharmony_ci          !nir_src_is_const(intr->src[0]))
480bf215546Sopenharmony_ci         info->uses_indirect_descriptor = true;
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_ci      switch (intr->intrinsic) {
483bf215546Sopenharmony_ci      case nir_intrinsic_store_ssbo:
484bf215546Sopenharmony_ci         if (!nir_src_is_const(intr->src[1]))
485bf215546Sopenharmony_ci            info->uses_indirect_descriptor = true;
486bf215546Sopenharmony_ci         break;
487bf215546Sopenharmony_ci      case nir_intrinsic_load_ubo:
488bf215546Sopenharmony_ci         if (!nir_src_is_const(intr->src[0]))
489bf215546Sopenharmony_ci            info->uses_indirect_descriptor = true;
490bf215546Sopenharmony_ci         break;
491bf215546Sopenharmony_ci      case nir_intrinsic_load_local_invocation_id:
492bf215546Sopenharmony_ci      case nir_intrinsic_load_workgroup_id: {
493bf215546Sopenharmony_ci         unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa);
494bf215546Sopenharmony_ci         while (mask) {
495bf215546Sopenharmony_ci            unsigned i = u_bit_scan(&mask);
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci            if (intr->intrinsic == nir_intrinsic_load_workgroup_id)
498bf215546Sopenharmony_ci               info->uses_block_id[i] = true;
499bf215546Sopenharmony_ci            else
500bf215546Sopenharmony_ci               info->uses_thread_id[i] = true;
501bf215546Sopenharmony_ci         }
502bf215546Sopenharmony_ci         break;
503bf215546Sopenharmony_ci      }
504bf215546Sopenharmony_ci      case nir_intrinsic_load_color0:
505bf215546Sopenharmony_ci      case nir_intrinsic_load_color1: {
506bf215546Sopenharmony_ci         unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
507bf215546Sopenharmony_ci         uint8_t mask = nir_ssa_def_components_read(&intr->dest.ssa);
508bf215546Sopenharmony_ci         info->colors_read |= mask << (index * 4);
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci         switch (info->color_interpolate[index]) {
511bf215546Sopenharmony_ci         case INTERP_MODE_SMOOTH:
512bf215546Sopenharmony_ci            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
513bf215546Sopenharmony_ci               info->uses_persp_sample = true;
514bf215546Sopenharmony_ci            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
515bf215546Sopenharmony_ci               info->uses_persp_centroid = true;
516bf215546Sopenharmony_ci            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
517bf215546Sopenharmony_ci               info->uses_persp_center = true;
518bf215546Sopenharmony_ci            break;
519bf215546Sopenharmony_ci         case INTERP_MODE_NOPERSPECTIVE:
520bf215546Sopenharmony_ci            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
521bf215546Sopenharmony_ci               info->uses_linear_sample = true;
522bf215546Sopenharmony_ci            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
523bf215546Sopenharmony_ci               info->uses_linear_centroid = true;
524bf215546Sopenharmony_ci            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
525bf215546Sopenharmony_ci               info->uses_linear_center = true;
526bf215546Sopenharmony_ci            break;
527bf215546Sopenharmony_ci         case INTERP_MODE_COLOR:
528bf215546Sopenharmony_ci            /* We don't know the final value. This will be FLAT if flatshading is enabled
529bf215546Sopenharmony_ci             * in the rasterizer state, otherwise it will be SMOOTH.
530bf215546Sopenharmony_ci             */
531bf215546Sopenharmony_ci            info->uses_interp_color = true;
532bf215546Sopenharmony_ci            if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_SAMPLE)
533bf215546Sopenharmony_ci               info->uses_persp_sample_color = true;
534bf215546Sopenharmony_ci            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTROID)
535bf215546Sopenharmony_ci               info->uses_persp_centroid_color = true;
536bf215546Sopenharmony_ci            else if (info->color_interpolate_loc[index] == TGSI_INTERPOLATE_LOC_CENTER)
537bf215546Sopenharmony_ci               info->uses_persp_center_color = true;
538bf215546Sopenharmony_ci            break;
539bf215546Sopenharmony_ci         }
540bf215546Sopenharmony_ci         break;
541bf215546Sopenharmony_ci      }
542bf215546Sopenharmony_ci      case nir_intrinsic_load_barycentric_at_offset:   /* uses center */
543bf215546Sopenharmony_ci      case nir_intrinsic_load_barycentric_at_sample:   /* uses center */
544bf215546Sopenharmony_ci         if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_FLAT)
545bf215546Sopenharmony_ci            break;
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci         if (nir_intrinsic_interp_mode(intr) == INTERP_MODE_NOPERSPECTIVE) {
548bf215546Sopenharmony_ci            info->uses_linear_center = true;
549bf215546Sopenharmony_ci         } else {
550bf215546Sopenharmony_ci            info->uses_persp_center = true;
551bf215546Sopenharmony_ci         }
552bf215546Sopenharmony_ci         if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
553bf215546Sopenharmony_ci            info->uses_interp_at_sample = true;
554bf215546Sopenharmony_ci         break;
555bf215546Sopenharmony_ci      case nir_intrinsic_load_input:
556bf215546Sopenharmony_ci      case nir_intrinsic_load_per_vertex_input:
557bf215546Sopenharmony_ci      case nir_intrinsic_load_input_vertex:
558bf215546Sopenharmony_ci      case nir_intrinsic_load_interpolated_input:
559bf215546Sopenharmony_ci         scan_io_usage(nir, info, intr, true);
560bf215546Sopenharmony_ci         break;
561bf215546Sopenharmony_ci      case nir_intrinsic_load_output:
562bf215546Sopenharmony_ci      case nir_intrinsic_load_per_vertex_output:
563bf215546Sopenharmony_ci      case nir_intrinsic_store_output:
564bf215546Sopenharmony_ci      case nir_intrinsic_store_per_vertex_output:
565bf215546Sopenharmony_ci         scan_io_usage(nir, info, intr, false);
566bf215546Sopenharmony_ci         break;
567bf215546Sopenharmony_ci      case nir_intrinsic_load_deref:
568bf215546Sopenharmony_ci      case nir_intrinsic_store_deref:
569bf215546Sopenharmony_ci         /* These can only occur if there is indirect temp indexing. */
570bf215546Sopenharmony_ci         break;
571bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_centroid:
572bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_sample:
573bf215546Sopenharmony_ci      case nir_intrinsic_interp_deref_at_offset:
574bf215546Sopenharmony_ci         unreachable("these opcodes should have been lowered");
575bf215546Sopenharmony_ci         break;
576bf215546Sopenharmony_ci      default:
577bf215546Sopenharmony_ci         break;
578bf215546Sopenharmony_ci      }
579bf215546Sopenharmony_ci   }
580bf215546Sopenharmony_ci}
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_civoid si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
583bf215546Sopenharmony_ci                        struct si_shader_info *info)
584bf215546Sopenharmony_ci{
585bf215546Sopenharmony_ci   memset(info, 0, sizeof(*info));
586bf215546Sopenharmony_ci   info->base = nir->info;
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci   /* Get options from shader profiles. */
589bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(profiles); i++) {
590bf215546Sopenharmony_ci      if (_mesa_printed_sha1_equal(info->base.source_sha1, profiles[i].sha1)) {
591bf215546Sopenharmony_ci         info->options = profiles[i].options;
592bf215546Sopenharmony_ci         break;
593bf215546Sopenharmony_ci      }
594bf215546Sopenharmony_ci   }
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
597bf215546Sopenharmony_ci      /* post_depth_coverage implies early_fragment_tests */
598bf215546Sopenharmony_ci      info->base.fs.early_fragment_tests |= info->base.fs.post_depth_coverage;
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci      info->color_interpolate[0] = nir->info.fs.color0_interp;
601bf215546Sopenharmony_ci      info->color_interpolate[1] = nir->info.fs.color1_interp;
602bf215546Sopenharmony_ci      for (unsigned i = 0; i < 2; i++) {
603bf215546Sopenharmony_ci         if (info->color_interpolate[i] == INTERP_MODE_NONE)
604bf215546Sopenharmony_ci            info->color_interpolate[i] = INTERP_MODE_COLOR;
605bf215546Sopenharmony_ci      }
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci      info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
608bf215546Sopenharmony_ci                                       nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
609bf215546Sopenharmony_ci                                                                      TGSI_INTERPOLATE_LOC_CENTER;
610bf215546Sopenharmony_ci      info->color_interpolate_loc[1] = nir->info.fs.color1_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
611bf215546Sopenharmony_ci                                       nir->info.fs.color1_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
612bf215546Sopenharmony_ci                                                                      TGSI_INTERPOLATE_LOC_CENTER;
613bf215546Sopenharmony_ci      /* Set an invalid value. Will be determined at draw time if needed when the expected
614bf215546Sopenharmony_ci       * conditions are met.
615bf215546Sopenharmony_ci       */
616bf215546Sopenharmony_ci      info->writes_1_if_tex_is_1 = nir->info.writes_memory ? 0 : 0xff;
617bf215546Sopenharmony_ci   }
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   info->constbuf0_num_slots = nir->num_uniforms;
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
622bf215546Sopenharmony_ci      info->tessfactors_are_def_in_all_invocs = are_tessfactors_def_in_all_invocs(nir);
623bf215546Sopenharmony_ci   }
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci   /* tess factors are loaded as input instead of system value */
626bf215546Sopenharmony_ci   info->reads_tess_factors = nir->info.patch_inputs_read &
627bf215546Sopenharmony_ci      (BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) |
628bf215546Sopenharmony_ci       BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER));
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   info->uses_frontface = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);
631bf215546Sopenharmony_ci   info->uses_instanceid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
632bf215546Sopenharmony_ci   info->uses_base_vertex = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX);
633bf215546Sopenharmony_ci   info->uses_base_instance = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
634bf215546Sopenharmony_ci   info->uses_invocationid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
635bf215546Sopenharmony_ci   info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
636bf215546Sopenharmony_ci   info->uses_subgroup_info = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
637bf215546Sopenharmony_ci                              BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
638bf215546Sopenharmony_ci                              BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
639bf215546Sopenharmony_ci   info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
640bf215546Sopenharmony_ci   info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID);
641bf215546Sopenharmony_ci   info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
642bf215546Sopenharmony_ci                       nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
643bf215546Sopenharmony_ci   info->reads_samplemask = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
644bf215546Sopenharmony_ci   info->uses_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
645bf215546Sopenharmony_ci   info->uses_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
646bf215546Sopenharmony_ci   info->uses_linear_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
647bf215546Sopenharmony_ci   info->uses_persp_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
648bf215546Sopenharmony_ci   info->uses_persp_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
649bf215546Sopenharmony_ci   info->uses_persp_center = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
650bf215546Sopenharmony_ci
651bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
652bf215546Sopenharmony_ci      info->writes_z = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
653bf215546Sopenharmony_ci      info->writes_stencil = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
654bf215546Sopenharmony_ci      info->writes_samplemask = nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci      info->colors_written = nir->info.outputs_written >> FRAG_RESULT_DATA0;
657bf215546Sopenharmony_ci      if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) {
658bf215546Sopenharmony_ci         info->color0_writes_all_cbufs = true;
659bf215546Sopenharmony_ci         info->colors_written |= 0x1;
660bf215546Sopenharmony_ci      }
661bf215546Sopenharmony_ci      if (nir->info.fs.color_is_dual_source)
662bf215546Sopenharmony_ci         info->colors_written |= 0x2;
663bf215546Sopenharmony_ci   } else {
664bf215546Sopenharmony_ci      info->writes_primid = nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID;
665bf215546Sopenharmony_ci      info->writes_viewport_index = nir->info.outputs_written & VARYING_BIT_VIEWPORT;
666bf215546Sopenharmony_ci      info->writes_layer = nir->info.outputs_written & VARYING_BIT_LAYER;
667bf215546Sopenharmony_ci      info->writes_psize = nir->info.outputs_written & VARYING_BIT_PSIZ;
668bf215546Sopenharmony_ci      info->writes_clipvertex = nir->info.outputs_written & VARYING_BIT_CLIP_VERTEX;
669bf215546Sopenharmony_ci      info->writes_edgeflag = nir->info.outputs_written & VARYING_BIT_EDGE;
670bf215546Sopenharmony_ci      info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
671bf215546Sopenharmony_ci   }
672bf215546Sopenharmony_ci
673bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader*)nir);
674bf215546Sopenharmony_ci   nir_foreach_block (block, impl) {
675bf215546Sopenharmony_ci      nir_foreach_instr (instr, block)
676bf215546Sopenharmony_ci         scan_instruction(nir, info, instr);
677bf215546Sopenharmony_ci   }
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
680bf215546Sopenharmony_ci      /* Add the PrimitiveID output, but don't increment num_outputs.
681bf215546Sopenharmony_ci       * The driver inserts PrimitiveID only when it's used by the pixel shader,
682bf215546Sopenharmony_ci       * and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
683bf215546Sopenharmony_ci       */
684bf215546Sopenharmony_ci      info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
685bf215546Sopenharmony_ci      info->output_type[info->num_outputs] = nir_type_uint32;
686bf215546Sopenharmony_ci      info->output_usagemask[info->num_outputs] = 0x1;
687bf215546Sopenharmony_ci   }
688bf215546Sopenharmony_ci
689bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
690bf215546Sopenharmony_ci      info->allow_flat_shading = !(info->uses_persp_center || info->uses_persp_centroid ||
691bf215546Sopenharmony_ci                                   info->uses_persp_sample || info->uses_linear_center ||
692bf215546Sopenharmony_ci                                   info->uses_linear_centroid || info->uses_linear_sample ||
693bf215546Sopenharmony_ci                                   info->uses_interp_at_sample || nir->info.writes_memory ||
694bf215546Sopenharmony_ci                                   nir->info.fs.uses_fbfetch_output ||
695bf215546Sopenharmony_ci                                   nir->info.fs.needs_quad_helper_invocations ||
696bf215546Sopenharmony_ci                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
697bf215546Sopenharmony_ci                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
698bf215546Sopenharmony_ci                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
699bf215546Sopenharmony_ci                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
700bf215546Sopenharmony_ci                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
701bf215546Sopenharmony_ci                                   BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci      info->uses_vmem_load_other |= info->base.fs.uses_fbfetch_output;
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci      /* Add both front and back color inputs. */
706bf215546Sopenharmony_ci      unsigned num_inputs_with_colors = info->num_inputs;
707bf215546Sopenharmony_ci      for (unsigned back = 0; back < 2; back++) {
708bf215546Sopenharmony_ci         for (unsigned i = 0; i < 2; i++) {
709bf215546Sopenharmony_ci            if ((info->colors_read >> (i * 4)) & 0xf) {
710bf215546Sopenharmony_ci               unsigned index = num_inputs_with_colors;
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_ci               info->input[index].semantic = (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;
713bf215546Sopenharmony_ci               info->input[index].interpolate = info->color_interpolate[i];
714bf215546Sopenharmony_ci               info->input[index].usage_mask = info->colors_read >> (i * 4);
715bf215546Sopenharmony_ci               num_inputs_with_colors++;
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci               /* Back-face color don't increment num_inputs. si_emit_spi_map will use
718bf215546Sopenharmony_ci                * back-face colors conditionally only when they are needed.
719bf215546Sopenharmony_ci                */
720bf215546Sopenharmony_ci               if (!back)
721bf215546Sopenharmony_ci                  info->num_inputs = num_inputs_with_colors;
722bf215546Sopenharmony_ci            }
723bf215546Sopenharmony_ci         }
724bf215546Sopenharmony_ci      }
725bf215546Sopenharmony_ci   }
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_ci   info->uses_vmem_load_other |= info->uses_indirect_descriptor;
728bf215546Sopenharmony_ci
729bf215546Sopenharmony_ci   /* Trim output read masks based on write masks. */
730bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->num_outputs; i++)
731bf215546Sopenharmony_ci      info->output_readmask[i] &= info->output_usagemask[i];
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci   info->has_divergent_loop = nir_has_divergent_loop((nir_shader*)nir);
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_VERTEX ||
736bf215546Sopenharmony_ci       nir->info.stage == MESA_SHADER_TESS_CTRL ||
737bf215546Sopenharmony_ci       nir->info.stage == MESA_SHADER_TESS_EVAL ||
738bf215546Sopenharmony_ci       nir->info.stage == MESA_SHADER_GEOMETRY) {
739bf215546Sopenharmony_ci      if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
740bf215546Sopenharmony_ci         /* Always reserve space for these. */
741bf215546Sopenharmony_ci         info->patch_outputs_written |=
742bf215546Sopenharmony_ci            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER)) |
743bf215546Sopenharmony_ci            (1ull << si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER));
744bf215546Sopenharmony_ci      }
745bf215546Sopenharmony_ci      for (unsigned i = 0; i < info->num_outputs; i++) {
746bf215546Sopenharmony_ci         unsigned semantic = info->output_semantic[i];
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci         if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
749bf215546Sopenharmony_ci             semantic == VARYING_SLOT_TESS_LEVEL_OUTER ||
750bf215546Sopenharmony_ci             (semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX)) {
751bf215546Sopenharmony_ci            info->patch_outputs_written |= 1ull << si_shader_io_get_unique_index_patch(semantic);
752bf215546Sopenharmony_ci         } else if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
753bf215546Sopenharmony_ci                    semantic != VARYING_SLOT_EDGE) {
754bf215546Sopenharmony_ci            info->outputs_written |= 1ull << si_shader_io_get_unique_index(semantic, false);
755bf215546Sopenharmony_ci
756bf215546Sopenharmony_ci            /* Ignore outputs that are not passed from VS to PS. */
757bf215546Sopenharmony_ci            if (semantic != VARYING_SLOT_POS &&
758bf215546Sopenharmony_ci                semantic != VARYING_SLOT_PSIZ &&
759bf215546Sopenharmony_ci                semantic != VARYING_SLOT_CLIP_VERTEX) {
760bf215546Sopenharmony_ci               info->outputs_written_before_ps |= 1ull
761bf215546Sopenharmony_ci                                                  << si_shader_io_get_unique_index(semantic, true);
762bf215546Sopenharmony_ci            }
763bf215546Sopenharmony_ci         }
764bf215546Sopenharmony_ci      }
765bf215546Sopenharmony_ci   }
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_VERTEX) {
768bf215546Sopenharmony_ci      info->num_vs_inputs =
769bf215546Sopenharmony_ci         nir->info.stage == MESA_SHADER_VERTEX && !info->base.vs.blit_sgprs_amd ? info->num_inputs : 0;
770bf215546Sopenharmony_ci      unsigned num_vbos_in_sgprs = si_num_vbos_in_user_sgprs_inline(sscreen->info.gfx_level);
771bf215546Sopenharmony_ci      info->num_vbos_in_user_sgprs = MIN2(info->num_vs_inputs, num_vbos_in_sgprs);
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci      /* The prolog is a no-op if there are no inputs. */
774bf215546Sopenharmony_ci      info->vs_needs_prolog = info->num_inputs && !info->base.vs.blit_sgprs_amd;
775bf215546Sopenharmony_ci   }
776bf215546Sopenharmony_ci
777bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_VERTEX ||
778bf215546Sopenharmony_ci       nir->info.stage == MESA_SHADER_TESS_CTRL ||
779bf215546Sopenharmony_ci       nir->info.stage == MESA_SHADER_TESS_EVAL) {
780bf215546Sopenharmony_ci      info->esgs_itemsize = util_last_bit64(info->outputs_written) * 16;
781bf215546Sopenharmony_ci      info->lshs_vertex_stride = info->esgs_itemsize;
782bf215546Sopenharmony_ci
783bf215546Sopenharmony_ci      /* Add 1 dword to reduce LDS bank conflicts, so that each vertex
784bf215546Sopenharmony_ci       * will start on a different bank. (except for the maximum 32*16).
785bf215546Sopenharmony_ci       */
786bf215546Sopenharmony_ci      if (info->lshs_vertex_stride < 32 * 16)
787bf215546Sopenharmony_ci         info->lshs_vertex_stride += 4;
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci      /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
790bf215546Sopenharmony_ci       * conflicts, i.e. each vertex will start at a different bank.
791bf215546Sopenharmony_ci       */
792bf215546Sopenharmony_ci      if (sscreen->info.gfx_level >= GFX9)
793bf215546Sopenharmony_ci         info->esgs_itemsize += 4;
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci      assert(((info->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_ci      info->tcs_vgpr_only_inputs = ~info->base.tess.tcs_cross_invocation_inputs_read &
798bf215546Sopenharmony_ci                                   ~info->base.inputs_read_indirectly &
799bf215546Sopenharmony_ci                                   info->base.inputs_read;
800bf215546Sopenharmony_ci   }
801bf215546Sopenharmony_ci
802bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_GEOMETRY) {
803bf215546Sopenharmony_ci      info->gsvs_vertex_size = info->num_outputs * 16;
804bf215546Sopenharmony_ci      info->max_gsvs_emit_size = info->gsvs_vertex_size * info->base.gs.vertices_out;
805bf215546Sopenharmony_ci      info->gs_input_verts_per_prim =
806bf215546Sopenharmony_ci         u_vertices_per_prim((enum pipe_prim_type)info->base.gs.input_primitive);
807bf215546Sopenharmony_ci   }
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci   info->clipdist_mask = info->writes_clipvertex ? SI_USER_CLIP_PLANE_MASK :
810bf215546Sopenharmony_ci                         u_bit_consecutive(0, info->base.clip_distance_array_size);
811bf215546Sopenharmony_ci   info->culldist_mask = u_bit_consecutive(0, info->base.cull_distance_array_size) <<
812bf215546Sopenharmony_ci                         info->base.clip_distance_array_size;
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
815bf215546Sopenharmony_ci      for (unsigned i = 0; i < info->num_inputs; i++) {
816bf215546Sopenharmony_ci         unsigned semantic = info->input[i].semantic;
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci         if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
819bf215546Sopenharmony_ci             semantic != VARYING_SLOT_PNTC) {
820bf215546Sopenharmony_ci            info->inputs_read |= 1ull << si_shader_io_get_unique_index(semantic, true);
821bf215546Sopenharmony_ci         }
822bf215546Sopenharmony_ci      }
823bf215546Sopenharmony_ci
824bf215546Sopenharmony_ci      for (unsigned i = 0; i < 8; i++)
825bf215546Sopenharmony_ci         if (info->colors_written & (1 << i))
826bf215546Sopenharmony_ci            info->colors_written_4bit |= 0xf << (4 * i);
827bf215546Sopenharmony_ci
828bf215546Sopenharmony_ci      for (unsigned i = 0; i < info->num_inputs; i++) {
829bf215546Sopenharmony_ci         if (info->input[i].semantic == VARYING_SLOT_COL0)
830bf215546Sopenharmony_ci            info->color_attr_index[0] = i;
831bf215546Sopenharmony_ci         else if (info->input[i].semantic == VARYING_SLOT_COL1)
832bf215546Sopenharmony_ci            info->color_attr_index[1] = i;
833bf215546Sopenharmony_ci      }
834bf215546Sopenharmony_ci   }
835bf215546Sopenharmony_ci}
836