1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat.
3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * based in part on anv driver which is:
6bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation
7bf215546Sopenharmony_ci *
8bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
9bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
10bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
11bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
13bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
16bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
17bf215546Sopenharmony_ci * Software.
18bf215546Sopenharmony_ci *
19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25bf215546Sopenharmony_ci * IN THE SOFTWARE.
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "nir/nir.h"
29bf215546Sopenharmony_ci#include "nir/nir_builder.h"
30bf215546Sopenharmony_ci#include "spirv/nir_spirv.h"
31bf215546Sopenharmony_ci#include "util/disk_cache.h"
32bf215546Sopenharmony_ci#include "util/mesa-sha1.h"
33bf215546Sopenharmony_ci#include "util/os_time.h"
34bf215546Sopenharmony_ci#include "util/u_atomic.h"
35bf215546Sopenharmony_ci#include "radv_cs.h"
36bf215546Sopenharmony_ci#include "radv_debug.h"
37bf215546Sopenharmony_ci#include "radv_meta.h"
38bf215546Sopenharmony_ci#include "radv_private.h"
39bf215546Sopenharmony_ci#include "radv_shader.h"
40bf215546Sopenharmony_ci#include "radv_shader_args.h"
41bf215546Sopenharmony_ci#include "vk_pipeline.h"
42bf215546Sopenharmony_ci#include "vk_util.h"
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci#include "util/debug.h"
45bf215546Sopenharmony_ci#include "ac_binary.h"
46bf215546Sopenharmony_ci#include "ac_nir.h"
47bf215546Sopenharmony_ci#include "ac_shader_util.h"
48bf215546Sopenharmony_ci#include "aco_interface.h"
49bf215546Sopenharmony_ci#include "sid.h"
50bf215546Sopenharmony_ci#include "vk_format.h"
51bf215546Sopenharmony_ci
/* Blend-related state gathered while building a graphics pipeline.
 *
 * The field order is kept as-is; only the documentation is new.
 */
struct radv_blend_state {
   /* Per color attachment i, tested as (0xf << (4 * i)). */
   uint32_t blend_enable_4bit;
   /* One bit per color attachment, tested as (1 << i). */
   uint32_t need_src_alpha;

   /* Per color attachment i, tested as (0xf << (4 * i)). */
   uint32_t cb_target_mask;
   uint32_t cb_target_enabled_4bit;
   /* Eight entries each; presumably one per MRT -- TODO confirm. */
   uint32_t sx_mrt_blend_opt[8];
   uint32_t cb_blend_control[8];

   /* Color export format information.
    * NOTE(review): the col_format_is_* fields look like one-bit-per-target
    * masks; confirm against the code that fills them.
    */
   uint32_t spi_shader_col_format;
   uint32_t col_format_is_int8;
   uint32_t col_format_is_int10;
   uint32_t col_format_is_float32;
   uint32_t cb_shader_mask;
   uint32_t db_alpha_to_mask;

   uint32_t commutative_4bit;

   /* Presumably set when MRT0 uses a dual-source blend factor -- confirm. */
   bool mrt0_is_dual_src;
};
72bf215546Sopenharmony_ci
/* Depth/stencil render state words.
 *
 * NOTE(review): the names suggest raw DB_RENDER_* register values; confirm
 * against the code that fills and emits them.
 */
struct radv_depth_stencil_state {
   uint32_t db_render_control;
   uint32_t db_render_override;
   uint32_t db_render_override2;
};
78bf215546Sopenharmony_ci
/* Order-invariance properties of a depth/stencil configuration. */
struct radv_dsa_order_invariance {
   /* True when the final contents of the Z/S buffers are guaranteed not to
    * depend on the order in which fragments arrive.
    */
   bool zs;

   /* True when the set of fragments passing the combined Z/S test is
    * guaranteed not to depend on the order in which fragments arrive.
    */
   bool pass_set;
};
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_cistatic bool
93bf215546Sopenharmony_ciradv_is_raster_enabled(const struct radv_graphics_pipeline *pipeline,
94bf215546Sopenharmony_ci                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   return !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
97bf215546Sopenharmony_ci          (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_cistatic bool
101bf215546Sopenharmony_ciradv_is_static_vrs_enabled(const struct radv_graphics_pipeline *pipeline,
102bf215546Sopenharmony_ci                           const struct radv_graphics_pipeline_info *info)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   return info->fsr.size.width != 1 || info->fsr.size.height != 1 ||
105bf215546Sopenharmony_ci          info->fsr.combiner_ops[0] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR ||
106bf215546Sopenharmony_ci          info->fsr.combiner_ops[1] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_cistatic bool
110bf215546Sopenharmony_ciradv_is_vrs_enabled(const struct radv_graphics_pipeline *pipeline,
111bf215546Sopenharmony_ci                    const struct radv_graphics_pipeline_info *info)
112bf215546Sopenharmony_ci{
113bf215546Sopenharmony_ci   return radv_is_static_vrs_enabled(pipeline, info) ||
114bf215546Sopenharmony_ci          (pipeline->dynamic_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE);
115bf215546Sopenharmony_ci}
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_cistatic bool
118bf215546Sopenharmony_ciradv_pipeline_has_ds_attachments(const struct radv_rendering_info *ri_info)
119bf215546Sopenharmony_ci{
120bf215546Sopenharmony_ci   return ri_info->depth_att_format != VK_FORMAT_UNDEFINED ||
121bf215546Sopenharmony_ci          ri_info->stencil_att_format != VK_FORMAT_UNDEFINED;
122bf215546Sopenharmony_ci}
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_cistatic bool
125bf215546Sopenharmony_ciradv_pipeline_has_color_attachments(const struct radv_rendering_info *ri_info)
126bf215546Sopenharmony_ci{
127bf215546Sopenharmony_ci   for (uint32_t i = 0; i < ri_info->color_att_count; ++i) {
128bf215546Sopenharmony_ci      if (ri_info->color_att_formats[i] != VK_FORMAT_UNDEFINED)
129bf215546Sopenharmony_ci         return true;
130bf215546Sopenharmony_ci   }
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   return false;
133bf215546Sopenharmony_ci}
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_cistatic bool
136bf215546Sopenharmony_ciradv_pipeline_has_ngg(const struct radv_graphics_pipeline *pipeline)
137bf215546Sopenharmony_ci{
138bf215546Sopenharmony_ci   struct radv_shader *shader = pipeline->base.shaders[pipeline->last_vgt_api_stage];
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   return shader->info.is_ngg;
141bf215546Sopenharmony_ci}
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_cibool
144bf215546Sopenharmony_ciradv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline)
145bf215546Sopenharmony_ci{
146bf215546Sopenharmony_ci   assert(radv_pipeline_has_ngg(pipeline));
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   struct radv_shader *shader = pipeline->base.shaders[pipeline->last_vgt_api_stage];
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   return shader->info.is_ngg_passthrough;
151bf215546Sopenharmony_ci}
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_cibool
154bf215546Sopenharmony_ciradv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
155bf215546Sopenharmony_ci{
156bf215546Sopenharmony_ci   return !!pipeline->gs_copy_shader;
157bf215546Sopenharmony_ci}
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_cistatic struct radv_pipeline_slab *
160bf215546Sopenharmony_ciradv_pipeline_slab_create(struct radv_device *device, struct radv_pipeline *pipeline,
161bf215546Sopenharmony_ci                          uint32_t code_size)
162bf215546Sopenharmony_ci{
163bf215546Sopenharmony_ci   struct radv_pipeline_slab *slab;
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   slab = calloc(1, sizeof(*slab));
166bf215546Sopenharmony_ci   if (!slab)
167bf215546Sopenharmony_ci      return NULL;
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci   slab->ref_count = 1;
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   slab->alloc = radv_alloc_shader_memory(device, code_size, pipeline);
172bf215546Sopenharmony_ci   if (!slab->alloc) {
173bf215546Sopenharmony_ci      free(slab);
174bf215546Sopenharmony_ci      return NULL;
175bf215546Sopenharmony_ci   }
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci   return slab;
178bf215546Sopenharmony_ci}
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_civoid
181bf215546Sopenharmony_ciradv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab)
182bf215546Sopenharmony_ci{
183bf215546Sopenharmony_ci   if (!p_atomic_dec_zero(&slab->ref_count))
184bf215546Sopenharmony_ci      return;
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   radv_free_shader_memory(device, slab->alloc);
187bf215546Sopenharmony_ci   free(slab);
188bf215546Sopenharmony_ci}
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_civoid
191bf215546Sopenharmony_ciradv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
192bf215546Sopenharmony_ci                      const VkAllocationCallbacks *allocator)
193bf215546Sopenharmony_ci{
194bf215546Sopenharmony_ci   if (pipeline->type == RADV_PIPELINE_COMPUTE) {
195bf215546Sopenharmony_ci      struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci      free(compute_pipeline->rt_group_handles);
198bf215546Sopenharmony_ci      free(compute_pipeline->rt_stack_sizes);
199bf215546Sopenharmony_ci   } else if (pipeline->type == RADV_PIPELINE_LIBRARY) {
200bf215546Sopenharmony_ci      struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline);
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci      free(library_pipeline->groups);
203bf215546Sopenharmony_ci      for (uint32_t i = 0; i < library_pipeline->stage_count; i++) {
204bf215546Sopenharmony_ci         RADV_FROM_HANDLE(vk_shader_module, module, library_pipeline->stages[i].module);
205bf215546Sopenharmony_ci         if (module) {
206bf215546Sopenharmony_ci            vk_object_base_finish(&module->base);
207bf215546Sopenharmony_ci            ralloc_free(module);
208bf215546Sopenharmony_ci         }
209bf215546Sopenharmony_ci      }
210bf215546Sopenharmony_ci      free(library_pipeline->stages);
211bf215546Sopenharmony_ci      free(library_pipeline->identifiers);
212bf215546Sopenharmony_ci      free(library_pipeline->hashes);
213bf215546Sopenharmony_ci   }
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   if (pipeline->slab)
216bf215546Sopenharmony_ci      radv_pipeline_slab_destroy(device, pipeline->slab);
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
219bf215546Sopenharmony_ci      if (pipeline->shaders[i])
220bf215546Sopenharmony_ci         radv_shader_destroy(device, pipeline->shaders[i]);
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   if (pipeline->gs_copy_shader)
223bf215546Sopenharmony_ci      radv_shader_destroy(device, pipeline->gs_copy_shader);
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci   if (pipeline->cs.buf)
226bf215546Sopenharmony_ci      free(pipeline->cs.buf);
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   vk_object_base_finish(&pipeline->base);
229bf215546Sopenharmony_ci   vk_free2(&device->vk.alloc, allocator, pipeline);
230bf215546Sopenharmony_ci}
231bf215546Sopenharmony_ci
232bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
233bf215546Sopenharmony_ciradv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
234bf215546Sopenharmony_ci                     const VkAllocationCallbacks *pAllocator)
235bf215546Sopenharmony_ci{
236bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
237bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci   if (!_pipeline)
240bf215546Sopenharmony_ci      return;
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci   radv_pipeline_destroy(device, pipeline, pAllocator);
243bf215546Sopenharmony_ci}
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ciuint32_t
246bf215546Sopenharmony_ciradv_get_hash_flags(const struct radv_device *device, bool stats)
247bf215546Sopenharmony_ci{
248bf215546Sopenharmony_ci   uint32_t hash_flags = 0;
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   if (device->physical_device->use_ngg_culling)
251bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING;
252bf215546Sopenharmony_ci   if (device->instance->perftest_flags & RADV_PERFTEST_EMULATE_RT)
253bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_EMULATE_RT;
254bf215546Sopenharmony_ci   if (device->physical_device->rt_wave_size == 64)
255bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_RT_WAVE64;
256bf215546Sopenharmony_ci   if (device->physical_device->cs_wave_size == 32)
257bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_CS_WAVE32;
258bf215546Sopenharmony_ci   if (device->physical_device->ps_wave_size == 32)
259bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
260bf215546Sopenharmony_ci   if (device->physical_device->ge_wave_size == 32)
261bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
262bf215546Sopenharmony_ci   if (device->physical_device->use_llvm)
263bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_LLVM;
264bf215546Sopenharmony_ci   if (stats)
265bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS;
266bf215546Sopenharmony_ci   if (device->robust_buffer_access) /* forces per-attribute vertex descriptors */
267bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS;
268bf215546Sopenharmony_ci   if (device->robust_buffer_access2) /* affects load/store vectorizer */
269bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2;
270bf215546Sopenharmony_ci   if (device->instance->debug_flags & RADV_DEBUG_SPLIT_FMA)
271bf215546Sopenharmony_ci      hash_flags |= RADV_HASH_SHADER_SPLIT_FMA;
272bf215546Sopenharmony_ci   return hash_flags;
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_cistatic void
276bf215546Sopenharmony_ciradv_pipeline_init_scratch(const struct radv_device *device, struct radv_pipeline *pipeline)
277bf215546Sopenharmony_ci{
278bf215546Sopenharmony_ci   unsigned scratch_bytes_per_wave = 0;
279bf215546Sopenharmony_ci   unsigned max_waves = 0;
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
282bf215546Sopenharmony_ci      if (pipeline->shaders[i] && pipeline->shaders[i]->config.scratch_bytes_per_wave) {
283bf215546Sopenharmony_ci         unsigned max_stage_waves = device->scratch_waves;
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci         scratch_bytes_per_wave =
286bf215546Sopenharmony_ci            MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave);
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci         max_stage_waves =
289bf215546Sopenharmony_ci            MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_cu *
290bf215546Sopenharmony_ci                 radv_get_max_waves(device, pipeline->shaders[i], i));
291bf215546Sopenharmony_ci         max_waves = MAX2(max_waves, max_stage_waves);
292bf215546Sopenharmony_ci      }
293bf215546Sopenharmony_ci   }
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
296bf215546Sopenharmony_ci   pipeline->max_waves = max_waves;
297bf215546Sopenharmony_ci}
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_cistatic uint32_t
300bf215546Sopenharmony_cisi_translate_blend_function(VkBlendOp op)
301bf215546Sopenharmony_ci{
302bf215546Sopenharmony_ci   switch (op) {
303bf215546Sopenharmony_ci   case VK_BLEND_OP_ADD:
304bf215546Sopenharmony_ci      return V_028780_COMB_DST_PLUS_SRC;
305bf215546Sopenharmony_ci   case VK_BLEND_OP_SUBTRACT:
306bf215546Sopenharmony_ci      return V_028780_COMB_SRC_MINUS_DST;
307bf215546Sopenharmony_ci   case VK_BLEND_OP_REVERSE_SUBTRACT:
308bf215546Sopenharmony_ci      return V_028780_COMB_DST_MINUS_SRC;
309bf215546Sopenharmony_ci   case VK_BLEND_OP_MIN:
310bf215546Sopenharmony_ci      return V_028780_COMB_MIN_DST_SRC;
311bf215546Sopenharmony_ci   case VK_BLEND_OP_MAX:
312bf215546Sopenharmony_ci      return V_028780_COMB_MAX_DST_SRC;
313bf215546Sopenharmony_ci   default:
314bf215546Sopenharmony_ci      return 0;
315bf215546Sopenharmony_ci   }
316bf215546Sopenharmony_ci}
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_cistatic uint32_t
319bf215546Sopenharmony_cisi_translate_blend_factor(enum amd_gfx_level gfx_level, VkBlendFactor factor)
320bf215546Sopenharmony_ci{
321bf215546Sopenharmony_ci   switch (factor) {
322bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ZERO:
323bf215546Sopenharmony_ci      return V_028780_BLEND_ZERO;
324bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE:
325bf215546Sopenharmony_ci      return V_028780_BLEND_ONE;
326bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_SRC_COLOR:
327bf215546Sopenharmony_ci      return V_028780_BLEND_SRC_COLOR;
328bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
329bf215546Sopenharmony_ci      return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
330bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_DST_COLOR:
331bf215546Sopenharmony_ci      return V_028780_BLEND_DST_COLOR;
332bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
333bf215546Sopenharmony_ci      return V_028780_BLEND_ONE_MINUS_DST_COLOR;
334bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_SRC_ALPHA:
335bf215546Sopenharmony_ci      return V_028780_BLEND_SRC_ALPHA;
336bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
337bf215546Sopenharmony_ci      return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
338bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_DST_ALPHA:
339bf215546Sopenharmony_ci      return V_028780_BLEND_DST_ALPHA;
340bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
341bf215546Sopenharmony_ci      return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
342bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_CONSTANT_COLOR:
343bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11
344bf215546Sopenharmony_ci                                : V_028780_BLEND_CONSTANT_COLOR_GFX6;
345bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
346bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11
347bf215546Sopenharmony_ci                                 : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6;
348bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
349bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11
350bf215546Sopenharmony_ci                                 : V_028780_BLEND_CONSTANT_ALPHA_GFX6;
351bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
352bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11
353bf215546Sopenharmony_ci                                 : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6;
354bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
355bf215546Sopenharmony_ci      return V_028780_BLEND_SRC_ALPHA_SATURATE;
356bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_SRC1_COLOR:
357bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11 : V_028780_BLEND_SRC1_COLOR_GFX6;
358bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
359bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11
360bf215546Sopenharmony_ci                                 : V_028780_BLEND_INV_SRC1_COLOR_GFX6;
361bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_SRC1_ALPHA:
362bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11 : V_028780_BLEND_SRC1_ALPHA_GFX6;
363bf215546Sopenharmony_ci   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
364bf215546Sopenharmony_ci      return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_ALPHA_GFX11
365bf215546Sopenharmony_ci                                 : V_028780_BLEND_INV_SRC1_ALPHA_GFX6;
366bf215546Sopenharmony_ci   default:
367bf215546Sopenharmony_ci      return 0;
368bf215546Sopenharmony_ci   }
369bf215546Sopenharmony_ci}
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_cistatic uint32_t
372bf215546Sopenharmony_cisi_translate_blend_opt_function(unsigned op)
373bf215546Sopenharmony_ci{
374bf215546Sopenharmony_ci   switch (op) {
375bf215546Sopenharmony_ci   case V_028780_COMB_DST_PLUS_SRC:
376bf215546Sopenharmony_ci      return V_028760_OPT_COMB_ADD;
377bf215546Sopenharmony_ci   case V_028780_COMB_SRC_MINUS_DST:
378bf215546Sopenharmony_ci      return V_028760_OPT_COMB_SUBTRACT;
379bf215546Sopenharmony_ci   case V_028780_COMB_DST_MINUS_SRC:
380bf215546Sopenharmony_ci      return V_028760_OPT_COMB_REVSUBTRACT;
381bf215546Sopenharmony_ci   case V_028780_COMB_MIN_DST_SRC:
382bf215546Sopenharmony_ci      return V_028760_OPT_COMB_MIN;
383bf215546Sopenharmony_ci   case V_028780_COMB_MAX_DST_SRC:
384bf215546Sopenharmony_ci      return V_028760_OPT_COMB_MAX;
385bf215546Sopenharmony_ci   default:
386bf215546Sopenharmony_ci      return V_028760_OPT_COMB_BLEND_DISABLED;
387bf215546Sopenharmony_ci   }
388bf215546Sopenharmony_ci}
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_cistatic uint32_t
391bf215546Sopenharmony_cisi_translate_blend_opt_factor(unsigned factor, bool is_alpha)
392bf215546Sopenharmony_ci{
393bf215546Sopenharmony_ci   switch (factor) {
394bf215546Sopenharmony_ci   case V_028780_BLEND_ZERO:
395bf215546Sopenharmony_ci      return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
396bf215546Sopenharmony_ci   case V_028780_BLEND_ONE:
397bf215546Sopenharmony_ci      return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
398bf215546Sopenharmony_ci   case V_028780_BLEND_SRC_COLOR:
399bf215546Sopenharmony_ci      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
400bf215546Sopenharmony_ci                      : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
401bf215546Sopenharmony_ci   case V_028780_BLEND_ONE_MINUS_SRC_COLOR:
402bf215546Sopenharmony_ci      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
403bf215546Sopenharmony_ci                      : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
404bf215546Sopenharmony_ci   case V_028780_BLEND_SRC_ALPHA:
405bf215546Sopenharmony_ci      return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
406bf215546Sopenharmony_ci   case V_028780_BLEND_ONE_MINUS_SRC_ALPHA:
407bf215546Sopenharmony_ci      return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
408bf215546Sopenharmony_ci   case V_028780_BLEND_SRC_ALPHA_SATURATE:
409bf215546Sopenharmony_ci      return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
410bf215546Sopenharmony_ci                      : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
411bf215546Sopenharmony_ci   default:
412bf215546Sopenharmony_ci      return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
413bf215546Sopenharmony_ci   }
414bf215546Sopenharmony_ci}
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci/**
417bf215546Sopenharmony_ci * Get rid of DST in the blend factors by commuting the operands:
418bf215546Sopenharmony_ci *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
419bf215546Sopenharmony_ci */
420bf215546Sopenharmony_cistatic void
421bf215546Sopenharmony_cisi_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor,
422bf215546Sopenharmony_ci                    unsigned expected_dst, unsigned replacement_src)
423bf215546Sopenharmony_ci{
424bf215546Sopenharmony_ci   if (*src_factor == expected_dst && *dst_factor == V_028780_BLEND_ZERO) {
425bf215546Sopenharmony_ci      *src_factor = V_028780_BLEND_ZERO;
426bf215546Sopenharmony_ci      *dst_factor = replacement_src;
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci      /* Commuting the operands requires reversing subtractions. */
429bf215546Sopenharmony_ci      if (*func == V_028780_COMB_SRC_MINUS_DST)
430bf215546Sopenharmony_ci         *func = V_028780_COMB_DST_MINUS_SRC;
431bf215546Sopenharmony_ci      else if (*func == V_028780_COMB_DST_MINUS_SRC)
432bf215546Sopenharmony_ci         *func = V_028780_COMB_SRC_MINUS_DST;
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_cistatic bool
437bf215546Sopenharmony_cisi_blend_factor_uses_dst(unsigned factor)
438bf215546Sopenharmony_ci{
439bf215546Sopenharmony_ci   return factor == V_028780_BLEND_DST_COLOR ||
440bf215546Sopenharmony_ci          factor == V_028780_BLEND_DST_ALPHA ||
441bf215546Sopenharmony_ci          factor == V_028780_BLEND_SRC_ALPHA_SATURATE ||
442bf215546Sopenharmony_ci          factor == V_028780_BLEND_ONE_MINUS_DST_ALPHA ||
443bf215546Sopenharmony_ci          factor == V_028780_BLEND_ONE_MINUS_DST_COLOR;
444bf215546Sopenharmony_ci}
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_cistatic bool
447bf215546Sopenharmony_ciis_dual_src(enum amd_gfx_level gfx_level, unsigned factor)
448bf215546Sopenharmony_ci{
449bf215546Sopenharmony_ci   if (gfx_level >= GFX11) {
450bf215546Sopenharmony_ci      switch (factor) {
451bf215546Sopenharmony_ci      case V_028780_BLEND_SRC1_COLOR_GFX11:
452bf215546Sopenharmony_ci      case V_028780_BLEND_INV_SRC1_COLOR_GFX11:
453bf215546Sopenharmony_ci      case V_028780_BLEND_SRC1_ALPHA_GFX11:
454bf215546Sopenharmony_ci      case V_028780_BLEND_INV_SRC1_ALPHA_GFX11:
455bf215546Sopenharmony_ci         return true;
456bf215546Sopenharmony_ci      default:
457bf215546Sopenharmony_ci         return false;
458bf215546Sopenharmony_ci      }
459bf215546Sopenharmony_ci   } else {
460bf215546Sopenharmony_ci      switch (factor) {
461bf215546Sopenharmony_ci      case V_028780_BLEND_SRC1_COLOR_GFX6:
462bf215546Sopenharmony_ci      case V_028780_BLEND_INV_SRC1_COLOR_GFX6:
463bf215546Sopenharmony_ci      case V_028780_BLEND_SRC1_ALPHA_GFX6:
464bf215546Sopenharmony_ci      case V_028780_BLEND_INV_SRC1_ALPHA_GFX6:
465bf215546Sopenharmony_ci         return true;
466bf215546Sopenharmony_ci      default:
467bf215546Sopenharmony_ci         return false;
468bf215546Sopenharmony_ci      }
469bf215546Sopenharmony_ci   }
470bf215546Sopenharmony_ci}
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_cistatic unsigned
473bf215546Sopenharmony_ciradv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format,
474bf215546Sopenharmony_ci                             bool blend_enable, bool blend_need_alpha)
475bf215546Sopenharmony_ci{
476bf215546Sopenharmony_ci   const struct util_format_description *desc = vk_format_description(vk_format);
477bf215546Sopenharmony_ci   bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
478bf215546Sopenharmony_ci   struct ac_spi_color_formats formats = {0};
479bf215546Sopenharmony_ci   unsigned format, ntype, swap;
480bf215546Sopenharmony_ci
481bf215546Sopenharmony_ci   format = radv_translate_colorformat(vk_format);
482bf215546Sopenharmony_ci   ntype = radv_translate_color_numformat(vk_format, desc,
483bf215546Sopenharmony_ci                                          vk_format_get_first_non_void_channel(vk_format));
484bf215546Sopenharmony_ci   swap = radv_translate_colorswap(vk_format, false);
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci   ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci   if (blend_enable && blend_need_alpha)
489bf215546Sopenharmony_ci      return formats.blend_alpha;
490bf215546Sopenharmony_ci   else if (blend_need_alpha)
491bf215546Sopenharmony_ci      return formats.alpha;
492bf215546Sopenharmony_ci   else if (blend_enable)
493bf215546Sopenharmony_ci      return formats.blend;
494bf215546Sopenharmony_ci   else
495bf215546Sopenharmony_ci      return formats.normal;
496bf215546Sopenharmony_ci}
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_cistatic bool
499bf215546Sopenharmony_ciformat_is_int8(VkFormat format)
500bf215546Sopenharmony_ci{
501bf215546Sopenharmony_ci   const struct util_format_description *desc = vk_format_description(format);
502bf215546Sopenharmony_ci   int channel = vk_format_get_first_non_void_channel(format);
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci   return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8;
505bf215546Sopenharmony_ci}
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_cistatic bool
508bf215546Sopenharmony_ciformat_is_int10(VkFormat format)
509bf215546Sopenharmony_ci{
510bf215546Sopenharmony_ci   const struct util_format_description *desc = vk_format_description(format);
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci   if (desc->nr_channels != 4)
513bf215546Sopenharmony_ci      return false;
514bf215546Sopenharmony_ci   for (unsigned i = 0; i < 4; i++) {
515bf215546Sopenharmony_ci      if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
516bf215546Sopenharmony_ci         return true;
517bf215546Sopenharmony_ci   }
518bf215546Sopenharmony_ci   return false;
519bf215546Sopenharmony_ci}
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_cistatic bool
522bf215546Sopenharmony_ciformat_is_float32(VkFormat format)
523bf215546Sopenharmony_ci{
524bf215546Sopenharmony_ci   const struct util_format_description *desc = vk_format_description(format);
525bf215546Sopenharmony_ci   int channel = vk_format_get_first_non_void_channel(format);
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci   return channel >= 0 &&
528bf215546Sopenharmony_ci          desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
529bf215546Sopenharmony_ci}
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_cistatic void
532bf215546Sopenharmony_ciradv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pipeline,
533bf215546Sopenharmony_ci                                        const VkGraphicsPipelineCreateInfo *pCreateInfo,
534bf215546Sopenharmony_ci                                        struct radv_blend_state *blend,
535bf215546Sopenharmony_ci                                        const struct radv_graphics_pipeline_info *info)
536bf215546Sopenharmony_ci{
537bf215546Sopenharmony_ci   unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0;
538bf215546Sopenharmony_ci   unsigned num_targets;
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->ri.color_att_count; ++i) {
541bf215546Sopenharmony_ci      unsigned cf;
542bf215546Sopenharmony_ci      VkFormat fmt = info->ri.color_att_formats[i];
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci      if (fmt == VK_FORMAT_UNDEFINED || !(blend->cb_target_mask & (0xfu << (i * 4)))) {
545bf215546Sopenharmony_ci         cf = V_028714_SPI_SHADER_ZERO;
546bf215546Sopenharmony_ci      } else {
547bf215546Sopenharmony_ci         bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4));
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci         cf = radv_choose_spi_color_format(pipeline->base.device, fmt, blend_enable,
550bf215546Sopenharmony_ci                                           blend->need_src_alpha & (1 << i));
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci         if (format_is_int8(fmt))
553bf215546Sopenharmony_ci            is_int8 |= 1 << i;
554bf215546Sopenharmony_ci         if (format_is_int10(fmt))
555bf215546Sopenharmony_ci            is_int10 |= 1 << i;
556bf215546Sopenharmony_ci         if (format_is_float32(fmt))
557bf215546Sopenharmony_ci            is_float32 |= 1 << i;
558bf215546Sopenharmony_ci      }
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_ci      col_format |= cf << (4 * i);
561bf215546Sopenharmony_ci   }
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) {
564bf215546Sopenharmony_ci      /* When a subpass doesn't have any color attachments, write the
565bf215546Sopenharmony_ci       * alpha channel of MRT0 when alpha coverage is enabled because
566bf215546Sopenharmony_ci       * the depth attachment needs it.
567bf215546Sopenharmony_ci       */
568bf215546Sopenharmony_ci      col_format |= V_028714_SPI_SHADER_32_AR;
569bf215546Sopenharmony_ci   }
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci   /* If the i-th target format is set, all previous target formats must
572bf215546Sopenharmony_ci    * be non-zero to avoid hangs.
573bf215546Sopenharmony_ci    */
574bf215546Sopenharmony_ci   num_targets = (util_last_bit(col_format) + 3) / 4;
575bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_targets; i++) {
576bf215546Sopenharmony_ci      if (!(col_format & (0xfu << (i * 4)))) {
577bf215546Sopenharmony_ci         col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
578bf215546Sopenharmony_ci      }
579bf215546Sopenharmony_ci   }
580bf215546Sopenharmony_ci
581bf215546Sopenharmony_ci   /* The output for dual source blending should have the same format as
582bf215546Sopenharmony_ci    * the first output.
583bf215546Sopenharmony_ci    */
584bf215546Sopenharmony_ci   if (blend->mrt0_is_dual_src) {
585bf215546Sopenharmony_ci      assert(!(col_format >> 4));
586bf215546Sopenharmony_ci      col_format |= (col_format & 0xf) << 4;
587bf215546Sopenharmony_ci   }
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci   blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
590bf215546Sopenharmony_ci   blend->spi_shader_col_format = col_format;
591bf215546Sopenharmony_ci   blend->col_format_is_int8 = is_int8;
592bf215546Sopenharmony_ci   blend->col_format_is_int10 = is_int10;
593bf215546Sopenharmony_ci   blend->col_format_is_float32 = is_float32;
594bf215546Sopenharmony_ci}
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci/*
597bf215546Sopenharmony_ci * Ordered so that for each i,
598bf215546Sopenharmony_ci * radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i.
599bf215546Sopenharmony_ci */
600bf215546Sopenharmony_ciconst VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
601bf215546Sopenharmony_ci   VK_FORMAT_R32_SFLOAT,
602bf215546Sopenharmony_ci   VK_FORMAT_R32G32_SFLOAT,
603bf215546Sopenharmony_ci   VK_FORMAT_R8G8B8A8_UNORM,
604bf215546Sopenharmony_ci   VK_FORMAT_R16G16B16A16_UNORM,
605bf215546Sopenharmony_ci   VK_FORMAT_R16G16B16A16_SNORM,
606bf215546Sopenharmony_ci   VK_FORMAT_R16G16B16A16_UINT,
607bf215546Sopenharmony_ci   VK_FORMAT_R16G16B16A16_SINT,
608bf215546Sopenharmony_ci   VK_FORMAT_R32G32B32A32_SFLOAT,
609bf215546Sopenharmony_ci   VK_FORMAT_R8G8B8A8_UINT,
610bf215546Sopenharmony_ci   VK_FORMAT_R8G8B8A8_SINT,
611bf215546Sopenharmony_ci   VK_FORMAT_A2R10G10B10_UINT_PACK32,
612bf215546Sopenharmony_ci   VK_FORMAT_A2R10G10B10_SINT_PACK32,
613bf215546Sopenharmony_ci};
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_ciunsigned
616bf215546Sopenharmony_ciradv_format_meta_fs_key(struct radv_device *device, VkFormat format)
617bf215546Sopenharmony_ci{
618bf215546Sopenharmony_ci   unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
619bf215546Sopenharmony_ci   assert(col_format != V_028714_SPI_SHADER_32_AR);
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci   bool is_int8 = format_is_int8(format);
622bf215546Sopenharmony_ci   bool is_int10 = format_is_int10(format);
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_ci   if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
625bf215546Sopenharmony_ci      return 8;
626bf215546Sopenharmony_ci   else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
627bf215546Sopenharmony_ci      return 9;
628bf215546Sopenharmony_ci   else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
629bf215546Sopenharmony_ci      return 10;
630bf215546Sopenharmony_ci   else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
631bf215546Sopenharmony_ci      return 11;
632bf215546Sopenharmony_ci   else {
633bf215546Sopenharmony_ci      if (col_format >= V_028714_SPI_SHADER_32_AR)
634bf215546Sopenharmony_ci         --col_format; /* Skip V_028714_SPI_SHADER_32_AR  since there is no such VkFormat */
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci      --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
637bf215546Sopenharmony_ci      return col_format;
638bf215546Sopenharmony_ci   }
639bf215546Sopenharmony_ci}
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_cistatic void
642bf215546Sopenharmony_ciradv_blend_check_commutativity(enum amd_gfx_level gfx_level, struct radv_blend_state *blend,
643bf215546Sopenharmony_ci                               unsigned op, unsigned src, unsigned dst, unsigned chanmask)
644bf215546Sopenharmony_ci{
645bf215546Sopenharmony_ci   bool is_src_allowed = false;
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci   /* Src factor is allowed when it does not depend on Dst. */
648bf215546Sopenharmony_ci   if (src == V_028780_BLEND_ZERO ||
649bf215546Sopenharmony_ci       src == V_028780_BLEND_ONE ||
650bf215546Sopenharmony_ci       src == V_028780_BLEND_SRC_COLOR ||
651bf215546Sopenharmony_ci       src == V_028780_BLEND_SRC_ALPHA ||
652bf215546Sopenharmony_ci       src == V_028780_BLEND_SRC_ALPHA_SATURATE ||
653bf215546Sopenharmony_ci       src == V_028780_BLEND_ONE_MINUS_SRC_COLOR ||
654bf215546Sopenharmony_ci       src == V_028780_BLEND_ONE_MINUS_SRC_ALPHA) {
655bf215546Sopenharmony_ci      is_src_allowed = true;
656bf215546Sopenharmony_ci   }
657bf215546Sopenharmony_ci
658bf215546Sopenharmony_ci   if (gfx_level >= GFX11) {
659bf215546Sopenharmony_ci      if (src == V_028780_BLEND_CONSTANT_COLOR_GFX11 ||
660bf215546Sopenharmony_ci          src == V_028780_BLEND_CONSTANT_ALPHA_GFX11 ||
661bf215546Sopenharmony_ci          src == V_028780_BLEND_SRC1_COLOR_GFX11 ||
662bf215546Sopenharmony_ci          src == V_028780_BLEND_SRC1_ALPHA_GFX11 ||
663bf215546Sopenharmony_ci          src == V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11 ||
664bf215546Sopenharmony_ci          src == V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11 ||
665bf215546Sopenharmony_ci          src == V_028780_BLEND_INV_SRC1_COLOR_GFX11 ||
666bf215546Sopenharmony_ci          src == V_028780_BLEND_INV_SRC1_ALPHA_GFX11) {
667bf215546Sopenharmony_ci         is_src_allowed = true;
668bf215546Sopenharmony_ci      }
669bf215546Sopenharmony_ci   } else {
670bf215546Sopenharmony_ci      if (src == V_028780_BLEND_CONSTANT_COLOR_GFX6 ||
671bf215546Sopenharmony_ci          src == V_028780_BLEND_CONSTANT_ALPHA_GFX6 ||
672bf215546Sopenharmony_ci          src == V_028780_BLEND_SRC1_COLOR_GFX6 ||
673bf215546Sopenharmony_ci          src == V_028780_BLEND_SRC1_ALPHA_GFX6 ||
674bf215546Sopenharmony_ci          src == V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6 ||
675bf215546Sopenharmony_ci          src == V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6 ||
676bf215546Sopenharmony_ci          src == V_028780_BLEND_INV_SRC1_COLOR_GFX6 ||
677bf215546Sopenharmony_ci          src == V_028780_BLEND_INV_SRC1_ALPHA_GFX6) {
678bf215546Sopenharmony_ci         is_src_allowed = true;
679bf215546Sopenharmony_ci      }
680bf215546Sopenharmony_ci   }
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci   if (dst == V_028780_BLEND_ONE && is_src_allowed) {
683bf215546Sopenharmony_ci      /* Addition is commutative, but floating point addition isn't
684bf215546Sopenharmony_ci       * associative: subtle changes can be introduced via different
685bf215546Sopenharmony_ci       * rounding. Be conservative, only enable for min and max.
686bf215546Sopenharmony_ci       */
687bf215546Sopenharmony_ci      if (op == V_028780_COMB_MAX_DST_SRC || op == V_028780_COMB_MIN_DST_SRC)
688bf215546Sopenharmony_ci         blend->commutative_4bit |= chanmask;
689bf215546Sopenharmony_ci   }
690bf215546Sopenharmony_ci}
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_cistatic struct radv_blend_state
693bf215546Sopenharmony_ciradv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
694bf215546Sopenharmony_ci                               const VkGraphicsPipelineCreateInfo *pCreateInfo,
695bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline_info *info)
696bf215546Sopenharmony_ci{
697bf215546Sopenharmony_ci   const struct radv_device *device = pipeline->base.device;
698bf215546Sopenharmony_ci   struct radv_blend_state blend = {0};
699bf215546Sopenharmony_ci   unsigned cb_color_control = 0;
700bf215546Sopenharmony_ci   const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
701bf215546Sopenharmony_ci   int i;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci   if (info->cb.logic_op_enable)
704bf215546Sopenharmony_ci      cb_color_control |= S_028808_ROP3(info->cb.logic_op);
705bf215546Sopenharmony_ci   else
706bf215546Sopenharmony_ci      cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY);
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci   if (device->instance->debug_flags & RADV_DEBUG_NO_ATOC_DITHERING)
709bf215546Sopenharmony_ci   {
710bf215546Sopenharmony_ci      blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
711bf215546Sopenharmony_ci                               S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
712bf215546Sopenharmony_ci                               S_028B70_OFFSET_ROUND(0);
713bf215546Sopenharmony_ci   }
714bf215546Sopenharmony_ci   else
715bf215546Sopenharmony_ci   {
716bf215546Sopenharmony_ci      blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
717bf215546Sopenharmony_ci                               S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
718bf215546Sopenharmony_ci                               S_028B70_OFFSET_ROUND(1);
719bf215546Sopenharmony_ci   }
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   if (info->ms.alpha_to_coverage_enable) {
722bf215546Sopenharmony_ci      blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
723bf215546Sopenharmony_ci      blend.need_src_alpha |= 0x1;
724bf215546Sopenharmony_ci   }
725bf215546Sopenharmony_ci
726bf215546Sopenharmony_ci   blend.cb_target_mask = 0;
727bf215546Sopenharmony_ci   for (i = 0; i < info->cb.att_count; i++) {
728bf215546Sopenharmony_ci      unsigned blend_cntl = 0;
729bf215546Sopenharmony_ci      unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
730bf215546Sopenharmony_ci      unsigned eqRGB = info->cb.att[i].color_blend_op;
731bf215546Sopenharmony_ci      unsigned srcRGB = info->cb.att[i].src_color_blend_factor;
732bf215546Sopenharmony_ci      unsigned dstRGB = info->cb.att[i].dst_color_blend_factor;
733bf215546Sopenharmony_ci      unsigned eqA = info->cb.att[i].alpha_blend_op;
734bf215546Sopenharmony_ci      unsigned srcA = info->cb.att[i].src_alpha_blend_factor;
735bf215546Sopenharmony_ci      unsigned dstA = info->cb.att[i].dst_alpha_blend_factor;
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci      blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
738bf215546Sopenharmony_ci                                  S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
739bf215546Sopenharmony_ci
740bf215546Sopenharmony_ci      if (!info->cb.att[i].color_write_mask)
741bf215546Sopenharmony_ci         continue;
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_ci      /* Ignore other blend targets if dual-source blending
744bf215546Sopenharmony_ci       * is enabled to prevent wrong behaviour.
745bf215546Sopenharmony_ci       */
746bf215546Sopenharmony_ci      if (blend.mrt0_is_dual_src)
747bf215546Sopenharmony_ci         continue;
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_ci      blend.cb_target_mask |= (unsigned)info->cb.att[i].color_write_mask << (4 * i);
750bf215546Sopenharmony_ci      blend.cb_target_enabled_4bit |= 0xfu << (4 * i);
751bf215546Sopenharmony_ci      if (!info->cb.att[i].blend_enable) {
752bf215546Sopenharmony_ci         blend.cb_blend_control[i] = blend_cntl;
753bf215546Sopenharmony_ci         continue;
754bf215546Sopenharmony_ci      }
755bf215546Sopenharmony_ci
756bf215546Sopenharmony_ci      if (is_dual_src(gfx_level, srcRGB) || is_dual_src(gfx_level, dstRGB) ||
757bf215546Sopenharmony_ci          is_dual_src(gfx_level, srcA) || is_dual_src(gfx_level, dstA))
758bf215546Sopenharmony_ci         if (i == 0)
759bf215546Sopenharmony_ci            blend.mrt0_is_dual_src = true;
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci      if (eqRGB == V_028780_COMB_MIN_DST_SRC || eqRGB == V_028780_COMB_MAX_DST_SRC) {
763bf215546Sopenharmony_ci         srcRGB = V_028780_BLEND_ONE;
764bf215546Sopenharmony_ci         dstRGB = V_028780_BLEND_ONE;
765bf215546Sopenharmony_ci      }
766bf215546Sopenharmony_ci      if (eqA == V_028780_COMB_MIN_DST_SRC || eqA == V_028780_COMB_MAX_DST_SRC) {
767bf215546Sopenharmony_ci         srcA = V_028780_BLEND_ONE;
768bf215546Sopenharmony_ci         dstA = V_028780_BLEND_ONE;
769bf215546Sopenharmony_ci      }
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_ci      radv_blend_check_commutativity(gfx_level, &blend, eqRGB, srcRGB, dstRGB, 0x7u << (4 * i));
772bf215546Sopenharmony_ci      radv_blend_check_commutativity(gfx_level, &blend, eqA, srcA, dstA, 0x8u << (4 * i));
773bf215546Sopenharmony_ci
774bf215546Sopenharmony_ci      /* Blending optimizations for RB+.
775bf215546Sopenharmony_ci       * These transformations don't change the behavior.
776bf215546Sopenharmony_ci       *
777bf215546Sopenharmony_ci       * First, get rid of DST in the blend factors:
778bf215546Sopenharmony_ci       *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
779bf215546Sopenharmony_ci       */
780bf215546Sopenharmony_ci      si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, V_028780_BLEND_DST_COLOR,
781bf215546Sopenharmony_ci                          V_028780_BLEND_SRC_COLOR);
782bf215546Sopenharmony_ci
783bf215546Sopenharmony_ci      si_blend_remove_dst(&eqA, &srcA, &dstA, V_028780_BLEND_DST_COLOR,
784bf215546Sopenharmony_ci                          V_028780_BLEND_SRC_COLOR);
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci      si_blend_remove_dst(&eqA, &srcA, &dstA, V_028780_BLEND_DST_ALPHA,
787bf215546Sopenharmony_ci                          V_028780_BLEND_SRC_ALPHA);
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci      /* Look up the ideal settings from tables. */
790bf215546Sopenharmony_ci      srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
791bf215546Sopenharmony_ci      dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
792bf215546Sopenharmony_ci      srcA_opt = si_translate_blend_opt_factor(srcA, true);
793bf215546Sopenharmony_ci      dstA_opt = si_translate_blend_opt_factor(dstA, true);
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci      /* Handle interdependencies. */
796bf215546Sopenharmony_ci      if (si_blend_factor_uses_dst(srcRGB))
797bf215546Sopenharmony_ci         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
798bf215546Sopenharmony_ci      if (si_blend_factor_uses_dst(srcA))
799bf215546Sopenharmony_ci         dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
800bf215546Sopenharmony_ci
801bf215546Sopenharmony_ci      if (srcRGB == V_028780_BLEND_SRC_ALPHA_SATURATE &&
802bf215546Sopenharmony_ci          (dstRGB == V_028780_BLEND_ZERO || dstRGB == V_028780_BLEND_SRC_ALPHA ||
803bf215546Sopenharmony_ci           dstRGB == V_028780_BLEND_SRC_ALPHA_SATURATE))
804bf215546Sopenharmony_ci         dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci      /* Set the final value. */
807bf215546Sopenharmony_ci      blend.sx_mrt_blend_opt[i] =
808bf215546Sopenharmony_ci         S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) |
809bf215546Sopenharmony_ci         S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
810bf215546Sopenharmony_ci         S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
811bf215546Sopenharmony_ci         S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
812bf215546Sopenharmony_ci      blend_cntl |= S_028780_ENABLE(1);
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci      blend_cntl |= S_028780_COLOR_COMB_FCN(eqRGB);
815bf215546Sopenharmony_ci      blend_cntl |= S_028780_COLOR_SRCBLEND(srcRGB);
816bf215546Sopenharmony_ci      blend_cntl |= S_028780_COLOR_DESTBLEND(dstRGB);
817bf215546Sopenharmony_ci      if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
818bf215546Sopenharmony_ci         blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
819bf215546Sopenharmony_ci         blend_cntl |= S_028780_ALPHA_COMB_FCN(eqA);
820bf215546Sopenharmony_ci         blend_cntl |= S_028780_ALPHA_SRCBLEND(srcA);
821bf215546Sopenharmony_ci         blend_cntl |= S_028780_ALPHA_DESTBLEND(dstA);
822bf215546Sopenharmony_ci      }
823bf215546Sopenharmony_ci      blend.cb_blend_control[i] = blend_cntl;
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_ci      blend.blend_enable_4bit |= 0xfu << (i * 4);
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci      if (srcRGB == V_028780_BLEND_SRC_ALPHA || dstRGB == V_028780_BLEND_SRC_ALPHA ||
828bf215546Sopenharmony_ci          srcRGB == V_028780_BLEND_SRC_ALPHA_SATURATE ||
829bf215546Sopenharmony_ci          dstRGB == V_028780_BLEND_SRC_ALPHA_SATURATE ||
830bf215546Sopenharmony_ci          srcRGB == V_028780_BLEND_ONE_MINUS_SRC_ALPHA ||
831bf215546Sopenharmony_ci          dstRGB == V_028780_BLEND_ONE_MINUS_SRC_ALPHA)
832bf215546Sopenharmony_ci         blend.need_src_alpha |= 1 << i;
833bf215546Sopenharmony_ci   }
834bf215546Sopenharmony_ci   for (i = info->cb.att_count; i < 8; i++) {
835bf215546Sopenharmony_ci      blend.cb_blend_control[i] = 0;
836bf215546Sopenharmony_ci      blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
837bf215546Sopenharmony_ci                                  S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
838bf215546Sopenharmony_ci   }
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci   if (device->physical_device->rad_info.has_rbplus) {
841bf215546Sopenharmony_ci      /* Disable RB+ blend optimizations for dual source blending. */
842bf215546Sopenharmony_ci      if (blend.mrt0_is_dual_src) {
843bf215546Sopenharmony_ci         for (i = 0; i < 8; i++) {
844bf215546Sopenharmony_ci            blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
845bf215546Sopenharmony_ci                                        S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
846bf215546Sopenharmony_ci         }
847bf215546Sopenharmony_ci      }
848bf215546Sopenharmony_ci
849bf215546Sopenharmony_ci      /* RB+ doesn't work with dual source blending, logic op and
850bf215546Sopenharmony_ci       * RESOLVE.
851bf215546Sopenharmony_ci       */
852bf215546Sopenharmony_ci      if (blend.mrt0_is_dual_src || info->cb.logic_op_enable ||
853bf215546Sopenharmony_ci          (device->physical_device->rad_info.gfx_level >= GFX11 && blend.blend_enable_4bit))
854bf215546Sopenharmony_ci         cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);
855bf215546Sopenharmony_ci   }
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci   if (blend.cb_target_mask)
858bf215546Sopenharmony_ci      cb_color_control |= S_028808_MODE(V_028808_CB_NORMAL);
859bf215546Sopenharmony_ci   else
860bf215546Sopenharmony_ci      cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, &blend, info);
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci   pipeline->cb_color_control = cb_color_control;
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ci   return blend;
867bf215546Sopenharmony_ci}
868bf215546Sopenharmony_ci
869bf215546Sopenharmony_cistatic uint32_t
870bf215546Sopenharmony_cisi_translate_fill(VkPolygonMode func)
871bf215546Sopenharmony_ci{
872bf215546Sopenharmony_ci   switch (func) {
873bf215546Sopenharmony_ci   case VK_POLYGON_MODE_FILL:
874bf215546Sopenharmony_ci      return V_028814_X_DRAW_TRIANGLES;
875bf215546Sopenharmony_ci   case VK_POLYGON_MODE_LINE:
876bf215546Sopenharmony_ci      return V_028814_X_DRAW_LINES;
877bf215546Sopenharmony_ci   case VK_POLYGON_MODE_POINT:
878bf215546Sopenharmony_ci      return V_028814_X_DRAW_POINTS;
879bf215546Sopenharmony_ci   default:
880bf215546Sopenharmony_ci      assert(0);
881bf215546Sopenharmony_ci      return V_028814_X_DRAW_POINTS;
882bf215546Sopenharmony_ci   }
883bf215546Sopenharmony_ci}
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_cistatic unsigned
886bf215546Sopenharmony_ciradv_pipeline_color_samples( const struct radv_graphics_pipeline_info *info)
887bf215546Sopenharmony_ci{
888bf215546Sopenharmony_ci   if (info->color_att_samples && radv_pipeline_has_color_attachments(&info->ri)) {
889bf215546Sopenharmony_ci      return info->color_att_samples;
890bf215546Sopenharmony_ci   }
891bf215546Sopenharmony_ci
892bf215546Sopenharmony_ci   return info->ms.raster_samples;
893bf215546Sopenharmony_ci}
894bf215546Sopenharmony_ci
895bf215546Sopenharmony_cistatic unsigned
896bf215546Sopenharmony_ciradv_pipeline_depth_samples(const struct radv_graphics_pipeline_info *info)
897bf215546Sopenharmony_ci{
898bf215546Sopenharmony_ci   if (info->ds_att_samples && radv_pipeline_has_ds_attachments(&info->ri)) {
899bf215546Sopenharmony_ci      return info->ds_att_samples;
900bf215546Sopenharmony_ci   }
901bf215546Sopenharmony_ci
902bf215546Sopenharmony_ci   return info->ms.raster_samples;
903bf215546Sopenharmony_ci}
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_cistatic uint8_t
906bf215546Sopenharmony_ciradv_pipeline_get_ps_iter_samples(const struct radv_graphics_pipeline_info *info)
907bf215546Sopenharmony_ci{
908bf215546Sopenharmony_ci   uint32_t ps_iter_samples = 1;
909bf215546Sopenharmony_ci   uint32_t num_samples = radv_pipeline_color_samples(info);
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci   if (info->ms.sample_shading_enable) {
912bf215546Sopenharmony_ci      ps_iter_samples = ceilf(info->ms.min_sample_shading * num_samples);
913bf215546Sopenharmony_ci      ps_iter_samples = util_next_power_of_two(ps_iter_samples);
914bf215546Sopenharmony_ci   }
915bf215546Sopenharmony_ci   return ps_iter_samples;
916bf215546Sopenharmony_ci}
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_cistatic bool
919bf215546Sopenharmony_ciradv_is_depth_write_enabled(const struct radv_depth_stencil_info *ds_info)
920bf215546Sopenharmony_ci{
921bf215546Sopenharmony_ci   return ds_info->depth_test_enable && ds_info->depth_write_enable &&
922bf215546Sopenharmony_ci          ds_info->depth_compare_op != VK_COMPARE_OP_NEVER;
923bf215546Sopenharmony_ci}
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_cistatic bool
926bf215546Sopenharmony_ciradv_writes_stencil(const struct radv_stencil_op_info *info)
927bf215546Sopenharmony_ci{
928bf215546Sopenharmony_ci   return info->write_mask &&
929bf215546Sopenharmony_ci          (info->fail_op != VK_STENCIL_OP_KEEP || info->pass_op != VK_STENCIL_OP_KEEP ||
930bf215546Sopenharmony_ci           info->depth_fail_op != VK_STENCIL_OP_KEEP);
931bf215546Sopenharmony_ci}
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_cistatic bool
934bf215546Sopenharmony_ciradv_is_stencil_write_enabled(const struct radv_depth_stencil_info *ds_info)
935bf215546Sopenharmony_ci{
936bf215546Sopenharmony_ci   return ds_info->stencil_test_enable &&
937bf215546Sopenharmony_ci          (radv_writes_stencil(&ds_info->front) || radv_writes_stencil(&ds_info->back));
938bf215546Sopenharmony_ci}
939bf215546Sopenharmony_ci
940bf215546Sopenharmony_cistatic bool
941bf215546Sopenharmony_ciradv_order_invariant_stencil_op(VkStencilOp op)
942bf215546Sopenharmony_ci{
943bf215546Sopenharmony_ci   /* REPLACE is normally order invariant, except when the stencil
944bf215546Sopenharmony_ci    * reference value is written by the fragment shader. Tracking this
945bf215546Sopenharmony_ci    * interaction does not seem worth the effort, so be conservative.
946bf215546Sopenharmony_ci    */
947bf215546Sopenharmony_ci   return op != VK_STENCIL_OP_INCREMENT_AND_CLAMP && op != VK_STENCIL_OP_DECREMENT_AND_CLAMP &&
948bf215546Sopenharmony_ci          op != VK_STENCIL_OP_REPLACE;
949bf215546Sopenharmony_ci}
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_cistatic bool
952bf215546Sopenharmony_ciradv_order_invariant_stencil_state(const struct radv_stencil_op_info *info)
953bf215546Sopenharmony_ci{
954bf215546Sopenharmony_ci   /* Compute whether, assuming Z writes are disabled, this stencil state
955bf215546Sopenharmony_ci    * is order invariant in the sense that the set of passing fragments as
956bf215546Sopenharmony_ci    * well as the final stencil buffer result does not depend on the order
957bf215546Sopenharmony_ci    * of fragments.
958bf215546Sopenharmony_ci    */
959bf215546Sopenharmony_ci   return !info->write_mask ||
960bf215546Sopenharmony_ci          /* The following assumes that Z writes are disabled. */
961bf215546Sopenharmony_ci          (info->compare_op == VK_COMPARE_OP_ALWAYS &&
962bf215546Sopenharmony_ci           radv_order_invariant_stencil_op(info->pass_op) &&
963bf215546Sopenharmony_ci           radv_order_invariant_stencil_op(info->depth_fail_op)) ||
964bf215546Sopenharmony_ci          (info->compare_op == VK_COMPARE_OP_NEVER &&
965bf215546Sopenharmony_ci           radv_order_invariant_stencil_op(info->fail_op));
966bf215546Sopenharmony_ci}
967bf215546Sopenharmony_ci
968bf215546Sopenharmony_cistatic bool
969bf215546Sopenharmony_ciradv_pipeline_has_dynamic_ds_states(const struct radv_graphics_pipeline *pipeline)
970bf215546Sopenharmony_ci{
971bf215546Sopenharmony_ci   return !!(pipeline->dynamic_states & (RADV_DYNAMIC_DEPTH_TEST_ENABLE |
972bf215546Sopenharmony_ci                                         RADV_DYNAMIC_DEPTH_WRITE_ENABLE |
973bf215546Sopenharmony_ci                                         RADV_DYNAMIC_DEPTH_COMPARE_OP |
974bf215546Sopenharmony_ci                                         RADV_DYNAMIC_STENCIL_TEST_ENABLE |
975bf215546Sopenharmony_ci                                         RADV_DYNAMIC_STENCIL_WRITE_MASK |
976bf215546Sopenharmony_ci                                         RADV_DYNAMIC_STENCIL_OP));
977bf215546Sopenharmony_ci}
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_cistatic bool
980bf215546Sopenharmony_ciradv_pipeline_out_of_order_rast(struct radv_graphics_pipeline *pipeline,
981bf215546Sopenharmony_ci                                const struct radv_blend_state *blend,
982bf215546Sopenharmony_ci                                const struct radv_graphics_pipeline_info *info)
983bf215546Sopenharmony_ci{
984bf215546Sopenharmony_ci   unsigned colormask = blend->cb_target_enabled_4bit;
985bf215546Sopenharmony_ci
986bf215546Sopenharmony_ci   if (!pipeline->base.device->physical_device->out_of_order_rast_allowed)
987bf215546Sopenharmony_ci      return false;
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci   /* Be conservative if a logic operation is enabled with color buffers. */
990bf215546Sopenharmony_ci   if (colormask && info->cb.logic_op_enable)
991bf215546Sopenharmony_ci      return false;
992bf215546Sopenharmony_ci
993bf215546Sopenharmony_ci   /* Be conservative if an extended dynamic depth/stencil state is
994bf215546Sopenharmony_ci    * enabled because the driver can't update out-of-order rasterization
995bf215546Sopenharmony_ci    * dynamically.
996bf215546Sopenharmony_ci    */
997bf215546Sopenharmony_ci   if (radv_pipeline_has_dynamic_ds_states(pipeline))
998bf215546Sopenharmony_ci      return false;
999bf215546Sopenharmony_ci
1000bf215546Sopenharmony_ci   /* Default depth/stencil invariance when no attachment is bound. */
1001bf215546Sopenharmony_ci   struct radv_dsa_order_invariance dsa_order_invariant = {.zs = true, .pass_set = true};
1002bf215546Sopenharmony_ci
1003bf215546Sopenharmony_ci   bool has_stencil = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED;
1004bf215546Sopenharmony_ci   struct radv_dsa_order_invariance order_invariance[2];
1005bf215546Sopenharmony_ci   struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
1006bf215546Sopenharmony_ci
1007bf215546Sopenharmony_ci   /* Compute depth/stencil order invariance in order to know if
1008bf215546Sopenharmony_ci    * it's safe to enable out-of-order.
1009bf215546Sopenharmony_ci    */
1010bf215546Sopenharmony_ci   bool zfunc_is_ordered = info->ds.depth_compare_op == VK_COMPARE_OP_NEVER ||
1011bf215546Sopenharmony_ci                           info->ds.depth_compare_op == VK_COMPARE_OP_LESS ||
1012bf215546Sopenharmony_ci                           info->ds.depth_compare_op == VK_COMPARE_OP_LESS_OR_EQUAL ||
1013bf215546Sopenharmony_ci                           info->ds.depth_compare_op == VK_COMPARE_OP_GREATER ||
1014bf215546Sopenharmony_ci                           info->ds.depth_compare_op == VK_COMPARE_OP_GREATER_OR_EQUAL;
1015bf215546Sopenharmony_ci   bool depth_write_enabled = radv_is_depth_write_enabled(&info->ds);
1016bf215546Sopenharmony_ci   bool stencil_write_enabled = radv_is_stencil_write_enabled(&info->ds);
1017bf215546Sopenharmony_ci   bool ds_write_enabled = depth_write_enabled || stencil_write_enabled;
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci   bool nozwrite_and_order_invariant_stencil =
1020bf215546Sopenharmony_ci      !ds_write_enabled ||
1021bf215546Sopenharmony_ci      (!depth_write_enabled && radv_order_invariant_stencil_state(&info->ds.front) &&
1022bf215546Sopenharmony_ci       radv_order_invariant_stencil_state(&info->ds.back));
1023bf215546Sopenharmony_ci
1024bf215546Sopenharmony_ci   order_invariance[1].zs = nozwrite_and_order_invariant_stencil ||
1025bf215546Sopenharmony_ci                            (!stencil_write_enabled && zfunc_is_ordered);
1026bf215546Sopenharmony_ci   order_invariance[0].zs = !depth_write_enabled || zfunc_is_ordered;
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci   order_invariance[1].pass_set =
1029bf215546Sopenharmony_ci      nozwrite_and_order_invariant_stencil ||
1030bf215546Sopenharmony_ci      (!stencil_write_enabled &&
1031bf215546Sopenharmony_ci       (info->ds.depth_compare_op == VK_COMPARE_OP_ALWAYS ||
1032bf215546Sopenharmony_ci        info->ds.depth_compare_op == VK_COMPARE_OP_NEVER));
1033bf215546Sopenharmony_ci   order_invariance[0].pass_set =
1034bf215546Sopenharmony_ci      !depth_write_enabled ||
1035bf215546Sopenharmony_ci      (info->ds.depth_compare_op == VK_COMPARE_OP_ALWAYS ||
1036bf215546Sopenharmony_ci       info->ds.depth_compare_op == VK_COMPARE_OP_NEVER);
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci   dsa_order_invariant = order_invariance[has_stencil];
1039bf215546Sopenharmony_ci   if (!dsa_order_invariant.zs)
1040bf215546Sopenharmony_ci      return false;
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_ci   /* The set of PS invocations is always order invariant,
1043bf215546Sopenharmony_ci    * except when early Z/S tests are requested.
1044bf215546Sopenharmony_ci    */
1045bf215546Sopenharmony_ci   if (ps && ps->info.ps.writes_memory && ps->info.ps.early_fragment_test &&
1046bf215546Sopenharmony_ci       !dsa_order_invariant.pass_set)
1047bf215546Sopenharmony_ci      return false;
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_ci   /* Determine if out-of-order rasterization should be disabled when occlusion queries are used. */
1050bf215546Sopenharmony_ci   pipeline->disable_out_of_order_rast_for_occlusion = !dsa_order_invariant.pass_set;
1051bf215546Sopenharmony_ci
1052bf215546Sopenharmony_ci   /* No color buffers are enabled for writing. */
1053bf215546Sopenharmony_ci   if (!colormask)
1054bf215546Sopenharmony_ci      return true;
1055bf215546Sopenharmony_ci
1056bf215546Sopenharmony_ci   unsigned blendmask = colormask & blend->blend_enable_4bit;
1057bf215546Sopenharmony_ci
1058bf215546Sopenharmony_ci   if (blendmask) {
1059bf215546Sopenharmony_ci      /* Only commutative blending. */
1060bf215546Sopenharmony_ci      if (blendmask & ~blend->commutative_4bit)
1061bf215546Sopenharmony_ci         return false;
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_ci      if (!dsa_order_invariant.pass_set)
1064bf215546Sopenharmony_ci         return false;
1065bf215546Sopenharmony_ci   }
1066bf215546Sopenharmony_ci
1067bf215546Sopenharmony_ci   if (colormask & ~blendmask)
1068bf215546Sopenharmony_ci      return false;
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci   return true;
1071bf215546Sopenharmony_ci}
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_cistatic void
1074bf215546Sopenharmony_ciradv_pipeline_init_multisample_state(struct radv_graphics_pipeline *pipeline,
1075bf215546Sopenharmony_ci                                     const struct radv_blend_state *blend,
1076bf215546Sopenharmony_ci                                     const struct radv_graphics_pipeline_info *info,
1077bf215546Sopenharmony_ci                                     unsigned rast_prim)
1078bf215546Sopenharmony_ci{
1079bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
1080bf215546Sopenharmony_ci   struct radv_multisample_state *ms = &pipeline->ms;
1081bf215546Sopenharmony_ci   unsigned num_tile_pipes = pdevice->rad_info.num_tile_pipes;
1082bf215546Sopenharmony_ci   const VkConservativeRasterizationModeEXT mode = info->rs.conservative_mode;
1083bf215546Sopenharmony_ci   bool out_of_order_rast = false;
1084bf215546Sopenharmony_ci   int ps_iter_samples = 1;
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci   ms->num_samples = info->ms.raster_samples;
1087bf215546Sopenharmony_ci
1088bf215546Sopenharmony_ci   /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
1089bf215546Sopenharmony_ci    *
1090bf215546Sopenharmony_ci    * "Sample shading is enabled for a graphics pipeline:
1091bf215546Sopenharmony_ci    *
1092bf215546Sopenharmony_ci    * - If the interface of the fragment shader entry point of the
1093bf215546Sopenharmony_ci    *   graphics pipeline includes an input variable decorated
1094bf215546Sopenharmony_ci    *   with SampleId or SamplePosition. In this case
1095bf215546Sopenharmony_ci    *   minSampleShadingFactor takes the value 1.0.
1096bf215546Sopenharmony_ci    * - Else if the sampleShadingEnable member of the
1097bf215546Sopenharmony_ci    *   VkPipelineMultisampleStateCreateInfo structure specified
1098bf215546Sopenharmony_ci    *   when creating the graphics pipeline is set to VK_TRUE. In
1099bf215546Sopenharmony_ci    *   this case minSampleShadingFactor takes the value of
1100bf215546Sopenharmony_ci    *   VkPipelineMultisampleStateCreateInfo::minSampleShading.
1101bf215546Sopenharmony_ci    *
1102bf215546Sopenharmony_ci    * Otherwise, sample shading is considered disabled."
1103bf215546Sopenharmony_ci    */
1104bf215546Sopenharmony_ci   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.uses_sample_shading) {
1105bf215546Sopenharmony_ci      ps_iter_samples = ms->num_samples;
1106bf215546Sopenharmony_ci   } else {
1107bf215546Sopenharmony_ci      ps_iter_samples = radv_pipeline_get_ps_iter_samples(info);
1108bf215546Sopenharmony_ci   }
1109bf215546Sopenharmony_ci
1110bf215546Sopenharmony_ci   if (info->rs.order == VK_RASTERIZATION_ORDER_RELAXED_AMD) {
1111bf215546Sopenharmony_ci      /* Out-of-order rasterization is explicitly enabled by the
1112bf215546Sopenharmony_ci       * application.
1113bf215546Sopenharmony_ci       */
1114bf215546Sopenharmony_ci      out_of_order_rast = true;
1115bf215546Sopenharmony_ci   } else {
1116bf215546Sopenharmony_ci      /* Determine if the driver can enable out-of-order
1117bf215546Sopenharmony_ci       * rasterization internally.
1118bf215546Sopenharmony_ci       */
1119bf215546Sopenharmony_ci      out_of_order_rast = radv_pipeline_out_of_order_rast(pipeline, blend, info);
1120bf215546Sopenharmony_ci   }
1121bf215546Sopenharmony_ci
1122bf215546Sopenharmony_ci   ms->pa_sc_aa_config = 0;
1123bf215546Sopenharmony_ci   ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) |
1124bf215546Sopenharmony_ci                 S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
1125bf215546Sopenharmony_ci
1126bf215546Sopenharmony_ci   /* Adjust MSAA state if conservative rasterization is enabled. */
1127bf215546Sopenharmony_ci   if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
1128bf215546Sopenharmony_ci      ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1);
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci      ms->db_eqaa |=
1131bf215546Sopenharmony_ci         S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | S_028804_OVERRASTERIZATION_AMOUNT(4);
1132bf215546Sopenharmony_ci   }
1133bf215546Sopenharmony_ci
1134bf215546Sopenharmony_ci   ms->pa_sc_mode_cntl_1 =
1135bf215546Sopenharmony_ci      S_028A4C_WALK_FENCE_ENABLE(1) | // TODO linear dst fixes
1136bf215546Sopenharmony_ci      S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
1137bf215546Sopenharmony_ci      S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
1138bf215546Sopenharmony_ci      S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
1139bf215546Sopenharmony_ci      /* always 1: */
1140bf215546Sopenharmony_ci      S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
1141bf215546Sopenharmony_ci      S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
1142bf215546Sopenharmony_ci      S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1);
1143bf215546Sopenharmony_ci   ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pdevice->rad_info.gfx_level >= GFX9) |
1144bf215546Sopenharmony_ci                           S_028A48_VPORT_SCISSOR_ENABLE(1) |
1145bf215546Sopenharmony_ci                           S_028A48_LINE_STIPPLE_ENABLE(info->rs.stippled_line_enable);
1146bf215546Sopenharmony_ci
1147bf215546Sopenharmony_ci   if (info->rs.line_raster_mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT &&
1148bf215546Sopenharmony_ci       radv_rast_prim_is_line(rast_prim)) {
1149bf215546Sopenharmony_ci      /* From the Vulkan spec 1.3.221:
1150bf215546Sopenharmony_ci       *
1151bf215546Sopenharmony_ci       * "When Bresenham lines are being rasterized, sample locations may all be treated as being at
1152bf215546Sopenharmony_ci       * the pixel center (this may affect attribute and depth interpolation)."
1153bf215546Sopenharmony_ci       *
1154bf215546Sopenharmony_ci       * "One consequence of this is that Bresenham lines cover the same pixels regardless of the
1155bf215546Sopenharmony_ci       * number of rasterization samples, and cover all samples in those pixels (unless masked out
1156bf215546Sopenharmony_ci       * or killed)."
1157bf215546Sopenharmony_ci       */
1158bf215546Sopenharmony_ci      ms->num_samples = 1;
1159bf215546Sopenharmony_ci   }
1160bf215546Sopenharmony_ci
1161bf215546Sopenharmony_ci   if (ms->num_samples > 1) {
1162bf215546Sopenharmony_ci      uint32_t z_samples = radv_pipeline_depth_samples(info);
1163bf215546Sopenharmony_ci      unsigned log_samples = util_logbase2(ms->num_samples);
1164bf215546Sopenharmony_ci      unsigned log_z_samples = util_logbase2(z_samples);
1165bf215546Sopenharmony_ci      unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
1166bf215546Sopenharmony_ci      ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
1167bf215546Sopenharmony_ci      ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
1168bf215546Sopenharmony_ci                     S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
1169bf215546Sopenharmony_ci                     S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
1170bf215546Sopenharmony_ci                     S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
1171bf215546Sopenharmony_ci      ms->pa_sc_aa_config |=
1172bf215546Sopenharmony_ci         S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
1173bf215546Sopenharmony_ci         S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
1174bf215546Sopenharmony_ci         S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */
1175bf215546Sopenharmony_ci         S_028BE0_COVERED_CENTROID_IS_CENTER(pdevice->rad_info.gfx_level >= GFX10_3);
1176bf215546Sopenharmony_ci      ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
1177bf215546Sopenharmony_ci      if (ps_iter_samples > 1)
1178bf215546Sopenharmony_ci         pipeline->spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
1179bf215546Sopenharmony_ci   }
1180bf215546Sopenharmony_ci
1181bf215546Sopenharmony_ci   ms->pa_sc_aa_mask[0] = info->ms.sample_mask | ((uint32_t)info->ms.sample_mask << 16);
1182bf215546Sopenharmony_ci   ms->pa_sc_aa_mask[1] = info->ms.sample_mask | ((uint32_t)info->ms.sample_mask << 16);
1183bf215546Sopenharmony_ci}
1184bf215546Sopenharmony_ci
1185bf215546Sopenharmony_cistatic void
1186bf215546Sopenharmony_cigfx103_pipeline_init_vrs_state(struct radv_graphics_pipeline *pipeline,
1187bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline_info *info)
1188bf215546Sopenharmony_ci{
1189bf215546Sopenharmony_ci   struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
1190bf215546Sopenharmony_ci   struct radv_multisample_state *ms = &pipeline->ms;
1191bf215546Sopenharmony_ci   struct radv_vrs_state *vrs = &pipeline->vrs;
1192bf215546Sopenharmony_ci
1193bf215546Sopenharmony_ci   if (info->ms.sample_shading_enable ||
1194bf215546Sopenharmony_ci       ps->info.ps.uses_sample_shading || ps->info.ps.reads_sample_mask_in) {
1195bf215546Sopenharmony_ci      /* Disable VRS and use the rates from PS_ITER_SAMPLES if:
1196bf215546Sopenharmony_ci       *
1197bf215546Sopenharmony_ci       * 1) sample shading is enabled or per-sample interpolation is
1198bf215546Sopenharmony_ci       *    used by the fragment shader
1199bf215546Sopenharmony_ci       * 2) the fragment shader reads gl_SampleMaskIn because the
1200bf215546Sopenharmony_ci       *    16-bit sample coverage mask isn't enough for MSAA8x and
1201bf215546Sopenharmony_ci       *    2x2 coarse shading isn't enough.
1202bf215546Sopenharmony_ci       */
1203bf215546Sopenharmony_ci      vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE);
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_ci      /* Make sure sample shading is enabled even if only MSAA1x is
1206bf215546Sopenharmony_ci       * used because the SAMPLE_ITER combiner is in passthrough
1207bf215546Sopenharmony_ci       * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate.
1208bf215546Sopenharmony_ci       * The default VRS rate when sample shading is enabled is 1x1.
1209bf215546Sopenharmony_ci       */
1210bf215546Sopenharmony_ci      if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1))
1211bf215546Sopenharmony_ci         ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1);
1212bf215546Sopenharmony_ci   } else {
1213bf215546Sopenharmony_ci      vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
1214bf215546Sopenharmony_ci   }
1215bf215546Sopenharmony_ci}
1216bf215546Sopenharmony_ci
1217bf215546Sopenharmony_cistatic uint32_t
1218bf215546Sopenharmony_cisi_conv_tess_prim_to_gs_out(enum tess_primitive_mode prim)
1219bf215546Sopenharmony_ci{
1220bf215546Sopenharmony_ci   switch (prim) {
1221bf215546Sopenharmony_ci   case TESS_PRIMITIVE_TRIANGLES:
1222bf215546Sopenharmony_ci   case TESS_PRIMITIVE_QUADS:
1223bf215546Sopenharmony_ci      return V_028A6C_TRISTRIP;
1224bf215546Sopenharmony_ci   case TESS_PRIMITIVE_ISOLINES:
1225bf215546Sopenharmony_ci      return V_028A6C_LINESTRIP;
1226bf215546Sopenharmony_ci   default:
1227bf215546Sopenharmony_ci      assert(0);
1228bf215546Sopenharmony_ci      return 0;
1229bf215546Sopenharmony_ci   }
1230bf215546Sopenharmony_ci}
1231bf215546Sopenharmony_ci
1232bf215546Sopenharmony_cistatic uint32_t
1233bf215546Sopenharmony_cisi_conv_gl_prim_to_gs_out(unsigned gl_prim)
1234bf215546Sopenharmony_ci{
1235bf215546Sopenharmony_ci   switch (gl_prim) {
1236bf215546Sopenharmony_ci   case SHADER_PRIM_POINTS:
1237bf215546Sopenharmony_ci      return V_028A6C_POINTLIST;
1238bf215546Sopenharmony_ci   case SHADER_PRIM_LINES:
1239bf215546Sopenharmony_ci   case SHADER_PRIM_LINE_STRIP:
1240bf215546Sopenharmony_ci   case SHADER_PRIM_LINES_ADJACENCY:
1241bf215546Sopenharmony_ci      return V_028A6C_LINESTRIP;
1242bf215546Sopenharmony_ci
1243bf215546Sopenharmony_ci   case SHADER_PRIM_TRIANGLES:
1244bf215546Sopenharmony_ci   case SHADER_PRIM_TRIANGLE_STRIP_ADJACENCY:
1245bf215546Sopenharmony_ci   case SHADER_PRIM_TRIANGLE_STRIP:
1246bf215546Sopenharmony_ci   case SHADER_PRIM_QUADS:
1247bf215546Sopenharmony_ci      return V_028A6C_TRISTRIP;
1248bf215546Sopenharmony_ci   default:
1249bf215546Sopenharmony_ci      assert(0);
1250bf215546Sopenharmony_ci      return 0;
1251bf215546Sopenharmony_ci   }
1252bf215546Sopenharmony_ci}
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_cistatic uint64_t
1255bf215546Sopenharmony_ciradv_dynamic_state_mask(VkDynamicState state)
1256bf215546Sopenharmony_ci{
1257bf215546Sopenharmony_ci   switch (state) {
1258bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_VIEWPORT:
1259bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT:
1260bf215546Sopenharmony_ci      return RADV_DYNAMIC_VIEWPORT;
1261bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_SCISSOR:
1262bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT:
1263bf215546Sopenharmony_ci      return RADV_DYNAMIC_SCISSOR;
1264bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_LINE_WIDTH:
1265bf215546Sopenharmony_ci      return RADV_DYNAMIC_LINE_WIDTH;
1266bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_BIAS:
1267bf215546Sopenharmony_ci      return RADV_DYNAMIC_DEPTH_BIAS;
1268bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
1269bf215546Sopenharmony_ci      return RADV_DYNAMIC_BLEND_CONSTANTS;
1270bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
1271bf215546Sopenharmony_ci      return RADV_DYNAMIC_DEPTH_BOUNDS;
1272bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
1273bf215546Sopenharmony_ci      return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
1274bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
1275bf215546Sopenharmony_ci      return RADV_DYNAMIC_STENCIL_WRITE_MASK;
1276bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
1277bf215546Sopenharmony_ci      return RADV_DYNAMIC_STENCIL_REFERENCE;
1278bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
1279bf215546Sopenharmony_ci      return RADV_DYNAMIC_DISCARD_RECTANGLE;
1280bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
1281bf215546Sopenharmony_ci      return RADV_DYNAMIC_SAMPLE_LOCATIONS;
1282bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
1283bf215546Sopenharmony_ci      return RADV_DYNAMIC_LINE_STIPPLE;
1284bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_CULL_MODE:
1285bf215546Sopenharmony_ci      return RADV_DYNAMIC_CULL_MODE;
1286bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_FRONT_FACE:
1287bf215546Sopenharmony_ci      return RADV_DYNAMIC_FRONT_FACE;
1288bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY:
1289bf215546Sopenharmony_ci      return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
1290bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE:
1291bf215546Sopenharmony_ci      return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
1292bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE:
1293bf215546Sopenharmony_ci      return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
1294bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP:
1295bf215546Sopenharmony_ci      return RADV_DYNAMIC_DEPTH_COMPARE_OP;
1296bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE:
1297bf215546Sopenharmony_ci      return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
1298bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE:
1299bf215546Sopenharmony_ci      return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
1300bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_STENCIL_OP:
1301bf215546Sopenharmony_ci      return RADV_DYNAMIC_STENCIL_OP;
1302bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE:
1303bf215546Sopenharmony_ci      return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
1304bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
1305bf215546Sopenharmony_ci      return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
1306bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT:
1307bf215546Sopenharmony_ci      return RADV_DYNAMIC_PATCH_CONTROL_POINTS;
1308bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE:
1309bf215546Sopenharmony_ci      return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1310bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE:
1311bf215546Sopenharmony_ci      return RADV_DYNAMIC_DEPTH_BIAS_ENABLE;
1312bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
1313bf215546Sopenharmony_ci      return RADV_DYNAMIC_LOGIC_OP;
1314bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE:
1315bf215546Sopenharmony_ci      return RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
1316bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
1317bf215546Sopenharmony_ci      return RADV_DYNAMIC_COLOR_WRITE_ENABLE;
1318bf215546Sopenharmony_ci   case VK_DYNAMIC_STATE_VERTEX_INPUT_EXT:
1319bf215546Sopenharmony_ci      return RADV_DYNAMIC_VERTEX_INPUT;
1320bf215546Sopenharmony_ci   default:
1321bf215546Sopenharmony_ci      unreachable("Unhandled dynamic state");
1322bf215546Sopenharmony_ci   }
1323bf215546Sopenharmony_ci}
1324bf215546Sopenharmony_ci
1325bf215546Sopenharmony_cistatic bool
1326bf215546Sopenharmony_ciradv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline,
1327bf215546Sopenharmony_ci                               const struct radv_color_blend_info *cb_info)
1328bf215546Sopenharmony_ci{
1329bf215546Sopenharmony_ci   for (uint32_t i = 0; i < cb_info->att_count; i++) {
1330bf215546Sopenharmony_ci      if (cb_info->att[i].color_write_mask && cb_info->att[i].blend_enable)
1331bf215546Sopenharmony_ci         return true;
1332bf215546Sopenharmony_ci   }
1333bf215546Sopenharmony_ci
1334bf215546Sopenharmony_ci   return false;
1335bf215546Sopenharmony_ci}
1336bf215546Sopenharmony_ci
1337bf215546Sopenharmony_cistatic uint64_t
1338bf215546Sopenharmony_ciradv_pipeline_needed_dynamic_state(const struct radv_graphics_pipeline *pipeline,
1339bf215546Sopenharmony_ci                                   const struct radv_graphics_pipeline_info *info)
1340bf215546Sopenharmony_ci{
1341bf215546Sopenharmony_ci   bool has_color_att = radv_pipeline_has_color_attachments(&info->ri);
1342bf215546Sopenharmony_ci   bool raster_enabled = !info->rs.discard_enable ||
1343bf215546Sopenharmony_ci                         (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
1344bf215546Sopenharmony_ci   uint64_t states = RADV_DYNAMIC_ALL;
1345bf215546Sopenharmony_ci
1346bf215546Sopenharmony_ci   /* Disable dynamic states that are useless to mesh shading. */
1347bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
1348bf215546Sopenharmony_ci      if (!raster_enabled)
1349bf215546Sopenharmony_ci         return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1350bf215546Sopenharmony_ci
1351bf215546Sopenharmony_ci      states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
1352bf215546Sopenharmony_ci                  RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY);
1353bf215546Sopenharmony_ci   }
1354bf215546Sopenharmony_ci
1355bf215546Sopenharmony_ci   /* If rasterization is disabled we do not care about any of the
1356bf215546Sopenharmony_ci    * dynamic states, since they are all rasterization related only,
1357bf215546Sopenharmony_ci    * except primitive topology, primitive restart enable, vertex
1358bf215546Sopenharmony_ci    * binding stride and rasterization discard itself.
1359bf215546Sopenharmony_ci    */
1360bf215546Sopenharmony_ci   if (!raster_enabled) {
1361bf215546Sopenharmony_ci      return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
1362bf215546Sopenharmony_ci             RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
1363bf215546Sopenharmony_ci             RADV_DYNAMIC_VERTEX_INPUT;
1364bf215546Sopenharmony_ci   }
1365bf215546Sopenharmony_ci
1366bf215546Sopenharmony_ci   if (!info->rs.depth_bias_enable &&
1367bf215546Sopenharmony_ci       !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE))
1368bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_DEPTH_BIAS;
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_ci   if (!info->ds.depth_bounds_test_enable &&
1371bf215546Sopenharmony_ci       !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE))
1372bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;
1373bf215546Sopenharmony_ci
1374bf215546Sopenharmony_ci   if (!info->ds.stencil_test_enable &&
1375bf215546Sopenharmony_ci       !(pipeline->dynamic_states & RADV_DYNAMIC_STENCIL_TEST_ENABLE))
1376bf215546Sopenharmony_ci      states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK |
1377bf215546Sopenharmony_ci                  RADV_DYNAMIC_STENCIL_REFERENCE | RADV_DYNAMIC_STENCIL_OP);
1378bf215546Sopenharmony_ci
1379bf215546Sopenharmony_ci   if (!info->dr.count)
1380bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
1381bf215546Sopenharmony_ci
1382bf215546Sopenharmony_ci   if (!info->ms.sample_locs_enable)
1383bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
1384bf215546Sopenharmony_ci
1385bf215546Sopenharmony_ci   if (!info->rs.stippled_line_enable)
1386bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_LINE_STIPPLE;
1387bf215546Sopenharmony_ci
1388bf215546Sopenharmony_ci   if (!radv_is_vrs_enabled(pipeline, info))
1389bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
1390bf215546Sopenharmony_ci
1391bf215546Sopenharmony_ci   if (!has_color_att || !radv_pipeline_is_blend_enabled(pipeline, &info->cb))
1392bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_BLEND_CONSTANTS;
1393bf215546Sopenharmony_ci
1394bf215546Sopenharmony_ci   if (!has_color_att)
1395bf215546Sopenharmony_ci      states &= ~RADV_DYNAMIC_COLOR_WRITE_ENABLE;
1396bf215546Sopenharmony_ci
1397bf215546Sopenharmony_ci   return states;
1398bf215546Sopenharmony_ci}
1399bf215546Sopenharmony_ci
1400bf215546Sopenharmony_cistatic struct radv_ia_multi_vgt_param_helpers
1401bf215546Sopenharmony_ciradv_compute_ia_multi_vgt_param_helpers(struct radv_graphics_pipeline *pipeline)
1402bf215546Sopenharmony_ci{
1403bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
1404bf215546Sopenharmony_ci   struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
1405bf215546Sopenharmony_ci
1406bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
1407bf215546Sopenharmony_ci      ia_multi_vgt_param.primgroup_size =
1408bf215546Sopenharmony_ci         pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
1409bf215546Sopenharmony_ci   else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
1410bf215546Sopenharmony_ci      ia_multi_vgt_param.primgroup_size = 64;
1411bf215546Sopenharmony_ci   else
1412bf215546Sopenharmony_ci      ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
1413bf215546Sopenharmony_ci
1414bf215546Sopenharmony_ci   /* GS requirement. */
1415bf215546Sopenharmony_ci   ia_multi_vgt_param.partial_es_wave = false;
1416bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && pdevice->rad_info.gfx_level <= GFX8)
1417bf215546Sopenharmony_ci      if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pdevice->gs_table_depth - 3)
1418bf215546Sopenharmony_ci         ia_multi_vgt_param.partial_es_wave = true;
1419bf215546Sopenharmony_ci
1420bf215546Sopenharmony_ci   ia_multi_vgt_param.ia_switch_on_eoi = false;
1421bf215546Sopenharmony_ci   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
1422bf215546Sopenharmony_ci      ia_multi_vgt_param.ia_switch_on_eoi = true;
1423bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
1424bf215546Sopenharmony_ci      ia_multi_vgt_param.ia_switch_on_eoi = true;
1425bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
1426bf215546Sopenharmony_ci      /* SWITCH_ON_EOI must be set if PrimID is used. */
1427bf215546Sopenharmony_ci      if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
1428bf215546Sopenharmony_ci          radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
1429bf215546Sopenharmony_ci         ia_multi_vgt_param.ia_switch_on_eoi = true;
1430bf215546Sopenharmony_ci   }
1431bf215546Sopenharmony_ci
1432bf215546Sopenharmony_ci   ia_multi_vgt_param.partial_vs_wave = false;
1433bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
1434bf215546Sopenharmony_ci      /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
1435bf215546Sopenharmony_ci      if ((pdevice->rad_info.family == CHIP_TAHITI ||
1436bf215546Sopenharmony_ci           pdevice->rad_info.family == CHIP_PITCAIRN ||
1437bf215546Sopenharmony_ci           pdevice->rad_info.family == CHIP_BONAIRE) &&
1438bf215546Sopenharmony_ci          radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
1439bf215546Sopenharmony_ci         ia_multi_vgt_param.partial_vs_wave = true;
1440bf215546Sopenharmony_ci      /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
1441bf215546Sopenharmony_ci      if (pdevice->rad_info.has_distributed_tess) {
1442bf215546Sopenharmony_ci         if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
1443bf215546Sopenharmony_ci            if (pdevice->rad_info.gfx_level <= GFX8)
1444bf215546Sopenharmony_ci               ia_multi_vgt_param.partial_es_wave = true;
1445bf215546Sopenharmony_ci         } else {
1446bf215546Sopenharmony_ci            ia_multi_vgt_param.partial_vs_wave = true;
1447bf215546Sopenharmony_ci         }
1448bf215546Sopenharmony_ci      }
1449bf215546Sopenharmony_ci   }
1450bf215546Sopenharmony_ci
1451bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
1452bf215546Sopenharmony_ci      /* On these chips there is the possibility of a hang if the
1453bf215546Sopenharmony_ci       * pipeline uses a GS and partial_vs_wave is not set.
1454bf215546Sopenharmony_ci       *
1455bf215546Sopenharmony_ci       * This mostly does not hit 4-SE chips, as those typically set
1456bf215546Sopenharmony_ci       * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
1457bf215546Sopenharmony_ci       * with GS due to another workaround.
1458bf215546Sopenharmony_ci       *
1459bf215546Sopenharmony_ci       * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
1460bf215546Sopenharmony_ci       */
1461bf215546Sopenharmony_ci      if (pdevice->rad_info.family == CHIP_TONGA ||
1462bf215546Sopenharmony_ci          pdevice->rad_info.family == CHIP_FIJI ||
1463bf215546Sopenharmony_ci          pdevice->rad_info.family == CHIP_POLARIS10 ||
1464bf215546Sopenharmony_ci          pdevice->rad_info.family == CHIP_POLARIS11 ||
1465bf215546Sopenharmony_ci          pdevice->rad_info.family == CHIP_POLARIS12 ||
1466bf215546Sopenharmony_ci          pdevice->rad_info.family == CHIP_VEGAM) {
1467bf215546Sopenharmony_ci         ia_multi_vgt_param.partial_vs_wave = true;
1468bf215546Sopenharmony_ci      }
1469bf215546Sopenharmony_ci   }
1470bf215546Sopenharmony_ci
1471bf215546Sopenharmony_ci   ia_multi_vgt_param.base =
1472bf215546Sopenharmony_ci      S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
1473bf215546Sopenharmony_ci      /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
1474bf215546Sopenharmony_ci      S_028AA8_MAX_PRIMGRP_IN_WAVE(pdevice->rad_info.gfx_level == GFX8 ? 2 : 0) |
1475bf215546Sopenharmony_ci      S_030960_EN_INST_OPT_BASIC(pdevice->rad_info.gfx_level >= GFX9) |
1476bf215546Sopenharmony_ci      S_030960_EN_INST_OPT_ADV(pdevice->rad_info.gfx_level >= GFX9);
1477bf215546Sopenharmony_ci
1478bf215546Sopenharmony_ci   return ia_multi_vgt_param;
1479bf215546Sopenharmony_ci}
1480bf215546Sopenharmony_ci
1481bf215546Sopenharmony_cistatic uint32_t
1482bf215546Sopenharmony_ciradv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *vi, uint32_t attrib_binding)
1483bf215546Sopenharmony_ci{
1484bf215546Sopenharmony_ci   for (uint32_t i = 0; i < vi->vertexBindingDescriptionCount; i++) {
1485bf215546Sopenharmony_ci      const VkVertexInputBindingDescription *input_binding = &vi->pVertexBindingDescriptions[i];
1486bf215546Sopenharmony_ci
1487bf215546Sopenharmony_ci      if (input_binding->binding == attrib_binding)
1488bf215546Sopenharmony_ci         return input_binding->stride;
1489bf215546Sopenharmony_ci   }
1490bf215546Sopenharmony_ci
1491bf215546Sopenharmony_ci   return 0;
1492bf215546Sopenharmony_ci}
1493bf215546Sopenharmony_ci
1494bf215546Sopenharmony_cistatic struct radv_vertex_input_info
1495bf215546Sopenharmony_ciradv_pipeline_init_vertex_input_info(struct radv_graphics_pipeline *pipeline,
1496bf215546Sopenharmony_ci                                     const VkGraphicsPipelineCreateInfo *pCreateInfo)
1497bf215546Sopenharmony_ci{
1498bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
1499bf215546Sopenharmony_ci   const VkPipelineVertexInputStateCreateInfo *vi = pCreateInfo->pVertexInputState;
1500bf215546Sopenharmony_ci   struct radv_vertex_input_info info = {0};
1501bf215546Sopenharmony_ci
1502bf215546Sopenharmony_ci   if (!(pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT)) {
1503bf215546Sopenharmony_ci      /* Vertex input */
1504bf215546Sopenharmony_ci      const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state =
1505bf215546Sopenharmony_ci         vk_find_struct_const(vi->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
1506bf215546Sopenharmony_ci
1507bf215546Sopenharmony_ci      uint32_t binding_input_rate = 0;
1508bf215546Sopenharmony_ci      uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
1509bf215546Sopenharmony_ci      for (unsigned i = 0; i < vi->vertexBindingDescriptionCount; ++i) {
1510bf215546Sopenharmony_ci         const VkVertexInputBindingDescription *desc = &vi->pVertexBindingDescriptions[i];
1511bf215546Sopenharmony_ci
1512bf215546Sopenharmony_ci         if (desc->inputRate) {
1513bf215546Sopenharmony_ci            unsigned binding = vi->pVertexBindingDescriptions[i].binding;
1514bf215546Sopenharmony_ci            binding_input_rate |= 1u << binding;
1515bf215546Sopenharmony_ci            instance_rate_divisors[binding] = 1;
1516bf215546Sopenharmony_ci         }
1517bf215546Sopenharmony_ci
1518bf215546Sopenharmony_ci         info.binding_stride[desc->binding] = desc->stride;
1519bf215546Sopenharmony_ci      }
1520bf215546Sopenharmony_ci
1521bf215546Sopenharmony_ci      if (divisor_state) {
1522bf215546Sopenharmony_ci         for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) {
1523bf215546Sopenharmony_ci            instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] =
1524bf215546Sopenharmony_ci               divisor_state->pVertexBindingDivisors[i].divisor;
1525bf215546Sopenharmony_ci         }
1526bf215546Sopenharmony_ci      }
1527bf215546Sopenharmony_ci
1528bf215546Sopenharmony_ci      for (unsigned i = 0; i < vi->vertexAttributeDescriptionCount; ++i) {
1529bf215546Sopenharmony_ci         const VkVertexInputAttributeDescription *desc = &vi->pVertexAttributeDescriptions[i];
1530bf215546Sopenharmony_ci         const struct util_format_description *format_desc;
1531bf215546Sopenharmony_ci         unsigned location = desc->location;
1532bf215546Sopenharmony_ci         unsigned binding = desc->binding;
1533bf215546Sopenharmony_ci         unsigned num_format, data_format;
1534bf215546Sopenharmony_ci         bool post_shuffle;
1535bf215546Sopenharmony_ci
1536bf215546Sopenharmony_ci         if (binding_input_rate & (1u << binding)) {
1537bf215546Sopenharmony_ci            info.instance_rate_inputs |= 1u << location;
1538bf215546Sopenharmony_ci            info.instance_rate_divisors[location] = instance_rate_divisors[binding];
1539bf215546Sopenharmony_ci         }
1540bf215546Sopenharmony_ci
1541bf215546Sopenharmony_ci         format_desc = vk_format_description(desc->format);
1542bf215546Sopenharmony_ci         radv_translate_vertex_format(pdevice, desc->format, format_desc, &data_format, &num_format,
1543bf215546Sopenharmony_ci                                      &post_shuffle, &info.vertex_alpha_adjust[location]);
1544bf215546Sopenharmony_ci
1545bf215546Sopenharmony_ci         info.vertex_attribute_formats[location] = data_format | (num_format << 4);
1546bf215546Sopenharmony_ci         info.vertex_attribute_bindings[location] = desc->binding;
1547bf215546Sopenharmony_ci         info.vertex_attribute_offsets[location] = desc->offset;
1548bf215546Sopenharmony_ci
1549bf215546Sopenharmony_ci         const struct ac_data_format_info *dfmt_info = ac_get_data_format_info(data_format);
1550bf215546Sopenharmony_ci         unsigned attrib_align =
1551bf215546Sopenharmony_ci            dfmt_info->chan_byte_size ? dfmt_info->chan_byte_size : dfmt_info->element_size;
1552bf215546Sopenharmony_ci
1553bf215546Sopenharmony_ci         /* If desc->offset is misaligned, then the buffer offset must be too. Just
1554bf215546Sopenharmony_ci          * skip updating vertex_binding_align in this case.
1555bf215546Sopenharmony_ci          */
1556bf215546Sopenharmony_ci         if (desc->offset % attrib_align == 0)
1557bf215546Sopenharmony_ci            info.vertex_binding_align[desc->binding] =
1558bf215546Sopenharmony_ci               MAX2(info.vertex_binding_align[desc->binding], attrib_align);
1559bf215546Sopenharmony_ci
1560bf215546Sopenharmony_ci         if (!(pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE)) {
1561bf215546Sopenharmony_ci            /* From the Vulkan spec 1.2.157:
1562bf215546Sopenharmony_ci             *
1563bf215546Sopenharmony_ci             * "If the bound pipeline state object was created
1564bf215546Sopenharmony_ci             *  with the
1565bf215546Sopenharmony_ci             *  VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE
1566bf215546Sopenharmony_ci             *  dynamic state enabled then pStrides[i] specifies
1567bf215546Sopenharmony_ci             *  the distance in bytes between two consecutive
1568bf215546Sopenharmony_ci             *  elements within the corresponding buffer. In this
1569bf215546Sopenharmony_ci             *  case the VkVertexInputBindingDescription::stride
1570bf215546Sopenharmony_ci             *  state from the pipeline state object is ignored."
1571bf215546Sopenharmony_ci             *
1572bf215546Sopenharmony_ci             * Make sure the vertex attribute stride is zero to
1573bf215546Sopenharmony_ci             * avoid computing a wrong offset if it's initialized
1574bf215546Sopenharmony_ci             * to something else than zero.
1575bf215546Sopenharmony_ci             */
1576bf215546Sopenharmony_ci            info.vertex_attribute_strides[location] = radv_get_attrib_stride(vi, desc->binding);
1577bf215546Sopenharmony_ci         }
1578bf215546Sopenharmony_ci
1579bf215546Sopenharmony_ci         if (post_shuffle)
1580bf215546Sopenharmony_ci            info.vertex_post_shuffle |= 1 << location;
1581bf215546Sopenharmony_ci
1582bf215546Sopenharmony_ci         uint32_t end = desc->offset + vk_format_get_blocksize(desc->format);
1583bf215546Sopenharmony_ci         info.attrib_ends[desc->location] = end;
1584bf215546Sopenharmony_ci         if (info.binding_stride[desc->binding])
1585bf215546Sopenharmony_ci            info.attrib_index_offset[desc->location] =
1586bf215546Sopenharmony_ci               desc->offset / info.binding_stride[desc->binding];
1587bf215546Sopenharmony_ci         info.attrib_bindings[desc->location] = desc->binding;
1588bf215546Sopenharmony_ci      }
1589bf215546Sopenharmony_ci   }
1590bf215546Sopenharmony_ci
1591bf215546Sopenharmony_ci   return info;
1592bf215546Sopenharmony_ci}
1593bf215546Sopenharmony_ci
1594bf215546Sopenharmony_cistatic struct radv_input_assembly_info
1595bf215546Sopenharmony_ciradv_pipeline_init_input_assembly_info(struct radv_graphics_pipeline *pipeline,
1596bf215546Sopenharmony_ci                                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
1597bf215546Sopenharmony_ci{
1598bf215546Sopenharmony_ci   const VkPipelineInputAssemblyStateCreateInfo *ia = pCreateInfo->pInputAssemblyState;
1599bf215546Sopenharmony_ci   struct radv_input_assembly_info info = {0};
1600bf215546Sopenharmony_ci
1601bf215546Sopenharmony_ci   info.primitive_topology = si_translate_prim(ia->topology);
1602bf215546Sopenharmony_ci   info.primitive_restart_enable = !!ia->primitiveRestartEnable;
1603bf215546Sopenharmony_ci
1604bf215546Sopenharmony_ci   return info;
1605bf215546Sopenharmony_ci}
1606bf215546Sopenharmony_ci
1607bf215546Sopenharmony_cistatic struct radv_tessellation_info
1608bf215546Sopenharmony_ciradv_pipeline_init_tessellation_info(struct radv_graphics_pipeline *pipeline,
1609bf215546Sopenharmony_ci                                     const VkGraphicsPipelineCreateInfo *pCreateInfo)
1610bf215546Sopenharmony_ci{
1611bf215546Sopenharmony_ci   const VkPipelineTessellationStateCreateInfo *ts = pCreateInfo->pTessellationState;
1612bf215546Sopenharmony_ci   const VkShaderStageFlagBits tess_stages = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
1613bf215546Sopenharmony_ci                                             VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1614bf215546Sopenharmony_ci   struct radv_tessellation_info info = {0};
1615bf215546Sopenharmony_ci
1616bf215546Sopenharmony_ci   if ((pipeline->active_stages & tess_stages) == tess_stages) {
1617bf215546Sopenharmony_ci      info.patch_control_points = ts->patchControlPoints;
1618bf215546Sopenharmony_ci
1619bf215546Sopenharmony_ci      const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
1620bf215546Sopenharmony_ci         vk_find_struct_const(ts->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
1621bf215546Sopenharmony_ci      if (domain_origin_state) {
1622bf215546Sopenharmony_ci         info.domain_origin = domain_origin_state->domainOrigin;
1623bf215546Sopenharmony_ci      }
1624bf215546Sopenharmony_ci   }
1625bf215546Sopenharmony_ci
1626bf215546Sopenharmony_ci   return info;
1627bf215546Sopenharmony_ci}
1628bf215546Sopenharmony_ci
1629bf215546Sopenharmony_cistatic struct radv_viewport_info
1630bf215546Sopenharmony_ciradv_pipeline_init_viewport_info(struct radv_graphics_pipeline *pipeline,
1631bf215546Sopenharmony_ci                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1632bf215546Sopenharmony_ci{
1633bf215546Sopenharmony_ci   const VkPipelineViewportStateCreateInfo *vp = pCreateInfo->pViewportState;
1634bf215546Sopenharmony_ci   struct radv_viewport_info info = {0};
1635bf215546Sopenharmony_ci
1636bf215546Sopenharmony_ci   if (radv_is_raster_enabled(pipeline, pCreateInfo)) {
1637bf215546Sopenharmony_ci      if (!(pipeline->dynamic_states & RADV_DYNAMIC_VIEWPORT)) {
1638bf215546Sopenharmony_ci         typed_memcpy(info.viewports, vp->pViewports, vp->viewportCount);
1639bf215546Sopenharmony_ci      }
1640bf215546Sopenharmony_ci      info.viewport_count = vp->viewportCount;
1641bf215546Sopenharmony_ci
1642bf215546Sopenharmony_ci      if (!(pipeline->dynamic_states & RADV_DYNAMIC_SCISSOR)) {
1643bf215546Sopenharmony_ci         typed_memcpy(info.scissors, vp->pScissors, vp->scissorCount);
1644bf215546Sopenharmony_ci      }
1645bf215546Sopenharmony_ci      info.scissor_count = vp->scissorCount;
1646bf215546Sopenharmony_ci
1647bf215546Sopenharmony_ci      const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control =
1648bf215546Sopenharmony_ci         vk_find_struct_const(vp->pNext, PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT);
1649bf215546Sopenharmony_ci      if (depth_clip_control) {
1650bf215546Sopenharmony_ci         info.negative_one_to_one = !!depth_clip_control->negativeOneToOne;
1651bf215546Sopenharmony_ci      }
1652bf215546Sopenharmony_ci   }
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci   return info;
1655bf215546Sopenharmony_ci}
1656bf215546Sopenharmony_ci
1657bf215546Sopenharmony_cistatic struct radv_rasterization_info
1658bf215546Sopenharmony_ciradv_pipeline_init_rasterization_info(struct radv_graphics_pipeline *pipeline,
1659bf215546Sopenharmony_ci                                      const VkGraphicsPipelineCreateInfo *pCreateInfo)
1660bf215546Sopenharmony_ci{
1661bf215546Sopenharmony_ci   const VkPipelineRasterizationStateCreateInfo *rs = pCreateInfo->pRasterizationState;
1662bf215546Sopenharmony_ci   struct radv_rasterization_info info = {0};
1663bf215546Sopenharmony_ci
1664bf215546Sopenharmony_ci   info.discard_enable = rs->rasterizerDiscardEnable;
1665bf215546Sopenharmony_ci   info.front_face = rs->frontFace;
1666bf215546Sopenharmony_ci   info.cull_mode = rs->cullMode;
1667bf215546Sopenharmony_ci   info.polygon_mode = si_translate_fill(rs->polygonMode);
1668bf215546Sopenharmony_ci   info.depth_bias_enable = rs->depthBiasEnable;
1669bf215546Sopenharmony_ci   info.depth_clamp_enable = rs->depthClampEnable;
1670bf215546Sopenharmony_ci   info.line_width = rs->lineWidth;
1671bf215546Sopenharmony_ci   info.depth_bias_constant_factor = rs->depthBiasConstantFactor;
1672bf215546Sopenharmony_ci   info.depth_bias_clamp = rs->depthBiasClamp;
1673bf215546Sopenharmony_ci   info.depth_bias_slope_factor = rs->depthBiasSlopeFactor;
1674bf215546Sopenharmony_ci   info.depth_clip_disable = rs->depthClampEnable;
1675bf215546Sopenharmony_ci
1676bf215546Sopenharmony_ci   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_info =
1677bf215546Sopenharmony_ci      vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
1678bf215546Sopenharmony_ci   if (provoking_vtx_info &&
1679bf215546Sopenharmony_ci       provoking_vtx_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) {
1680bf215546Sopenharmony_ci      info.provoking_vtx_last = true;
1681bf215546Sopenharmony_ci   }
1682bf215546Sopenharmony_ci
1683bf215546Sopenharmony_ci   const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster =
1684bf215546Sopenharmony_ci      vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
1685bf215546Sopenharmony_ci   if (conservative_raster) {
1686bf215546Sopenharmony_ci      info.conservative_mode = conservative_raster->conservativeRasterizationMode;
1687bf215546Sopenharmony_ci   }
1688bf215546Sopenharmony_ci
1689bf215546Sopenharmony_ci   const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info =
1690bf215546Sopenharmony_ci      vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
1691bf215546Sopenharmony_ci   if (rast_line_info) {
1692bf215546Sopenharmony_ci      info.stippled_line_enable = rast_line_info->stippledLineEnable;
1693bf215546Sopenharmony_ci      info.line_raster_mode = rast_line_info->lineRasterizationMode;
1694bf215546Sopenharmony_ci      info.line_stipple_factor = rast_line_info->lineStippleFactor;
1695bf215546Sopenharmony_ci      info.line_stipple_pattern = rast_line_info->lineStipplePattern;
1696bf215546Sopenharmony_ci   }
1697bf215546Sopenharmony_ci
1698bf215546Sopenharmony_ci   const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
1699bf215546Sopenharmony_ci      vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
1700bf215546Sopenharmony_ci   if (depth_clip_state) {
1701bf215546Sopenharmony_ci      info.depth_clip_disable = !depth_clip_state->depthClipEnable;
1702bf215546Sopenharmony_ci   }
1703bf215546Sopenharmony_ci
1704bf215546Sopenharmony_ci   const VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
1705bf215546Sopenharmony_ci      vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD);
1706bf215546Sopenharmony_ci   if (raster_order) {
1707bf215546Sopenharmony_ci      info.order = raster_order->rasterizationOrder;
1708bf215546Sopenharmony_ci   }
1709bf215546Sopenharmony_ci
1710bf215546Sopenharmony_ci   return info;
1711bf215546Sopenharmony_ci}
1712bf215546Sopenharmony_ci
1713bf215546Sopenharmony_cistatic struct radv_discard_rectangle_info
1714bf215546Sopenharmony_ciradv_pipeline_init_discard_rectangle_info(struct radv_graphics_pipeline *pipeline,
1715bf215546Sopenharmony_ci                                          const VkGraphicsPipelineCreateInfo *pCreateInfo)
1716bf215546Sopenharmony_ci{
1717bf215546Sopenharmony_ci   const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
1718bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
1719bf215546Sopenharmony_ci   struct radv_discard_rectangle_info info = {0};
1720bf215546Sopenharmony_ci
1721bf215546Sopenharmony_ci   if (discard_rectangle_info) {
1722bf215546Sopenharmony_ci      info.mode = discard_rectangle_info->discardRectangleMode;
1723bf215546Sopenharmony_ci      if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE)) {
1724bf215546Sopenharmony_ci         typed_memcpy(info.rects, discard_rectangle_info->pDiscardRectangles,
1725bf215546Sopenharmony_ci                      discard_rectangle_info->discardRectangleCount);
1726bf215546Sopenharmony_ci      }
1727bf215546Sopenharmony_ci      info.count = discard_rectangle_info->discardRectangleCount;
1728bf215546Sopenharmony_ci   }
1729bf215546Sopenharmony_ci
1730bf215546Sopenharmony_ci   return info;
1731bf215546Sopenharmony_ci}
1732bf215546Sopenharmony_ci
1733bf215546Sopenharmony_cistatic struct radv_multisample_info
1734bf215546Sopenharmony_ciradv_pipeline_init_multisample_info(struct radv_graphics_pipeline *pipeline,
1735bf215546Sopenharmony_ci                                    const VkGraphicsPipelineCreateInfo *pCreateInfo)
1736bf215546Sopenharmony_ci{
1737bf215546Sopenharmony_ci   const VkPipelineMultisampleStateCreateInfo *ms = pCreateInfo->pMultisampleState;
1738bf215546Sopenharmony_ci   struct radv_multisample_info info = {0};
1739bf215546Sopenharmony_ci
1740bf215546Sopenharmony_ci   if (radv_is_raster_enabled(pipeline, pCreateInfo)) {
1741bf215546Sopenharmony_ci      info.raster_samples = ms->rasterizationSamples;
1742bf215546Sopenharmony_ci      info.sample_shading_enable = ms->sampleShadingEnable;
1743bf215546Sopenharmony_ci      info.min_sample_shading = ms->minSampleShading;
1744bf215546Sopenharmony_ci      info.alpha_to_coverage_enable = ms->alphaToCoverageEnable;
1745bf215546Sopenharmony_ci      if (ms->pSampleMask) {
1746bf215546Sopenharmony_ci         info.sample_mask = ms->pSampleMask[0] & 0xffff;
1747bf215546Sopenharmony_ci      } else {
1748bf215546Sopenharmony_ci         info.sample_mask = 0xffff;
1749bf215546Sopenharmony_ci      }
1750bf215546Sopenharmony_ci
1751bf215546Sopenharmony_ci      const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
1752bf215546Sopenharmony_ci         vk_find_struct_const(ms->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
1753bf215546Sopenharmony_ci      if (sample_location_info) {
1754bf215546Sopenharmony_ci         /* If sampleLocationsEnable is VK_FALSE, the default sample locations are used and the
1755bf215546Sopenharmony_ci          * values specified in sampleLocationsInfo are ignored.
1756bf215546Sopenharmony_ci          */
1757bf215546Sopenharmony_ci         info.sample_locs_enable = sample_location_info->sampleLocationsEnable;
1758bf215546Sopenharmony_ci         if (sample_location_info->sampleLocationsEnable) {
1759bf215546Sopenharmony_ci            const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
1760bf215546Sopenharmony_ci               &sample_location_info->sampleLocationsInfo;
1761bf215546Sopenharmony_ci            assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
1762bf215546Sopenharmony_ci
1763bf215546Sopenharmony_ci            info.sample_locs_per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
1764bf215546Sopenharmony_ci            info.sample_locs_grid_size = pSampleLocationsInfo->sampleLocationGridSize;
1765bf215546Sopenharmony_ci            for (uint32_t i = 0; i < pSampleLocationsInfo->sampleLocationsCount; i++) {
1766bf215546Sopenharmony_ci               info.sample_locs[i] = pSampleLocationsInfo->pSampleLocations[i];
1767bf215546Sopenharmony_ci            }
1768bf215546Sopenharmony_ci            info.sample_locs_count = pSampleLocationsInfo->sampleLocationsCount;
1769bf215546Sopenharmony_ci         }
1770bf215546Sopenharmony_ci      }
1771bf215546Sopenharmony_ci   } else {
1772bf215546Sopenharmony_ci      info.raster_samples = VK_SAMPLE_COUNT_1_BIT;
1773bf215546Sopenharmony_ci   }
1774bf215546Sopenharmony_ci
1775bf215546Sopenharmony_ci   return info;
1776bf215546Sopenharmony_ci}
1777bf215546Sopenharmony_ci
1778bf215546Sopenharmony_cistatic struct radv_depth_stencil_info
1779bf215546Sopenharmony_ciradv_pipeline_init_depth_stencil_info(struct radv_graphics_pipeline *pipeline,
1780bf215546Sopenharmony_ci                                      const VkGraphicsPipelineCreateInfo *pCreateInfo)
1781bf215546Sopenharmony_ci{
1782bf215546Sopenharmony_ci   const VkPipelineDepthStencilStateCreateInfo *ds = pCreateInfo->pDepthStencilState;
1783bf215546Sopenharmony_ci   const VkPipelineRenderingCreateInfo *ri =
1784bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO);
1785bf215546Sopenharmony_ci   struct radv_depth_stencil_info info = {0};
1786bf215546Sopenharmony_ci
1787bf215546Sopenharmony_ci   if (radv_is_raster_enabled(pipeline, pCreateInfo) &&
1788bf215546Sopenharmony_ci       (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
1789bf215546Sopenharmony_ci        ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)) {
1790bf215546Sopenharmony_ci      info.depth_bounds_test_enable = ds->depthBoundsTestEnable;
1791bf215546Sopenharmony_ci      info.depth_bounds.min = ds->minDepthBounds;
1792bf215546Sopenharmony_ci      info.depth_bounds.max = ds->maxDepthBounds;
1793bf215546Sopenharmony_ci      info.stencil_test_enable = ds->stencilTestEnable;
1794bf215546Sopenharmony_ci      info.front.fail_op = ds->front.failOp;
1795bf215546Sopenharmony_ci      info.front.pass_op = ds->front.passOp;
1796bf215546Sopenharmony_ci      info.front.depth_fail_op = ds->front.depthFailOp;
1797bf215546Sopenharmony_ci      info.front.compare_op = ds->front.compareOp;
1798bf215546Sopenharmony_ci      info.front.compare_mask = ds->front.compareMask;
1799bf215546Sopenharmony_ci      info.front.write_mask = ds->front.writeMask;
1800bf215546Sopenharmony_ci      info.front.reference = ds->front.reference;
1801bf215546Sopenharmony_ci      info.back.fail_op = ds->back.failOp;
1802bf215546Sopenharmony_ci      info.back.pass_op = ds->back.passOp;
1803bf215546Sopenharmony_ci      info.back.depth_fail_op = ds->back.depthFailOp;
1804bf215546Sopenharmony_ci      info.back.compare_op = ds->back.compareOp;
1805bf215546Sopenharmony_ci      info.back.compare_mask = ds->back.compareMask;
1806bf215546Sopenharmony_ci      info.back.write_mask = ds->back.writeMask;
1807bf215546Sopenharmony_ci      info.back.reference = ds->back.reference;
1808bf215546Sopenharmony_ci      info.depth_test_enable = ds->depthTestEnable;
1809bf215546Sopenharmony_ci      info.depth_write_enable = ds->depthWriteEnable;
1810bf215546Sopenharmony_ci      info.depth_compare_op = ds->depthCompareOp;
1811bf215546Sopenharmony_ci   }
1812bf215546Sopenharmony_ci
1813bf215546Sopenharmony_ci   return info;
1814bf215546Sopenharmony_ci}
1815bf215546Sopenharmony_ci
1816bf215546Sopenharmony_cistatic struct radv_rendering_info
1817bf215546Sopenharmony_ciradv_pipeline_init_rendering_info(struct radv_graphics_pipeline *pipeline,
1818bf215546Sopenharmony_ci                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1819bf215546Sopenharmony_ci{
1820bf215546Sopenharmony_ci   const VkPipelineRenderingCreateInfo *ri =
1821bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO);
1822bf215546Sopenharmony_ci   struct radv_rendering_info info = {0};
1823bf215546Sopenharmony_ci
1824bf215546Sopenharmony_ci   info.view_mask = ri->viewMask;
1825bf215546Sopenharmony_ci   for (uint32_t i = 0; i < ri->colorAttachmentCount; i++) {
1826bf215546Sopenharmony_ci      info.color_att_formats[i] = ri->pColorAttachmentFormats[i];
1827bf215546Sopenharmony_ci   }
1828bf215546Sopenharmony_ci   info.color_att_count = ri->colorAttachmentCount;
1829bf215546Sopenharmony_ci   info.depth_att_format = ri->depthAttachmentFormat;
1830bf215546Sopenharmony_ci   info.stencil_att_format = ri->stencilAttachmentFormat;
1831bf215546Sopenharmony_ci
1832bf215546Sopenharmony_ci   return info;
1833bf215546Sopenharmony_ci}
1834bf215546Sopenharmony_ci
1835bf215546Sopenharmony_cistatic struct radv_color_blend_info
1836bf215546Sopenharmony_ciradv_pipeline_init_color_blend_info(struct radv_graphics_pipeline *pipeline,
1837bf215546Sopenharmony_ci                                    const VkGraphicsPipelineCreateInfo *pCreateInfo)
1838bf215546Sopenharmony_ci{
1839bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
1840bf215546Sopenharmony_ci   const VkPipelineColorBlendStateCreateInfo *cb = pCreateInfo->pColorBlendState;
1841bf215546Sopenharmony_ci   const VkPipelineRenderingCreateInfo *ri =
1842bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO);
1843bf215546Sopenharmony_ci   struct radv_color_blend_info info = {0};
1844bf215546Sopenharmony_ci   bool has_color_att = false;
1845bf215546Sopenharmony_ci
1846bf215546Sopenharmony_ci   for (uint32_t i = 0; i < ri->colorAttachmentCount; ++i) {
1847bf215546Sopenharmony_ci      if (ri->pColorAttachmentFormats[i] != VK_FORMAT_UNDEFINED) {
1848bf215546Sopenharmony_ci         has_color_att = true;
1849bf215546Sopenharmony_ci         break;
1850bf215546Sopenharmony_ci      }
1851bf215546Sopenharmony_ci   }
1852bf215546Sopenharmony_ci
1853bf215546Sopenharmony_ci   if (radv_is_raster_enabled(pipeline, pCreateInfo) && has_color_att) {
1854bf215546Sopenharmony_ci      for (uint32_t i = 0; i < cb->attachmentCount; i++) {
1855bf215546Sopenharmony_ci         const VkPipelineColorBlendAttachmentState *att = &cb->pAttachments[i];
1856bf215546Sopenharmony_ci
1857bf215546Sopenharmony_ci         info.att[i].color_write_mask = att->colorWriteMask;
1858bf215546Sopenharmony_ci         info.att[i].blend_enable = att->blendEnable;
1859bf215546Sopenharmony_ci         info.att[i].color_blend_op = si_translate_blend_function(att->colorBlendOp);
1860bf215546Sopenharmony_ci         info.att[i].alpha_blend_op = si_translate_blend_function(att->alphaBlendOp);
1861bf215546Sopenharmony_ci         info.att[i].src_color_blend_factor =
1862bf215546Sopenharmony_ci            si_translate_blend_factor(pdevice->rad_info.gfx_level, att->srcColorBlendFactor);
1863bf215546Sopenharmony_ci         info.att[i].dst_color_blend_factor =
1864bf215546Sopenharmony_ci            si_translate_blend_factor(pdevice->rad_info.gfx_level, att->dstColorBlendFactor);
1865bf215546Sopenharmony_ci         info.att[i].src_alpha_blend_factor =
1866bf215546Sopenharmony_ci            si_translate_blend_factor(pdevice->rad_info.gfx_level, att->srcAlphaBlendFactor);
1867bf215546Sopenharmony_ci         info.att[i].dst_alpha_blend_factor =
1868bf215546Sopenharmony_ci            si_translate_blend_factor(pdevice->rad_info.gfx_level, att->dstAlphaBlendFactor);
1869bf215546Sopenharmony_ci      }
1870bf215546Sopenharmony_ci      info.att_count = cb->attachmentCount;
1871bf215546Sopenharmony_ci
1872bf215546Sopenharmony_ci      for (uint32_t i = 0; i < 4; i++) {
1873bf215546Sopenharmony_ci         info.blend_constants[i] = cb->blendConstants[i];
1874bf215546Sopenharmony_ci      }
1875bf215546Sopenharmony_ci
1876bf215546Sopenharmony_ci      info.logic_op_enable = cb->logicOpEnable;
1877bf215546Sopenharmony_ci      if (info.logic_op_enable)
1878bf215546Sopenharmony_ci         info.logic_op = si_translate_blend_logic_op(cb->logicOp);
1879bf215546Sopenharmony_ci
1880bf215546Sopenharmony_ci      const VkPipelineColorWriteCreateInfoEXT *color_write_info =
1881bf215546Sopenharmony_ci         vk_find_struct_const(cb->pNext, PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
1882bf215546Sopenharmony_ci      if (color_write_info) {
1883bf215546Sopenharmony_ci         for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
1884bf215546Sopenharmony_ci            info.color_write_enable |=
1885bf215546Sopenharmony_ci               color_write_info->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
1886bf215546Sopenharmony_ci         }
1887bf215546Sopenharmony_ci      } else {
1888bf215546Sopenharmony_ci         info.color_write_enable = 0xffffffffu;
1889bf215546Sopenharmony_ci      }
1890bf215546Sopenharmony_ci   }
1891bf215546Sopenharmony_ci
1892bf215546Sopenharmony_ci   return info;
1893bf215546Sopenharmony_ci}
1894bf215546Sopenharmony_ci
1895bf215546Sopenharmony_cistatic struct radv_fragment_shading_rate_info
1896bf215546Sopenharmony_ciradv_pipeline_init_fragment_shading_rate_info(struct radv_graphics_pipeline *pipeline,
1897bf215546Sopenharmony_ci                                              const VkGraphicsPipelineCreateInfo *pCreateInfo)
1898bf215546Sopenharmony_ci{
1899bf215546Sopenharmony_ci   const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate =
1900bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
1901bf215546Sopenharmony_ci   struct radv_fragment_shading_rate_info info = {0};
1902bf215546Sopenharmony_ci
1903bf215546Sopenharmony_ci   if (shading_rate && !(pipeline->dynamic_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE)) {
1904bf215546Sopenharmony_ci      info.size = shading_rate->fragmentSize;
1905bf215546Sopenharmony_ci      for (int i = 0; i < 2; i++)
1906bf215546Sopenharmony_ci         info.combiner_ops[i] = shading_rate->combinerOps[i];
1907bf215546Sopenharmony_ci   } else {
1908bf215546Sopenharmony_ci      info.size = (VkExtent2D){ 1, 1 };
1909bf215546Sopenharmony_ci      info.combiner_ops[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
1910bf215546Sopenharmony_ci      info.combiner_ops[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
1911bf215546Sopenharmony_ci   }
1912bf215546Sopenharmony_ci
1913bf215546Sopenharmony_ci   return info;
1914bf215546Sopenharmony_ci}
1915bf215546Sopenharmony_ci
1916bf215546Sopenharmony_cistatic struct radv_graphics_pipeline_info
1917bf215546Sopenharmony_ciradv_pipeline_init_graphics_info(struct radv_graphics_pipeline *pipeline,
1918bf215546Sopenharmony_ci                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1919bf215546Sopenharmony_ci{
1920bf215546Sopenharmony_ci   struct radv_graphics_pipeline_info info = {0};
1921bf215546Sopenharmony_ci
1922bf215546Sopenharmony_ci   /* Vertex input interface structs have to be ignored if the pipeline includes a mesh shader. */
1923bf215546Sopenharmony_ci   if (!(pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV)) {
1924bf215546Sopenharmony_ci      info.vi = radv_pipeline_init_vertex_input_info(pipeline, pCreateInfo);
1925bf215546Sopenharmony_ci      info.ia = radv_pipeline_init_input_assembly_info(pipeline, pCreateInfo);
1926bf215546Sopenharmony_ci   }
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_ci   info.ts = radv_pipeline_init_tessellation_info(pipeline, pCreateInfo);
1929bf215546Sopenharmony_ci   info.vp = radv_pipeline_init_viewport_info(pipeline, pCreateInfo);
1930bf215546Sopenharmony_ci   info.rs = radv_pipeline_init_rasterization_info(pipeline, pCreateInfo);
1931bf215546Sopenharmony_ci   info.dr = radv_pipeline_init_discard_rectangle_info(pipeline, pCreateInfo);
1932bf215546Sopenharmony_ci
1933bf215546Sopenharmony_ci   info.ms = radv_pipeline_init_multisample_info(pipeline, pCreateInfo);
1934bf215546Sopenharmony_ci   info.ds = radv_pipeline_init_depth_stencil_info(pipeline, pCreateInfo);
1935bf215546Sopenharmony_ci   info.ri = radv_pipeline_init_rendering_info(pipeline, pCreateInfo);
1936bf215546Sopenharmony_ci   info.cb = radv_pipeline_init_color_blend_info(pipeline, pCreateInfo);
1937bf215546Sopenharmony_ci
1938bf215546Sopenharmony_ci   info.fsr = radv_pipeline_init_fragment_shading_rate_info(pipeline, pCreateInfo);
1939bf215546Sopenharmony_ci
1940bf215546Sopenharmony_ci   /* VK_AMD_mixed_attachment_samples */
1941bf215546Sopenharmony_ci   const VkAttachmentSampleCountInfoAMD *sample_info =
1942bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, ATTACHMENT_SAMPLE_COUNT_INFO_AMD);
1943bf215546Sopenharmony_ci   if (sample_info) {
1944bf215546Sopenharmony_ci      for (uint32_t i = 0; i < sample_info->colorAttachmentCount; ++i) {
1945bf215546Sopenharmony_ci         if (info.ri.color_att_formats[i] != VK_FORMAT_UNDEFINED) {
1946bf215546Sopenharmony_ci            info.color_att_samples = MAX2(info.color_att_samples, sample_info->pColorAttachmentSamples[i]);
1947bf215546Sopenharmony_ci         }
1948bf215546Sopenharmony_ci      }
1949bf215546Sopenharmony_ci      info.ds_att_samples = sample_info->depthStencilAttachmentSamples;
1950bf215546Sopenharmony_ci   }
1951bf215546Sopenharmony_ci
1952bf215546Sopenharmony_ci   return info;
1953bf215546Sopenharmony_ci}
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_cistatic void
1956bf215546Sopenharmony_ciradv_pipeline_init_input_assembly_state(struct radv_graphics_pipeline *pipeline,
1957bf215546Sopenharmony_ci                                        const struct radv_graphics_pipeline_info *info)
1958bf215546Sopenharmony_ci{
1959bf215546Sopenharmony_ci   pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(pipeline);
1960bf215546Sopenharmony_ci}
1961bf215546Sopenharmony_ci
1962bf215546Sopenharmony_cistatic void
1963bf215546Sopenharmony_ciradv_pipeline_init_dynamic_state(struct radv_graphics_pipeline *pipeline,
1964bf215546Sopenharmony_ci                                 const struct radv_graphics_pipeline_info *info)
1965bf215546Sopenharmony_ci{
1966bf215546Sopenharmony_ci   uint64_t needed_states = radv_pipeline_needed_dynamic_state(pipeline, info);
1967bf215546Sopenharmony_ci   uint64_t states = needed_states;
1968bf215546Sopenharmony_ci
1969bf215546Sopenharmony_ci   pipeline->dynamic_state = default_dynamic_state;
1970bf215546Sopenharmony_ci   pipeline->needed_dynamic_state = needed_states;
1971bf215546Sopenharmony_ci
1972bf215546Sopenharmony_ci   states &= ~pipeline->dynamic_states;
1973bf215546Sopenharmony_ci
1974bf215546Sopenharmony_ci   struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
1975bf215546Sopenharmony_ci
1976bf215546Sopenharmony_ci   if (needed_states & RADV_DYNAMIC_VIEWPORT) {
1977bf215546Sopenharmony_ci      dynamic->viewport.count = info->vp.viewport_count;
1978bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_VIEWPORT) {
1979bf215546Sopenharmony_ci         typed_memcpy(dynamic->viewport.viewports, info->vp.viewports, info->vp.viewport_count);
1980bf215546Sopenharmony_ci         for (unsigned i = 0; i < dynamic->viewport.count; i++)
1981bf215546Sopenharmony_ci            radv_get_viewport_xform(&dynamic->viewport.viewports[i],
1982bf215546Sopenharmony_ci                                    dynamic->viewport.xform[i].scale, dynamic->viewport.xform[i].translate);
1983bf215546Sopenharmony_ci      }
1984bf215546Sopenharmony_ci   }
1985bf215546Sopenharmony_ci
1986bf215546Sopenharmony_ci   if (needed_states & RADV_DYNAMIC_SCISSOR) {
1987bf215546Sopenharmony_ci      dynamic->scissor.count = info->vp.scissor_count;
1988bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_SCISSOR) {
1989bf215546Sopenharmony_ci         typed_memcpy(dynamic->scissor.scissors, info->vp.scissors, info->vp.scissor_count);
1990bf215546Sopenharmony_ci      }
1991bf215546Sopenharmony_ci   }
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_LINE_WIDTH) {
1994bf215546Sopenharmony_ci      dynamic->line_width = info->rs.line_width;
1995bf215546Sopenharmony_ci   }
1996bf215546Sopenharmony_ci
1997bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_DEPTH_BIAS) {
1998bf215546Sopenharmony_ci      dynamic->depth_bias.bias = info->rs.depth_bias_constant_factor;
1999bf215546Sopenharmony_ci      dynamic->depth_bias.clamp = info->rs.depth_bias_clamp;
2000bf215546Sopenharmony_ci      dynamic->depth_bias.slope = info->rs.depth_bias_slope_factor;
2001bf215546Sopenharmony_ci   }
2002bf215546Sopenharmony_ci
2003bf215546Sopenharmony_ci   /* Section 9.2 of the Vulkan 1.0.15 spec says:
2004bf215546Sopenharmony_ci    *
2005bf215546Sopenharmony_ci    *    pColorBlendState is [...] NULL if the pipeline has rasterization
2006bf215546Sopenharmony_ci    *    disabled or if the subpass of the render pass the pipeline is
2007bf215546Sopenharmony_ci    *    created against does not use any color attachments.
2008bf215546Sopenharmony_ci    */
2009bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_BLEND_CONSTANTS) {
2010bf215546Sopenharmony_ci      typed_memcpy(dynamic->blend_constants, info->cb.blend_constants, 4);
2011bf215546Sopenharmony_ci   }
2012bf215546Sopenharmony_ci
2013bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_CULL_MODE) {
2014bf215546Sopenharmony_ci      dynamic->cull_mode = info->rs.cull_mode;
2015bf215546Sopenharmony_ci   }
2016bf215546Sopenharmony_ci
2017bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_FRONT_FACE) {
2018bf215546Sopenharmony_ci      dynamic->front_face = info->rs.front_face;
2019bf215546Sopenharmony_ci   }
2020bf215546Sopenharmony_ci
2021bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
2022bf215546Sopenharmony_ci      dynamic->primitive_topology = info->ia.primitive_topology;
2023bf215546Sopenharmony_ci   }
2024bf215546Sopenharmony_ci
2025bf215546Sopenharmony_ci   /* If there is no depthstencil attachment, then don't read
2026bf215546Sopenharmony_ci    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
2027bf215546Sopenharmony_ci    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
2028bf215546Sopenharmony_ci    * no need to override the depthstencil defaults in
2029bf215546Sopenharmony_ci    * radv_pipeline::dynamic_state when there is no depthstencil attachment.
2030bf215546Sopenharmony_ci    *
2031bf215546Sopenharmony_ci    * Section 9.2 of the Vulkan 1.0.15 spec says:
2032bf215546Sopenharmony_ci    *
2033bf215546Sopenharmony_ci    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
2034bf215546Sopenharmony_ci    *    disabled or if the subpass of the render pass the pipeline is created
2035bf215546Sopenharmony_ci    *    against does not use a depth/stencil attachment.
2036bf215546Sopenharmony_ci    */
2037bf215546Sopenharmony_ci   if (needed_states && radv_pipeline_has_ds_attachments(&info->ri)) {
2038bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
2039bf215546Sopenharmony_ci         dynamic->depth_bounds.min = info->ds.depth_bounds.min;
2040bf215546Sopenharmony_ci         dynamic->depth_bounds.max = info->ds.depth_bounds.max;
2041bf215546Sopenharmony_ci      }
2042bf215546Sopenharmony_ci
2043bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
2044bf215546Sopenharmony_ci         dynamic->stencil_compare_mask.front = info->ds.front.compare_mask;
2045bf215546Sopenharmony_ci         dynamic->stencil_compare_mask.back = info->ds.back.compare_mask;
2046bf215546Sopenharmony_ci      }
2047bf215546Sopenharmony_ci
2048bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
2049bf215546Sopenharmony_ci         dynamic->stencil_write_mask.front = info->ds.front.write_mask;
2050bf215546Sopenharmony_ci         dynamic->stencil_write_mask.back = info->ds.back.write_mask;
2051bf215546Sopenharmony_ci      }
2052bf215546Sopenharmony_ci
2053bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
2054bf215546Sopenharmony_ci         dynamic->stencil_reference.front = info->ds.front.reference;
2055bf215546Sopenharmony_ci         dynamic->stencil_reference.back = info->ds.back.reference;
2056bf215546Sopenharmony_ci      }
2057bf215546Sopenharmony_ci
2058bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
2059bf215546Sopenharmony_ci         dynamic->depth_test_enable = info->ds.depth_test_enable;
2060bf215546Sopenharmony_ci      }
2061bf215546Sopenharmony_ci
2062bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
2063bf215546Sopenharmony_ci         dynamic->depth_write_enable = info->ds.depth_write_enable;
2064bf215546Sopenharmony_ci      }
2065bf215546Sopenharmony_ci
2066bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
2067bf215546Sopenharmony_ci         dynamic->depth_compare_op = info->ds.depth_compare_op;
2068bf215546Sopenharmony_ci      }
2069bf215546Sopenharmony_ci
2070bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
2071bf215546Sopenharmony_ci         dynamic->depth_bounds_test_enable = info->ds.depth_bounds_test_enable;
2072bf215546Sopenharmony_ci      }
2073bf215546Sopenharmony_ci
2074bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
2075bf215546Sopenharmony_ci         dynamic->stencil_test_enable = info->ds.stencil_test_enable;
2076bf215546Sopenharmony_ci      }
2077bf215546Sopenharmony_ci
2078bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_STENCIL_OP) {
2079bf215546Sopenharmony_ci         dynamic->stencil_op.front.compare_op = info->ds.front.compare_op;
2080bf215546Sopenharmony_ci         dynamic->stencil_op.front.fail_op = info->ds.front.fail_op;
2081bf215546Sopenharmony_ci         dynamic->stencil_op.front.pass_op = info->ds.front.pass_op;
2082bf215546Sopenharmony_ci         dynamic->stencil_op.front.depth_fail_op = info->ds.front.depth_fail_op;
2083bf215546Sopenharmony_ci
2084bf215546Sopenharmony_ci         dynamic->stencil_op.back.compare_op = info->ds.back.compare_op;
2085bf215546Sopenharmony_ci         dynamic->stencil_op.back.fail_op = info->ds.back.fail_op;
2086bf215546Sopenharmony_ci         dynamic->stencil_op.back.pass_op = info->ds.back.pass_op;
2087bf215546Sopenharmony_ci         dynamic->stencil_op.back.depth_fail_op = info->ds.back.depth_fail_op;
2088bf215546Sopenharmony_ci      }
2089bf215546Sopenharmony_ci   }
2090bf215546Sopenharmony_ci
2091bf215546Sopenharmony_ci   if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
2092bf215546Sopenharmony_ci      dynamic->discard_rectangle.count = info->dr.count;
2093bf215546Sopenharmony_ci      if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
2094bf215546Sopenharmony_ci         typed_memcpy(dynamic->discard_rectangle.rectangles, info->dr.rects, info->dr.count);
2095bf215546Sopenharmony_ci      }
2096bf215546Sopenharmony_ci   }
2097bf215546Sopenharmony_ci
2098bf215546Sopenharmony_ci   if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
2099bf215546Sopenharmony_ci      if (info->ms.sample_locs_enable) {
2100bf215546Sopenharmony_ci         dynamic->sample_location.per_pixel = info->ms.sample_locs_per_pixel;
2101bf215546Sopenharmony_ci         dynamic->sample_location.grid_size = info->ms.sample_locs_grid_size;
2102bf215546Sopenharmony_ci         dynamic->sample_location.count = info->ms.sample_locs_count;
2103bf215546Sopenharmony_ci         typed_memcpy(&dynamic->sample_location.locations[0], info->ms.sample_locs,
2104bf215546Sopenharmony_ci                      info->ms.sample_locs_count);
2105bf215546Sopenharmony_ci      }
2106bf215546Sopenharmony_ci   }
2107bf215546Sopenharmony_ci
2108bf215546Sopenharmony_ci   if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) {
2109bf215546Sopenharmony_ci      dynamic->line_stipple.factor = info->rs.line_stipple_factor;
2110bf215546Sopenharmony_ci      dynamic->line_stipple.pattern = info->rs.line_stipple_pattern;
2111bf215546Sopenharmony_ci   }
2112bf215546Sopenharmony_ci
2113bf215546Sopenharmony_ci   if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE) ||
2114bf215546Sopenharmony_ci       !(states & RADV_DYNAMIC_VERTEX_INPUT))
2115bf215546Sopenharmony_ci      pipeline->uses_dynamic_stride = true;
2116bf215546Sopenharmony_ci
2117bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
2118bf215546Sopenharmony_ci      dynamic->fragment_shading_rate.size = info->fsr.size;
2119bf215546Sopenharmony_ci      for (int i = 0; i < 2; i++)
2120bf215546Sopenharmony_ci         dynamic->fragment_shading_rate.combiner_ops[i] = info->fsr.combiner_ops[i];
2121bf215546Sopenharmony_ci   }
2122bf215546Sopenharmony_ci
2123bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) {
2124bf215546Sopenharmony_ci      dynamic->depth_bias_enable = info->rs.depth_bias_enable;
2125bf215546Sopenharmony_ci   }
2126bf215546Sopenharmony_ci
2127bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) {
2128bf215546Sopenharmony_ci      dynamic->primitive_restart_enable = info->ia.primitive_restart_enable;
2129bf215546Sopenharmony_ci   }
2130bf215546Sopenharmony_ci
2131bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
2132bf215546Sopenharmony_ci      dynamic->rasterizer_discard_enable = info->rs.discard_enable;
2133bf215546Sopenharmony_ci   }
2134bf215546Sopenharmony_ci
2135bf215546Sopenharmony_ci   if (radv_pipeline_has_color_attachments(&info->ri) && states & RADV_DYNAMIC_LOGIC_OP) {
2136bf215546Sopenharmony_ci      if (info->cb.logic_op_enable) {
2137bf215546Sopenharmony_ci         dynamic->logic_op = info->cb.logic_op;
2138bf215546Sopenharmony_ci      } else {
2139bf215546Sopenharmony_ci         dynamic->logic_op = V_028808_ROP3_COPY;
2140bf215546Sopenharmony_ci      }
2141bf215546Sopenharmony_ci   }
2142bf215546Sopenharmony_ci
2143bf215546Sopenharmony_ci   if (states & RADV_DYNAMIC_COLOR_WRITE_ENABLE) {
2144bf215546Sopenharmony_ci      dynamic->color_write_enable = info->cb.color_write_enable;
2145bf215546Sopenharmony_ci   }
2146bf215546Sopenharmony_ci
2147bf215546Sopenharmony_ci   pipeline->dynamic_state.mask = states;
2148bf215546Sopenharmony_ci}
2149bf215546Sopenharmony_ci
2150bf215546Sopenharmony_cistatic void
2151bf215546Sopenharmony_ciradv_pipeline_init_raster_state(struct radv_graphics_pipeline *pipeline,
2152bf215546Sopenharmony_ci                                const struct radv_graphics_pipeline_info *info)
2153bf215546Sopenharmony_ci{
2154bf215546Sopenharmony_ci   const struct radv_device *device = pipeline->base.device;
2155bf215546Sopenharmony_ci
2156bf215546Sopenharmony_ci   pipeline->pa_su_sc_mode_cntl =
2157bf215546Sopenharmony_ci      S_028814_FACE(info->rs.front_face) |
2158bf215546Sopenharmony_ci      S_028814_CULL_FRONT(!!(info->rs.cull_mode & VK_CULL_MODE_FRONT_BIT)) |
2159bf215546Sopenharmony_ci      S_028814_CULL_BACK(!!(info->rs.cull_mode & VK_CULL_MODE_BACK_BIT)) |
2160bf215546Sopenharmony_ci      S_028814_POLY_MODE(info->rs.polygon_mode != V_028814_X_DRAW_TRIANGLES) |
2161bf215546Sopenharmony_ci      S_028814_POLYMODE_FRONT_PTYPE(info->rs.polygon_mode) |
2162bf215546Sopenharmony_ci      S_028814_POLYMODE_BACK_PTYPE(info->rs.polygon_mode) |
2163bf215546Sopenharmony_ci      S_028814_POLY_OFFSET_FRONT_ENABLE(info->rs.depth_bias_enable) |
2164bf215546Sopenharmony_ci      S_028814_POLY_OFFSET_BACK_ENABLE(info->rs.depth_bias_enable) |
2165bf215546Sopenharmony_ci      S_028814_POLY_OFFSET_PARA_ENABLE(info->rs.depth_bias_enable) |
2166bf215546Sopenharmony_ci      S_028814_PROVOKING_VTX_LAST(info->rs.provoking_vtx_last);
2167bf215546Sopenharmony_ci
2168bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX10) {
2169bf215546Sopenharmony_ci      /* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */
2170bf215546Sopenharmony_ci      pipeline->pa_su_sc_mode_cntl |=
2171bf215546Sopenharmony_ci         S_028814_KEEP_TOGETHER_ENABLE(info->rs.polygon_mode != V_028814_X_DRAW_TRIANGLES);
2172bf215546Sopenharmony_ci   }
2173bf215546Sopenharmony_ci
2174bf215546Sopenharmony_ci   pipeline->pa_cl_clip_cntl =
2175bf215546Sopenharmony_ci      S_028810_DX_CLIP_SPACE_DEF(!pipeline->negative_one_to_one) |
2176bf215546Sopenharmony_ci      S_028810_ZCLIP_NEAR_DISABLE(info->rs.depth_clip_disable) |
2177bf215546Sopenharmony_ci      S_028810_ZCLIP_FAR_DISABLE(info->rs.depth_clip_disable) |
2178bf215546Sopenharmony_ci      S_028810_DX_RASTERIZATION_KILL(info->rs.discard_enable) |
2179bf215546Sopenharmony_ci      S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
2180bf215546Sopenharmony_ci
2181bf215546Sopenharmony_ci   pipeline->uses_conservative_overestimate =
2182bf215546Sopenharmony_ci      info->rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT;
2183bf215546Sopenharmony_ci
2184bf215546Sopenharmony_ci   pipeline->depth_clamp_mode = RADV_DEPTH_CLAMP_MODE_VIEWPORT;
2185bf215546Sopenharmony_ci   if (!info->rs.depth_clamp_enable) {
2186bf215546Sopenharmony_ci      /* For optimal performance, depth clamping should always be enabled except if the
2187bf215546Sopenharmony_ci       * application disables clamping explicitly or uses depth values outside of the [0.0, 1.0]
2188bf215546Sopenharmony_ci       * range.
2189bf215546Sopenharmony_ci       */
2190bf215546Sopenharmony_ci      if (info->rs.depth_clip_disable ||
2191bf215546Sopenharmony_ci          device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
2192bf215546Sopenharmony_ci         pipeline->depth_clamp_mode = RADV_DEPTH_CLAMP_MODE_DISABLED;
2193bf215546Sopenharmony_ci      } else {
2194bf215546Sopenharmony_ci         pipeline->depth_clamp_mode = RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE;
2195bf215546Sopenharmony_ci      }
2196bf215546Sopenharmony_ci   }
2197bf215546Sopenharmony_ci}
2198bf215546Sopenharmony_ci
2199bf215546Sopenharmony_cistatic struct radv_depth_stencil_state
2200bf215546Sopenharmony_ciradv_pipeline_init_depth_stencil_state(struct radv_graphics_pipeline *pipeline,
2201bf215546Sopenharmony_ci                                       const struct radv_graphics_pipeline_info *info)
2202bf215546Sopenharmony_ci{
2203bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
2204bf215546Sopenharmony_ci   struct radv_depth_stencil_state ds_state = {0};
2205bf215546Sopenharmony_ci   uint32_t db_depth_control = 0;
2206bf215546Sopenharmony_ci
2207bf215546Sopenharmony_ci   bool has_depth_attachment = info->ri.depth_att_format != VK_FORMAT_UNDEFINED;
2208bf215546Sopenharmony_ci   bool has_stencil_attachment = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED;
2209bf215546Sopenharmony_ci
2210bf215546Sopenharmony_ci   if (has_depth_attachment) {
2211bf215546Sopenharmony_ci      /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
2212bf215546Sopenharmony_ci      ds_state.db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(info->ms.raster_samples > 2);
2213bf215546Sopenharmony_ci
2214bf215546Sopenharmony_ci      if (pdevice->rad_info.gfx_level >= GFX10_3)
2215bf215546Sopenharmony_ci         ds_state.db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1);
2216bf215546Sopenharmony_ci
2217bf215546Sopenharmony_ci      db_depth_control = S_028800_Z_ENABLE(info->ds.depth_test_enable) |
2218bf215546Sopenharmony_ci                         S_028800_Z_WRITE_ENABLE(info->ds.depth_write_enable) |
2219bf215546Sopenharmony_ci                         S_028800_ZFUNC(info->ds.depth_compare_op) |
2220bf215546Sopenharmony_ci                         S_028800_DEPTH_BOUNDS_ENABLE(info->ds.depth_bounds_test_enable);
2221bf215546Sopenharmony_ci   }
2222bf215546Sopenharmony_ci
2223bf215546Sopenharmony_ci   if (has_stencil_attachment && info->ds.stencil_test_enable) {
2224bf215546Sopenharmony_ci      db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
2225bf215546Sopenharmony_ci      db_depth_control |= S_028800_STENCILFUNC(info->ds.front.compare_op);
2226bf215546Sopenharmony_ci      db_depth_control |= S_028800_STENCILFUNC_BF(info->ds.back.compare_op);
2227bf215546Sopenharmony_ci   }
2228bf215546Sopenharmony_ci
2229bf215546Sopenharmony_ci   ds_state.db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
2230bf215546Sopenharmony_ci                                  S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
2231bf215546Sopenharmony_ci
2232bf215546Sopenharmony_ci   if (pipeline->depth_clamp_mode == RADV_DEPTH_CLAMP_MODE_DISABLED)
2233bf215546Sopenharmony_ci      ds_state.db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1);
2234bf215546Sopenharmony_ci
2235bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX11) {
2236bf215546Sopenharmony_ci      unsigned max_allowed_tiles_in_wave = 0;
2237bf215546Sopenharmony_ci      unsigned num_samples = MAX2(radv_pipeline_color_samples(info),
2238bf215546Sopenharmony_ci                                  radv_pipeline_depth_samples(info));
2239bf215546Sopenharmony_ci
2240bf215546Sopenharmony_ci      if (pdevice->rad_info.has_dedicated_vram) {
2241bf215546Sopenharmony_ci         if (num_samples == 8)
2242bf215546Sopenharmony_ci            max_allowed_tiles_in_wave = 7;
2243bf215546Sopenharmony_ci         else if (num_samples == 4)
2244bf215546Sopenharmony_ci            max_allowed_tiles_in_wave = 14;
2245bf215546Sopenharmony_ci      } else {
2246bf215546Sopenharmony_ci         if (num_samples == 8)
2247bf215546Sopenharmony_ci            max_allowed_tiles_in_wave = 8;
2248bf215546Sopenharmony_ci      }
2249bf215546Sopenharmony_ci
2250bf215546Sopenharmony_ci      /* TODO: We may want to disable this workaround for future chips. */
2251bf215546Sopenharmony_ci      if (num_samples >= 4) {
2252bf215546Sopenharmony_ci         if (max_allowed_tiles_in_wave)
2253bf215546Sopenharmony_ci            max_allowed_tiles_in_wave--;
2254bf215546Sopenharmony_ci         else
2255bf215546Sopenharmony_ci            max_allowed_tiles_in_wave = 15;
2256bf215546Sopenharmony_ci      }
2257bf215546Sopenharmony_ci
2258bf215546Sopenharmony_ci      ds_state.db_render_control |= S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) |
2259bf215546Sopenharmony_ci                                    S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave);
2260bf215546Sopenharmony_ci   }
2261bf215546Sopenharmony_ci
2262bf215546Sopenharmony_ci   pipeline->db_depth_control = db_depth_control;
2263bf215546Sopenharmony_ci
2264bf215546Sopenharmony_ci   return ds_state;
2265bf215546Sopenharmony_ci}
2266bf215546Sopenharmony_ci
2267bf215546Sopenharmony_cistatic void
2268bf215546Sopenharmony_cigfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline *pipeline,
2269bf215546Sopenharmony_ci                 struct radv_pipeline_stage *stages, struct gfx9_gs_info *out)
2270bf215546Sopenharmony_ci{
2271bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->device->physical_device;
2272bf215546Sopenharmony_ci   struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info;
2273bf215546Sopenharmony_ci   struct radv_es_output_info *es_info;
2274bf215546Sopenharmony_ci   bool has_tess = !!stages[MESA_SHADER_TESS_CTRL].nir;
2275bf215546Sopenharmony_ci
2276bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX9)
2277bf215546Sopenharmony_ci      es_info = has_tess ? &gs_info->tes.es_info : &gs_info->vs.es_info;
2278bf215546Sopenharmony_ci   else
2279bf215546Sopenharmony_ci      es_info = has_tess ? &stages[MESA_SHADER_TESS_EVAL].info.tes.es_info
2280bf215546Sopenharmony_ci                         : &stages[MESA_SHADER_VERTEX].info.vs.es_info;
2281bf215546Sopenharmony_ci
2282bf215546Sopenharmony_ci   unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
2283bf215546Sopenharmony_ci   bool uses_adjacency;
2284bf215546Sopenharmony_ci   switch (key->vs.topology) {
2285bf215546Sopenharmony_ci   case V_008958_DI_PT_LINELIST_ADJ:
2286bf215546Sopenharmony_ci   case V_008958_DI_PT_LINESTRIP_ADJ:
2287bf215546Sopenharmony_ci   case V_008958_DI_PT_TRILIST_ADJ:
2288bf215546Sopenharmony_ci   case V_008958_DI_PT_TRISTRIP_ADJ:
2289bf215546Sopenharmony_ci      uses_adjacency = true;
2290bf215546Sopenharmony_ci      break;
2291bf215546Sopenharmony_ci   default:
2292bf215546Sopenharmony_ci      uses_adjacency = false;
2293bf215546Sopenharmony_ci      break;
2294bf215546Sopenharmony_ci   }
2295bf215546Sopenharmony_ci
2296bf215546Sopenharmony_ci   /* All these are in dwords: */
2297bf215546Sopenharmony_ci   /* We can't allow using the whole LDS, because GS waves compete with
2298bf215546Sopenharmony_ci    * other shader stages for LDS space. */
2299bf215546Sopenharmony_ci   const unsigned max_lds_size = 8 * 1024;
2300bf215546Sopenharmony_ci   const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
2301bf215546Sopenharmony_ci   unsigned esgs_lds_size;
2302bf215546Sopenharmony_ci
2303bf215546Sopenharmony_ci   /* All these are per subgroup: */
2304bf215546Sopenharmony_ci   const unsigned max_out_prims = 32 * 1024;
2305bf215546Sopenharmony_ci   const unsigned max_es_verts = 255;
2306bf215546Sopenharmony_ci   const unsigned ideal_gs_prims = 64;
2307bf215546Sopenharmony_ci   unsigned max_gs_prims, gs_prims;
2308bf215546Sopenharmony_ci   unsigned min_es_verts, es_verts, worst_case_es_verts;
2309bf215546Sopenharmony_ci
2310bf215546Sopenharmony_ci   if (uses_adjacency || gs_num_invocations > 1)
2311bf215546Sopenharmony_ci      max_gs_prims = 127 / gs_num_invocations;
2312bf215546Sopenharmony_ci   else
2313bf215546Sopenharmony_ci      max_gs_prims = 255;
2314bf215546Sopenharmony_ci
2315bf215546Sopenharmony_ci   /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
2316bf215546Sopenharmony_ci    * Make sure we don't go over the maximum value.
2317bf215546Sopenharmony_ci    */
2318bf215546Sopenharmony_ci   if (gs_info->gs.vertices_out > 0) {
2319bf215546Sopenharmony_ci      max_gs_prims =
2320bf215546Sopenharmony_ci         MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations));
2321bf215546Sopenharmony_ci   }
2322bf215546Sopenharmony_ci   assert(max_gs_prims > 0);
2323bf215546Sopenharmony_ci
2324bf215546Sopenharmony_ci   /* If the primitive has adjacency, halve the number of vertices
2325bf215546Sopenharmony_ci    * that will be reused in multiple primitives.
2326bf215546Sopenharmony_ci    */
2327bf215546Sopenharmony_ci   min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);
2328bf215546Sopenharmony_ci
2329bf215546Sopenharmony_ci   gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
2330bf215546Sopenharmony_ci   worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
2331bf215546Sopenharmony_ci
2332bf215546Sopenharmony_ci   /* Compute ESGS LDS size based on the worst case number of ES vertices
2333bf215546Sopenharmony_ci    * needed to create the target number of GS prims per subgroup.
2334bf215546Sopenharmony_ci    */
2335bf215546Sopenharmony_ci   esgs_lds_size = esgs_itemsize * worst_case_es_verts;
2336bf215546Sopenharmony_ci
2337bf215546Sopenharmony_ci   /* If total LDS usage is too big, refactor partitions based on ratio
2338bf215546Sopenharmony_ci    * of ESGS item sizes.
2339bf215546Sopenharmony_ci    */
2340bf215546Sopenharmony_ci   if (esgs_lds_size > max_lds_size) {
2341bf215546Sopenharmony_ci      /* Our target GS Prims Per Subgroup was too large. Calculate
2342bf215546Sopenharmony_ci       * the maximum number of GS Prims Per Subgroup that will fit
2343bf215546Sopenharmony_ci       * into LDS, capped by the maximum that the hardware can support.
2344bf215546Sopenharmony_ci       */
2345bf215546Sopenharmony_ci      gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)), max_gs_prims);
2346bf215546Sopenharmony_ci      assert(gs_prims > 0);
2347bf215546Sopenharmony_ci      worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
2348bf215546Sopenharmony_ci
2349bf215546Sopenharmony_ci      esgs_lds_size = esgs_itemsize * worst_case_es_verts;
2350bf215546Sopenharmony_ci      assert(esgs_lds_size <= max_lds_size);
2351bf215546Sopenharmony_ci   }
2352bf215546Sopenharmony_ci
2353bf215546Sopenharmony_ci   /* Now calculate remaining ESGS information. */
2354bf215546Sopenharmony_ci   if (esgs_lds_size)
2355bf215546Sopenharmony_ci      es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
2356bf215546Sopenharmony_ci   else
2357bf215546Sopenharmony_ci      es_verts = max_es_verts;
2358bf215546Sopenharmony_ci
2359bf215546Sopenharmony_ci   /* Vertices for adjacency primitives are not always reused, so restore
2360bf215546Sopenharmony_ci    * it for ES_VERTS_PER_SUBGRP.
2361bf215546Sopenharmony_ci    */
2362bf215546Sopenharmony_ci   min_es_verts = gs_info->gs.vertices_in;
2363bf215546Sopenharmony_ci
2364bf215546Sopenharmony_ci   /* For normal primitives, the VGT only checks if they are past the ES
2365bf215546Sopenharmony_ci    * verts per subgroup after allocating a full GS primitive and if they
2366bf215546Sopenharmony_ci    * are, kick off a new subgroup.  But if those additional ES verts are
2367bf215546Sopenharmony_ci    * unique (e.g. not reused) we need to make sure there is enough LDS
2368bf215546Sopenharmony_ci    * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
2369bf215546Sopenharmony_ci    */
2370bf215546Sopenharmony_ci   es_verts -= min_es_verts - 1;
2371bf215546Sopenharmony_ci
2372bf215546Sopenharmony_ci   uint32_t es_verts_per_subgroup = es_verts;
2373bf215546Sopenharmony_ci   uint32_t gs_prims_per_subgroup = gs_prims;
2374bf215546Sopenharmony_ci   uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
2375bf215546Sopenharmony_ci   uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
2376bf215546Sopenharmony_ci   out->lds_size = align(esgs_lds_size, 128) / 128;
2377bf215546Sopenharmony_ci   out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
2378bf215546Sopenharmony_ci                             S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
2379bf215546Sopenharmony_ci                             S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
2380bf215546Sopenharmony_ci   out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
2381bf215546Sopenharmony_ci   out->vgt_esgs_ring_itemsize = esgs_itemsize;
2382bf215546Sopenharmony_ci   assert(max_prims_per_subgroup <= max_out_prims);
2383bf215546Sopenharmony_ci
2384bf215546Sopenharmony_ci   gl_shader_stage es_stage = has_tess ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2385bf215546Sopenharmony_ci   unsigned workgroup_size = ac_compute_esgs_workgroup_size(
2386bf215546Sopenharmony_ci      pdevice->rad_info.gfx_level, stages[es_stage].info.wave_size,
2387bf215546Sopenharmony_ci      es_verts_per_subgroup, gs_inst_prims_in_subgroup);
2388bf215546Sopenharmony_ci   stages[es_stage].info.workgroup_size = workgroup_size;
2389bf215546Sopenharmony_ci   stages[MESA_SHADER_GEOMETRY].info.workgroup_size = workgroup_size;
2390bf215546Sopenharmony_ci}
2391bf215546Sopenharmony_ci
/*
 * Lower *max_gsprims so that it does not exceed 1 + R, where R is
 * (max_esverts - min_verts_per_prim), halved when use_adjacency is set.
 * *max_gsprims is left untouched when it is already within that bound.
 *
 * NOTE(review): the subtraction is unsigned and wraps if max_esverts is
 * smaller than min_verts_per_prim - presumably callers never pass that.
 */
static void
clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim,
                         bool use_adjacency)
{
   const unsigned spare_verts = max_esverts - min_verts_per_prim;
   const unsigned reusable = use_adjacency ? spare_verts / 2 : spare_verts;
   const unsigned prim_cap = 1 + reusable;

   if (prim_cap < *max_gsprims)
      *max_gsprims = prim_cap;
}
2401bf215546Sopenharmony_ci
2402bf215546Sopenharmony_cistatic unsigned
2403bf215546Sopenharmony_ciradv_get_num_input_vertices(const struct radv_pipeline_stage *stages)
2404bf215546Sopenharmony_ci{
2405bf215546Sopenharmony_ci   if (stages[MESA_SHADER_GEOMETRY].nir) {
2406bf215546Sopenharmony_ci      nir_shader *gs = stages[MESA_SHADER_GEOMETRY].nir;
2407bf215546Sopenharmony_ci
2408bf215546Sopenharmony_ci      return gs->info.gs.vertices_in;
2409bf215546Sopenharmony_ci   }
2410bf215546Sopenharmony_ci
2411bf215546Sopenharmony_ci   if (stages[MESA_SHADER_TESS_CTRL].nir) {
2412bf215546Sopenharmony_ci      nir_shader *tes = stages[MESA_SHADER_TESS_EVAL].nir;
2413bf215546Sopenharmony_ci
2414bf215546Sopenharmony_ci      if (tes->info.tess.point_mode)
2415bf215546Sopenharmony_ci         return 1;
2416bf215546Sopenharmony_ci      if (tes->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
2417bf215546Sopenharmony_ci         return 2;
2418bf215546Sopenharmony_ci      return 3;
2419bf215546Sopenharmony_ci   }
2420bf215546Sopenharmony_ci
2421bf215546Sopenharmony_ci   return 3;
2422bf215546Sopenharmony_ci}
2423bf215546Sopenharmony_ci
2424bf215546Sopenharmony_cistatic void
2425bf215546Sopenharmony_cigfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
2426bf215546Sopenharmony_ci                       uint32_t oversub_pc_lines)
2427bf215546Sopenharmony_ci{
2428bf215546Sopenharmony_ci   radeon_set_uconfig_reg(
2429bf215546Sopenharmony_ci      cs, R_030980_GE_PC_ALLOC,
2430bf215546Sopenharmony_ci      S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
2431bf215546Sopenharmony_ci}
2432bf215546Sopenharmony_ci
2433bf215546Sopenharmony_cistatic void
2434bf215546Sopenharmony_cigfx10_get_ngg_ms_info(struct radv_pipeline_stage *stage, struct gfx10_ngg_info *ngg)
2435bf215546Sopenharmony_ci{
2436bf215546Sopenharmony_ci   /* Special case for mesh shader workgroups.
2437bf215546Sopenharmony_ci    *
2438bf215546Sopenharmony_ci    * Mesh shaders don't have any real vertex input, but they can produce
2439bf215546Sopenharmony_ci    * an arbitrary number of vertices and primitives (up to 256).
2440bf215546Sopenharmony_ci    * We need to precisely control the number of mesh shader workgroups
2441bf215546Sopenharmony_ci    * that are launched from draw calls.
2442bf215546Sopenharmony_ci    *
2443bf215546Sopenharmony_ci    * To achieve that, we set:
2444bf215546Sopenharmony_ci    * - input primitive topology to point list
2445bf215546Sopenharmony_ci    * - input vertex and primitive count to 1
2446bf215546Sopenharmony_ci    * - max output vertex count and primitive amplification factor
2447bf215546Sopenharmony_ci    *   to the boundaries of the shader
2448bf215546Sopenharmony_ci    *
2449bf215546Sopenharmony_ci    * With that, in the draw call:
2450bf215546Sopenharmony_ci    * - drawing 1 input vertex ~ launching 1 mesh shader workgroup
2451bf215546Sopenharmony_ci    *
2452bf215546Sopenharmony_ci    * In the shader:
2453bf215546Sopenharmony_ci    * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
2454bf215546Sopenharmony_ci    * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
2455bf215546Sopenharmony_ci    *
2456bf215546Sopenharmony_ci    * Notes:
2457bf215546Sopenharmony_ci    * - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
2458bf215546Sopenharmony_ci    * - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
2459bf215546Sopenharmony_ci    * - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
2460bf215546Sopenharmony_ci    *
2461bf215546Sopenharmony_ci    */
2462bf215546Sopenharmony_ci   nir_shader *ms = stage->nir;
2463bf215546Sopenharmony_ci
2464bf215546Sopenharmony_ci   ngg->enable_vertex_grouping = true;
2465bf215546Sopenharmony_ci   ngg->esgs_ring_size = 1;
2466bf215546Sopenharmony_ci   ngg->hw_max_esverts = 1;
2467bf215546Sopenharmony_ci   ngg->max_gsprims = 1;
2468bf215546Sopenharmony_ci   ngg->max_out_verts = ms->info.mesh.max_vertices_out;
2469bf215546Sopenharmony_ci   ngg->max_vert_out_per_gs_instance = false;
2470bf215546Sopenharmony_ci   ngg->ngg_emit_size = 0;
2471bf215546Sopenharmony_ci   ngg->prim_amp_factor = ms->info.mesh.max_primitives_out;
2472bf215546Sopenharmony_ci   ngg->vgt_esgs_ring_itemsize = 1;
2473bf215546Sopenharmony_ci
2474bf215546Sopenharmony_ci   unsigned min_ngg_workgroup_size =
2475bf215546Sopenharmony_ci      ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims,
2476bf215546Sopenharmony_ci                                    ngg->max_out_verts, ngg->prim_amp_factor);
2477bf215546Sopenharmony_ci
2478bf215546Sopenharmony_ci   unsigned api_workgroup_size =
2479bf215546Sopenharmony_ci      ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX);
2480bf215546Sopenharmony_ci
2481bf215546Sopenharmony_ci   stage->info.workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
2482bf215546Sopenharmony_ci}
2483bf215546Sopenharmony_ci
2484bf215546Sopenharmony_cistatic void
2485bf215546Sopenharmony_cigfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
2486bf215546Sopenharmony_ci                   struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg)
2487bf215546Sopenharmony_ci{
2488bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->device->physical_device;
2489bf215546Sopenharmony_ci   struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info;
2490bf215546Sopenharmony_ci   struct radv_es_output_info *es_info =
2491bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].nir ? &gs_info->tes.es_info : &gs_info->vs.es_info;
2492bf215546Sopenharmony_ci   unsigned gs_type = stages[MESA_SHADER_GEOMETRY].nir ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
2493bf215546Sopenharmony_ci   unsigned max_verts_per_prim = radv_get_num_input_vertices(stages);
2494bf215546Sopenharmony_ci   unsigned min_verts_per_prim = gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
2495bf215546Sopenharmony_ci   unsigned gs_num_invocations = stages[MESA_SHADER_GEOMETRY].nir ? MAX2(gs_info->gs.invocations, 1) : 1;
2496bf215546Sopenharmony_ci   bool uses_adjacency;
2497bf215546Sopenharmony_ci   switch (key->vs.topology) {
2498bf215546Sopenharmony_ci   case V_008958_DI_PT_LINELIST_ADJ:
2499bf215546Sopenharmony_ci   case V_008958_DI_PT_LINESTRIP_ADJ:
2500bf215546Sopenharmony_ci   case V_008958_DI_PT_TRILIST_ADJ:
2501bf215546Sopenharmony_ci   case V_008958_DI_PT_TRISTRIP_ADJ:
2502bf215546Sopenharmony_ci      uses_adjacency = true;
2503bf215546Sopenharmony_ci      break;
2504bf215546Sopenharmony_ci   default:
2505bf215546Sopenharmony_ci      uses_adjacency = false;
2506bf215546Sopenharmony_ci      break;
2507bf215546Sopenharmony_ci   }
2508bf215546Sopenharmony_ci
2509bf215546Sopenharmony_ci   /* All these are in dwords: */
2510bf215546Sopenharmony_ci   /* We can't allow using the whole LDS, because GS waves compete with
2511bf215546Sopenharmony_ci    * other shader stages for LDS space.
2512bf215546Sopenharmony_ci    *
2513bf215546Sopenharmony_ci    * TODO: We should really take the shader's internal LDS use into
2514bf215546Sopenharmony_ci    *       account. The linker will fail if the size is greater than
2515bf215546Sopenharmony_ci    *       8K dwords.
2516bf215546Sopenharmony_ci    */
2517bf215546Sopenharmony_ci   const unsigned max_lds_size = 8 * 1024 - 768;
2518bf215546Sopenharmony_ci   const unsigned target_lds_size = max_lds_size;
2519bf215546Sopenharmony_ci   unsigned esvert_lds_size = 0;
2520bf215546Sopenharmony_ci   unsigned gsprim_lds_size = 0;
2521bf215546Sopenharmony_ci
2522bf215546Sopenharmony_ci   /* All these are per subgroup: */
2523bf215546Sopenharmony_ci   const unsigned min_esverts = pdevice->rad_info.gfx_level >= GFX10_3 ? 29 : 24;
2524bf215546Sopenharmony_ci   bool max_vert_out_per_gs_instance = false;
2525bf215546Sopenharmony_ci   unsigned max_esverts_base = 128;
2526bf215546Sopenharmony_ci   unsigned max_gsprims_base = 128; /* default prim group size clamp */
2527bf215546Sopenharmony_ci
2528bf215546Sopenharmony_ci   /* Hardware has the following non-natural restrictions on the value
2529bf215546Sopenharmony_ci    * of GE_CNTL.VERT_GRP_SIZE based on based on the primitive type of
2530bf215546Sopenharmony_ci    * the draw:
2531bf215546Sopenharmony_ci    *  - at most 252 for any line input primitive type
2532bf215546Sopenharmony_ci    *  - at most 251 for any quad input primitive type
2533bf215546Sopenharmony_ci    *  - at most 251 for triangle strips with adjacency (this happens to
2534bf215546Sopenharmony_ci    *    be the natural limit for triangle *lists* with adjacency)
2535bf215546Sopenharmony_ci    */
2536bf215546Sopenharmony_ci   max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
2537bf215546Sopenharmony_ci
2538bf215546Sopenharmony_ci   if (gs_type == MESA_SHADER_GEOMETRY) {
2539bf215546Sopenharmony_ci      unsigned max_out_verts_per_gsprim = gs_info->gs.vertices_out * gs_num_invocations;
2540bf215546Sopenharmony_ci
2541bf215546Sopenharmony_ci      if (max_out_verts_per_gsprim <= 256) {
2542bf215546Sopenharmony_ci         if (max_out_verts_per_gsprim) {
2543bf215546Sopenharmony_ci            max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim);
2544bf215546Sopenharmony_ci         }
2545bf215546Sopenharmony_ci      } else {
2546bf215546Sopenharmony_ci         /* Use special multi-cycling mode in which each GS
2547bf215546Sopenharmony_ci          * instance gets its own subgroup. Does not work with
2548bf215546Sopenharmony_ci          * tessellation. */
2549bf215546Sopenharmony_ci         max_vert_out_per_gs_instance = true;
2550bf215546Sopenharmony_ci         max_gsprims_base = 1;
2551bf215546Sopenharmony_ci         max_out_verts_per_gsprim = gs_info->gs.vertices_out;
2552bf215546Sopenharmony_ci      }
2553bf215546Sopenharmony_ci
2554bf215546Sopenharmony_ci      esvert_lds_size = es_info->esgs_itemsize / 4;
2555bf215546Sopenharmony_ci      gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
2556bf215546Sopenharmony_ci   } else {
2557bf215546Sopenharmony_ci      /* VS and TES. */
2558bf215546Sopenharmony_ci      /* LDS size for passing data from GS to ES. */
2559bf215546Sopenharmony_ci      struct radv_streamout_info *so_info = stages[MESA_SHADER_TESS_CTRL].nir
2560bf215546Sopenharmony_ci                                               ? &stages[MESA_SHADER_TESS_EVAL].info.so
2561bf215546Sopenharmony_ci                                               : &stages[MESA_SHADER_VERTEX].info.so;
2562bf215546Sopenharmony_ci
2563bf215546Sopenharmony_ci      if (so_info->num_outputs)
2564bf215546Sopenharmony_ci         esvert_lds_size = 4 * so_info->num_outputs + 1;
2565bf215546Sopenharmony_ci
2566bf215546Sopenharmony_ci      /* GS stores Primitive IDs (one DWORD) into LDS at the address
2567bf215546Sopenharmony_ci       * corresponding to the ES thread of the provoking vertex. All
2568bf215546Sopenharmony_ci       * ES threads load and export PrimitiveID for their thread.
2569bf215546Sopenharmony_ci       */
2570bf215546Sopenharmony_ci      if (!stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_VERTEX].info.vs.outinfo.export_prim_id)
2571bf215546Sopenharmony_ci         esvert_lds_size = MAX2(esvert_lds_size, 1);
2572bf215546Sopenharmony_ci   }
2573bf215546Sopenharmony_ci
2574bf215546Sopenharmony_ci   unsigned max_gsprims = max_gsprims_base;
2575bf215546Sopenharmony_ci   unsigned max_esverts = max_esverts_base;
2576bf215546Sopenharmony_ci
2577bf215546Sopenharmony_ci   if (esvert_lds_size)
2578bf215546Sopenharmony_ci      max_esverts = MIN2(max_esverts, target_lds_size / esvert_lds_size);
2579bf215546Sopenharmony_ci   if (gsprim_lds_size)
2580bf215546Sopenharmony_ci      max_gsprims = MIN2(max_gsprims, target_lds_size / gsprim_lds_size);
2581bf215546Sopenharmony_ci
2582bf215546Sopenharmony_ci   max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
2583bf215546Sopenharmony_ci   clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
2584bf215546Sopenharmony_ci   assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
2585bf215546Sopenharmony_ci
2586bf215546Sopenharmony_ci   if (esvert_lds_size || gsprim_lds_size) {
2587bf215546Sopenharmony_ci      /* Now that we have a rough proportionality between esverts
2588bf215546Sopenharmony_ci       * and gsprims based on the primitive type, scale both of them
2589bf215546Sopenharmony_ci       * down simultaneously based on required LDS space.
2590bf215546Sopenharmony_ci       *
2591bf215546Sopenharmony_ci       * We could be smarter about this if we knew how much vertex
2592bf215546Sopenharmony_ci       * reuse to expect.
2593bf215546Sopenharmony_ci       */
2594bf215546Sopenharmony_ci      unsigned lds_total = max_esverts * esvert_lds_size + max_gsprims * gsprim_lds_size;
2595bf215546Sopenharmony_ci      if (lds_total > target_lds_size) {
2596bf215546Sopenharmony_ci         max_esverts = max_esverts * target_lds_size / lds_total;
2597bf215546Sopenharmony_ci         max_gsprims = max_gsprims * target_lds_size / lds_total;
2598bf215546Sopenharmony_ci
2599bf215546Sopenharmony_ci         max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
2600bf215546Sopenharmony_ci         clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
2601bf215546Sopenharmony_ci         assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
2602bf215546Sopenharmony_ci      }
2603bf215546Sopenharmony_ci   }
2604bf215546Sopenharmony_ci
2605bf215546Sopenharmony_ci   /* Round up towards full wave sizes for better ALU utilization. */
2606bf215546Sopenharmony_ci   if (!max_vert_out_per_gs_instance) {
2607bf215546Sopenharmony_ci      unsigned orig_max_esverts;
2608bf215546Sopenharmony_ci      unsigned orig_max_gsprims;
2609bf215546Sopenharmony_ci      unsigned wavesize;
2610bf215546Sopenharmony_ci
2611bf215546Sopenharmony_ci      if (gs_type == MESA_SHADER_GEOMETRY) {
2612bf215546Sopenharmony_ci         wavesize = gs_info->wave_size;
2613bf215546Sopenharmony_ci      } else {
2614bf215546Sopenharmony_ci         wavesize = stages[MESA_SHADER_TESS_CTRL].nir ? stages[MESA_SHADER_TESS_EVAL].info.wave_size
2615bf215546Sopenharmony_ci                                                      : stages[MESA_SHADER_VERTEX].info.wave_size;
2616bf215546Sopenharmony_ci      }
2617bf215546Sopenharmony_ci
2618bf215546Sopenharmony_ci      do {
2619bf215546Sopenharmony_ci         orig_max_esverts = max_esverts;
2620bf215546Sopenharmony_ci         orig_max_gsprims = max_gsprims;
2621bf215546Sopenharmony_ci
2622bf215546Sopenharmony_ci         max_esverts = align(max_esverts, wavesize);
2623bf215546Sopenharmony_ci         max_esverts = MIN2(max_esverts, max_esverts_base);
2624bf215546Sopenharmony_ci         if (esvert_lds_size)
2625bf215546Sopenharmony_ci            max_esverts =
2626bf215546Sopenharmony_ci               MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
2627bf215546Sopenharmony_ci         max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
2628bf215546Sopenharmony_ci
2629bf215546Sopenharmony_ci         /* Hardware restriction: minimum value of max_esverts */
2630bf215546Sopenharmony_ci         if (pdevice->rad_info.gfx_level == GFX10)
2631bf215546Sopenharmony_ci            max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
2632bf215546Sopenharmony_ci         else
2633bf215546Sopenharmony_ci            max_esverts = MAX2(max_esverts, min_esverts);
2634bf215546Sopenharmony_ci
2635bf215546Sopenharmony_ci         max_gsprims = align(max_gsprims, wavesize);
2636bf215546Sopenharmony_ci         max_gsprims = MIN2(max_gsprims, max_gsprims_base);
2637bf215546Sopenharmony_ci         if (gsprim_lds_size) {
2638bf215546Sopenharmony_ci            /* Don't count unusable vertices to the LDS
2639bf215546Sopenharmony_ci             * size. Those are vertices above the maximum
2640bf215546Sopenharmony_ci             * number of vertices that can occur in the
2641bf215546Sopenharmony_ci             * workgroup, which is e.g. max_gsprims * 3
2642bf215546Sopenharmony_ci             * for triangles.
2643bf215546Sopenharmony_ci             */
2644bf215546Sopenharmony_ci            unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
2645bf215546Sopenharmony_ci            max_gsprims = MIN2(max_gsprims,
2646bf215546Sopenharmony_ci                               (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
2647bf215546Sopenharmony_ci         }
2648bf215546Sopenharmony_ci         clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
2649bf215546Sopenharmony_ci         assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
2650bf215546Sopenharmony_ci      } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
2651bf215546Sopenharmony_ci
2652bf215546Sopenharmony_ci      /* Verify the restriction. */
2653bf215546Sopenharmony_ci      if (pdevice->rad_info.gfx_level == GFX10)
2654bf215546Sopenharmony_ci         assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
2655bf215546Sopenharmony_ci      else
2656bf215546Sopenharmony_ci         assert(max_esverts >= min_esverts);
2657bf215546Sopenharmony_ci   } else {
2658bf215546Sopenharmony_ci      /* Hardware restriction: minimum value of max_esverts */
2659bf215546Sopenharmony_ci      if (pdevice->rad_info.gfx_level == GFX10)
2660bf215546Sopenharmony_ci         max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
2661bf215546Sopenharmony_ci      else
2662bf215546Sopenharmony_ci         max_esverts = MAX2(max_esverts, min_esverts);
2663bf215546Sopenharmony_ci   }
2664bf215546Sopenharmony_ci
2665bf215546Sopenharmony_ci   unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out
2666bf215546Sopenharmony_ci                               : gs_type == MESA_SHADER_GEOMETRY
2667bf215546Sopenharmony_ci                                  ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out
2668bf215546Sopenharmony_ci                                  : max_esverts;
2669bf215546Sopenharmony_ci   assert(max_out_vertices <= 256);
2670bf215546Sopenharmony_ci
2671bf215546Sopenharmony_ci   unsigned prim_amp_factor = 1;
2672bf215546Sopenharmony_ci   if (gs_type == MESA_SHADER_GEOMETRY) {
2673bf215546Sopenharmony_ci      /* Number of output primitives per GS input primitive after
2674bf215546Sopenharmony_ci       * GS instancing. */
2675bf215546Sopenharmony_ci      prim_amp_factor = gs_info->gs.vertices_out;
2676bf215546Sopenharmony_ci   }
2677bf215546Sopenharmony_ci
2678bf215546Sopenharmony_ci   /* On Gfx10, the GE only checks against the maximum number of ES verts
2679bf215546Sopenharmony_ci    * after allocating a full GS primitive. So we need to ensure that
2680bf215546Sopenharmony_ci    * whenever this check passes, there is enough space for a full
2681bf215546Sopenharmony_ci    * primitive without vertex reuse.
2682bf215546Sopenharmony_ci    */
2683bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level == GFX10)
2684bf215546Sopenharmony_ci      ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
2685bf215546Sopenharmony_ci   else
2686bf215546Sopenharmony_ci      ngg->hw_max_esverts = max_esverts;
2687bf215546Sopenharmony_ci
2688bf215546Sopenharmony_ci   ngg->max_gsprims = max_gsprims;
2689bf215546Sopenharmony_ci   ngg->max_out_verts = max_out_vertices;
2690bf215546Sopenharmony_ci   ngg->prim_amp_factor = prim_amp_factor;
2691bf215546Sopenharmony_ci   ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
2692bf215546Sopenharmony_ci   ngg->ngg_emit_size = max_gsprims * gsprim_lds_size;
2693bf215546Sopenharmony_ci   ngg->enable_vertex_grouping = true;
2694bf215546Sopenharmony_ci
2695bf215546Sopenharmony_ci   /* Don't count unusable vertices. */
2696bf215546Sopenharmony_ci   ngg->esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4;
2697bf215546Sopenharmony_ci
2698bf215546Sopenharmony_ci   if (gs_type == MESA_SHADER_GEOMETRY) {
2699bf215546Sopenharmony_ci      ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
2700bf215546Sopenharmony_ci   } else {
2701bf215546Sopenharmony_ci      ngg->vgt_esgs_ring_itemsize = 1;
2702bf215546Sopenharmony_ci   }
2703bf215546Sopenharmony_ci
2704bf215546Sopenharmony_ci   assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
2705bf215546Sopenharmony_ci
2706bf215546Sopenharmony_ci   gl_shader_stage es_stage = stages[MESA_SHADER_TESS_CTRL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2707bf215546Sopenharmony_ci   unsigned workgroup_size =
2708bf215546Sopenharmony_ci      ac_compute_ngg_workgroup_size(
2709bf215546Sopenharmony_ci         max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor);
2710bf215546Sopenharmony_ci   stages[MESA_SHADER_GEOMETRY].info.workgroup_size = workgroup_size;
2711bf215546Sopenharmony_ci   stages[es_stage].info.workgroup_size = workgroup_size;
2712bf215546Sopenharmony_ci}
2713bf215546Sopenharmony_ci
2714bf215546Sopenharmony_cistatic void
2715bf215546Sopenharmony_ciradv_pipeline_init_gs_ring_state(struct radv_graphics_pipeline *pipeline, const struct gfx9_gs_info *gs)
2716bf215546Sopenharmony_ci{
2717bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
2718bf215546Sopenharmony_ci   unsigned num_se = pdevice->rad_info.max_se;
2719bf215546Sopenharmony_ci   unsigned wave_size = 64;
2720bf215546Sopenharmony_ci   unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
2721bf215546Sopenharmony_ci   /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
2722bf215546Sopenharmony_ci    * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
2723bf215546Sopenharmony_ci    */
2724bf215546Sopenharmony_ci   unsigned gs_vertex_reuse = (pdevice->rad_info.gfx_level >= GFX8 ? 32 : 16) * num_se;
2725bf215546Sopenharmony_ci   unsigned alignment = 256 * num_se;
2726bf215546Sopenharmony_ci   /* The maximum size is 63.999 MB per SE. */
2727bf215546Sopenharmony_ci   unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
2728bf215546Sopenharmony_ci   struct radv_shader_info *gs_info = &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info;
2729bf215546Sopenharmony_ci
2730bf215546Sopenharmony_ci   /* Calculate the minimum size. */
2731bf215546Sopenharmony_ci   unsigned min_esgs_ring_size =
2732bf215546Sopenharmony_ci      align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment);
2733bf215546Sopenharmony_ci   /* These are recommended sizes, not minimum sizes. */
2734bf215546Sopenharmony_ci   unsigned esgs_ring_size =
2735bf215546Sopenharmony_ci      max_gs_waves * 2 * wave_size * gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
2736bf215546Sopenharmony_ci   unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size;
2737bf215546Sopenharmony_ci
2738bf215546Sopenharmony_ci   min_esgs_ring_size = align(min_esgs_ring_size, alignment);
2739bf215546Sopenharmony_ci   esgs_ring_size = align(esgs_ring_size, alignment);
2740bf215546Sopenharmony_ci   gsvs_ring_size = align(gsvs_ring_size, alignment);
2741bf215546Sopenharmony_ci
2742bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level <= GFX8)
2743bf215546Sopenharmony_ci      pipeline->esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
2744bf215546Sopenharmony_ci
2745bf215546Sopenharmony_ci   pipeline->gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
2746bf215546Sopenharmony_ci}
2747bf215546Sopenharmony_ci
2748bf215546Sopenharmony_cistruct radv_shader *
2749bf215546Sopenharmony_ciradv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage)
2750bf215546Sopenharmony_ci{
2751bf215546Sopenharmony_ci   if (stage == MESA_SHADER_VERTEX) {
2752bf215546Sopenharmony_ci      if (pipeline->shaders[MESA_SHADER_VERTEX])
2753bf215546Sopenharmony_ci         return pipeline->shaders[MESA_SHADER_VERTEX];
2754bf215546Sopenharmony_ci      if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
2755bf215546Sopenharmony_ci         return pipeline->shaders[MESA_SHADER_TESS_CTRL];
2756bf215546Sopenharmony_ci      if (pipeline->shaders[MESA_SHADER_GEOMETRY])
2757bf215546Sopenharmony_ci         return pipeline->shaders[MESA_SHADER_GEOMETRY];
2758bf215546Sopenharmony_ci   } else if (stage == MESA_SHADER_TESS_EVAL) {
2759bf215546Sopenharmony_ci      if (!pipeline->shaders[MESA_SHADER_TESS_CTRL])
2760bf215546Sopenharmony_ci         return NULL;
2761bf215546Sopenharmony_ci      if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
2762bf215546Sopenharmony_ci         return pipeline->shaders[MESA_SHADER_TESS_EVAL];
2763bf215546Sopenharmony_ci      if (pipeline->shaders[MESA_SHADER_GEOMETRY])
2764bf215546Sopenharmony_ci         return pipeline->shaders[MESA_SHADER_GEOMETRY];
2765bf215546Sopenharmony_ci   }
2766bf215546Sopenharmony_ci   return pipeline->shaders[stage];
2767bf215546Sopenharmony_ci}
2768bf215546Sopenharmony_ci
2769bf215546Sopenharmony_cistatic const struct radv_vs_output_info *
2770bf215546Sopenharmony_ciget_vs_output_info(const struct radv_graphics_pipeline *pipeline)
2771bf215546Sopenharmony_ci{
2772bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
2773bf215546Sopenharmony_ci      if (radv_pipeline_has_ngg(pipeline))
2774bf215546Sopenharmony_ci         return &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.vs.outinfo;
2775bf215546Sopenharmony_ci      else
2776bf215546Sopenharmony_ci         return &pipeline->base.gs_copy_shader->info.vs.outinfo;
2777bf215546Sopenharmony_ci   else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
2778bf215546Sopenharmony_ci      return &pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo;
2779bf215546Sopenharmony_ci   else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
2780bf215546Sopenharmony_ci      return &pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.outinfo;
2781bf215546Sopenharmony_ci   else
2782bf215546Sopenharmony_ci      return &pipeline->base.shaders[MESA_SHADER_VERTEX]->info.vs.outinfo;
2783bf215546Sopenharmony_ci}
2784bf215546Sopenharmony_ci
2785bf215546Sopenharmony_cistatic bool
2786bf215546Sopenharmony_ciradv_lower_viewport_to_zero(nir_shader *nir)
2787bf215546Sopenharmony_ci{
2788bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2789bf215546Sopenharmony_ci   bool progress = false;
2790bf215546Sopenharmony_ci
2791bf215546Sopenharmony_ci   nir_builder b;
2792bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
2793bf215546Sopenharmony_ci
2794bf215546Sopenharmony_ci   /* There should be only one deref load for VIEWPORT after lower_io_to_temporaries. */
2795bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
2796bf215546Sopenharmony_ci      nir_foreach_instr(instr, block) {
2797bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
2798bf215546Sopenharmony_ci            continue;
2799bf215546Sopenharmony_ci
2800bf215546Sopenharmony_ci         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2801bf215546Sopenharmony_ci         if (intr->intrinsic != nir_intrinsic_load_deref)
2802bf215546Sopenharmony_ci            continue;
2803bf215546Sopenharmony_ci
2804bf215546Sopenharmony_ci         nir_variable *var = nir_intrinsic_get_var(intr, 0);
2805bf215546Sopenharmony_ci         if (var->data.mode != nir_var_shader_in ||
2806bf215546Sopenharmony_ci             var->data.location != VARYING_SLOT_VIEWPORT)
2807bf215546Sopenharmony_ci            continue;
2808bf215546Sopenharmony_ci
2809bf215546Sopenharmony_ci         b.cursor = nir_before_instr(instr);
2810bf215546Sopenharmony_ci
2811bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_imm_zero(&b, 1, 32));
2812bf215546Sopenharmony_ci         progress = true;
2813bf215546Sopenharmony_ci         break;
2814bf215546Sopenharmony_ci      }
2815bf215546Sopenharmony_ci      if (progress)
2816bf215546Sopenharmony_ci         break;
2817bf215546Sopenharmony_ci   }
2818bf215546Sopenharmony_ci
2819bf215546Sopenharmony_ci   if (progress)
2820bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
2821bf215546Sopenharmony_ci   else
2822bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
2823bf215546Sopenharmony_ci
2824bf215546Sopenharmony_ci   return progress;
2825bf215546Sopenharmony_ci}
2826bf215546Sopenharmony_ci
2827bf215546Sopenharmony_cistatic nir_variable *
2828bf215546Sopenharmony_cifind_layer_out_var(nir_shader *nir)
2829bf215546Sopenharmony_ci{
2830bf215546Sopenharmony_ci   nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_LAYER);
2831bf215546Sopenharmony_ci   if (var != NULL)
2832bf215546Sopenharmony_ci      return var;
2833bf215546Sopenharmony_ci
2834bf215546Sopenharmony_ci   var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), "layer id");
2835bf215546Sopenharmony_ci   var->data.location = VARYING_SLOT_LAYER;
2836bf215546Sopenharmony_ci   var->data.interpolation = INTERP_MODE_NONE;
2837bf215546Sopenharmony_ci
2838bf215546Sopenharmony_ci   return var;
2839bf215546Sopenharmony_ci}
2840bf215546Sopenharmony_ci
2841bf215546Sopenharmony_cistatic bool
2842bf215546Sopenharmony_ciradv_lower_multiview(nir_shader *nir)
2843bf215546Sopenharmony_ci{
2844bf215546Sopenharmony_ci   /* This pass is not suitable for mesh shaders, because it can't know
2845bf215546Sopenharmony_ci    * the mapping between API mesh shader invocations and output primitives.
2846bf215546Sopenharmony_ci    * Needs to be handled in ac_nir_lower_ngg.
2847bf215546Sopenharmony_ci    */
2848bf215546Sopenharmony_ci   if (nir->info.stage == MESA_SHADER_MESH)
2849bf215546Sopenharmony_ci      return false;
2850bf215546Sopenharmony_ci
2851bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2852bf215546Sopenharmony_ci   bool progress = false;
2853bf215546Sopenharmony_ci
2854bf215546Sopenharmony_ci   nir_builder b;
2855bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
2856bf215546Sopenharmony_ci
2857bf215546Sopenharmony_ci   /* Iterate in reverse order since there should be only one deref store to POS after
2858bf215546Sopenharmony_ci    * lower_io_to_temporaries for vertex shaders and inject the layer there. For geometry shaders,
2859bf215546Sopenharmony_ci    * the layer is injected right before every emit_vertex_with_counter.
2860bf215546Sopenharmony_ci    */
2861bf215546Sopenharmony_ci   nir_variable *layer = NULL;
2862bf215546Sopenharmony_ci   nir_foreach_block_reverse(block, impl) {
2863bf215546Sopenharmony_ci      nir_foreach_instr_reverse(instr, block) {
2864bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
2865bf215546Sopenharmony_ci            continue;
2866bf215546Sopenharmony_ci
2867bf215546Sopenharmony_ci         if (nir->info.stage == MESA_SHADER_GEOMETRY) {
2868bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2869bf215546Sopenharmony_ci            if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter)
2870bf215546Sopenharmony_ci               continue;
2871bf215546Sopenharmony_ci
2872bf215546Sopenharmony_ci            b.cursor = nir_before_instr(instr);
2873bf215546Sopenharmony_ci         } else {
2874bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2875bf215546Sopenharmony_ci            if (intr->intrinsic != nir_intrinsic_store_deref)
2876bf215546Sopenharmony_ci               continue;
2877bf215546Sopenharmony_ci
2878bf215546Sopenharmony_ci            nir_variable *var = nir_intrinsic_get_var(intr, 0);
2879bf215546Sopenharmony_ci            if (var->data.mode != nir_var_shader_out || var->data.location != VARYING_SLOT_POS)
2880bf215546Sopenharmony_ci               continue;
2881bf215546Sopenharmony_ci
2882bf215546Sopenharmony_ci            b.cursor = nir_after_instr(instr);
2883bf215546Sopenharmony_ci         }
2884bf215546Sopenharmony_ci
2885bf215546Sopenharmony_ci         if (!layer)
2886bf215546Sopenharmony_ci            layer = find_layer_out_var(nir);
2887bf215546Sopenharmony_ci
2888bf215546Sopenharmony_ci         nir_store_var(&b, layer, nir_load_view_index(&b), 1);
2889bf215546Sopenharmony_ci
2890bf215546Sopenharmony_ci         /* Update outputs_written to reflect that the pass added a new output. */
2891bf215546Sopenharmony_ci         nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_LAYER);
2892bf215546Sopenharmony_ci
2893bf215546Sopenharmony_ci         progress = true;
2894bf215546Sopenharmony_ci         if (nir->info.stage == MESA_SHADER_VERTEX)
2895bf215546Sopenharmony_ci            break;
2896bf215546Sopenharmony_ci      }
2897bf215546Sopenharmony_ci      if (nir->info.stage == MESA_SHADER_VERTEX && progress)
2898bf215546Sopenharmony_ci         break;
2899bf215546Sopenharmony_ci   }
2900bf215546Sopenharmony_ci
2901bf215546Sopenharmony_ci   if (progress)
2902bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
2903bf215546Sopenharmony_ci   else
2904bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
2905bf215546Sopenharmony_ci
2906bf215546Sopenharmony_ci   return progress;
2907bf215546Sopenharmony_ci}
2908bf215546Sopenharmony_ci
2909bf215546Sopenharmony_cistatic bool
2910bf215546Sopenharmony_ciradv_export_implicit_primitive_id(nir_shader *nir)
2911bf215546Sopenharmony_ci{
2912bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2913bf215546Sopenharmony_ci   nir_builder b;
2914bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
2915bf215546Sopenharmony_ci
2916bf215546Sopenharmony_ci   b.cursor = nir_after_cf_list(&impl->body);
2917bf215546Sopenharmony_ci
2918bf215546Sopenharmony_ci   nir_variable *var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), NULL);
2919bf215546Sopenharmony_ci   var->data.location = VARYING_SLOT_PRIMITIVE_ID;
2920bf215546Sopenharmony_ci   var->data.interpolation = INTERP_MODE_NONE;
2921bf215546Sopenharmony_ci
2922bf215546Sopenharmony_ci   nir_store_var(&b, var, nir_load_primitive_id(&b), 1);
2923bf215546Sopenharmony_ci
2924bf215546Sopenharmony_ci   /* Update outputs_written to reflect that the pass added a new output. */
2925bf215546Sopenharmony_ci   nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID);
2926bf215546Sopenharmony_ci
2927bf215546Sopenharmony_ci   nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
2928bf215546Sopenharmony_ci
2929bf215546Sopenharmony_ci   return true;
2930bf215546Sopenharmony_ci}
2931bf215546Sopenharmony_ci
2932bf215546Sopenharmony_cistatic void
2933bf215546Sopenharmony_ciradv_link_shaders(struct radv_pipeline *pipeline,
2934bf215546Sopenharmony_ci                  const struct radv_pipeline_key *pipeline_key,
2935bf215546Sopenharmony_ci                  const struct radv_pipeline_stage *stages,
2936bf215546Sopenharmony_ci                  bool optimize_conservatively,
2937bf215546Sopenharmony_ci                  gl_shader_stage last_vgt_api_stage)
2938bf215546Sopenharmony_ci{
2939bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->device->physical_device;
2940bf215546Sopenharmony_ci   nir_shader *ordered_shaders[MESA_VULKAN_SHADER_STAGES];
2941bf215546Sopenharmony_ci   int shader_count = 0;
2942bf215546Sopenharmony_ci
2943bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir) {
2944bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_FRAGMENT].nir;
2945bf215546Sopenharmony_ci   }
2946bf215546Sopenharmony_ci   if (stages[MESA_SHADER_GEOMETRY].nir) {
2947bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_GEOMETRY].nir;
2948bf215546Sopenharmony_ci   }
2949bf215546Sopenharmony_ci   if (stages[MESA_SHADER_TESS_EVAL].nir) {
2950bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_TESS_EVAL].nir;
2951bf215546Sopenharmony_ci   }
2952bf215546Sopenharmony_ci   if (stages[MESA_SHADER_TESS_CTRL].nir) {
2953bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_TESS_CTRL].nir;
2954bf215546Sopenharmony_ci   }
2955bf215546Sopenharmony_ci   if (stages[MESA_SHADER_VERTEX].nir) {
2956bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_VERTEX].nir;
2957bf215546Sopenharmony_ci   }
2958bf215546Sopenharmony_ci   if (stages[MESA_SHADER_MESH].nir) {
2959bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_MESH].nir;
2960bf215546Sopenharmony_ci   }
2961bf215546Sopenharmony_ci   if (stages[MESA_SHADER_TASK].nir) {
2962bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_TASK].nir;
2963bf215546Sopenharmony_ci   }
2964bf215546Sopenharmony_ci   if (stages[MESA_SHADER_COMPUTE].nir) {
2965bf215546Sopenharmony_ci      ordered_shaders[shader_count++] = stages[MESA_SHADER_COMPUTE].nir;
2966bf215546Sopenharmony_ci   }
2967bf215546Sopenharmony_ci
2968bf215546Sopenharmony_ci   if (stages[MESA_SHADER_MESH].nir && stages[MESA_SHADER_FRAGMENT].nir) {
2969bf215546Sopenharmony_ci      nir_shader *ps = stages[MESA_SHADER_FRAGMENT].nir;
2970bf215546Sopenharmony_ci
2971bf215546Sopenharmony_ci      nir_foreach_shader_in_variable(var, ps) {
2972bf215546Sopenharmony_ci         /* These variables are per-primitive when used with a mesh shader. */
2973bf215546Sopenharmony_ci         if (var->data.location == VARYING_SLOT_PRIMITIVE_ID ||
2974bf215546Sopenharmony_ci             var->data.location == VARYING_SLOT_VIEWPORT ||
2975bf215546Sopenharmony_ci             var->data.location == VARYING_SLOT_LAYER)
2976bf215546Sopenharmony_ci            var->data.per_primitive = true;
2977bf215546Sopenharmony_ci      }
2978bf215546Sopenharmony_ci   }
2979bf215546Sopenharmony_ci
2980bf215546Sopenharmony_ci   bool has_geom_tess = stages[MESA_SHADER_GEOMETRY].nir || stages[MESA_SHADER_TESS_CTRL].nir;
2981bf215546Sopenharmony_ci   bool merged_gs = stages[MESA_SHADER_GEOMETRY].nir && pdevice->rad_info.gfx_level >= GFX9;
2982bf215546Sopenharmony_ci
2983bf215546Sopenharmony_ci   if (!optimize_conservatively && shader_count > 1) {
2984bf215546Sopenharmony_ci      unsigned first = ordered_shaders[shader_count - 1]->info.stage;
2985bf215546Sopenharmony_ci      unsigned last = ordered_shaders[0]->info.stage;
2986bf215546Sopenharmony_ci
2987bf215546Sopenharmony_ci      if (ordered_shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
2988bf215546Sopenharmony_ci          ordered_shaders[1]->info.has_transform_feedback_varyings)
2989bf215546Sopenharmony_ci         nir_link_xfb_varyings(ordered_shaders[1], ordered_shaders[0]);
2990bf215546Sopenharmony_ci
2991bf215546Sopenharmony_ci      for (int i = 1; i < shader_count; ++i) {
2992bf215546Sopenharmony_ci         nir_lower_io_arrays_to_elements(ordered_shaders[i], ordered_shaders[i - 1]);
2993bf215546Sopenharmony_ci         nir_validate_shader(ordered_shaders[i], "after nir_lower_io_arrays_to_elements");
2994bf215546Sopenharmony_ci         nir_validate_shader(ordered_shaders[i - 1], "after nir_lower_io_arrays_to_elements");
2995bf215546Sopenharmony_ci      }
2996bf215546Sopenharmony_ci
2997bf215546Sopenharmony_ci      for (int i = 0; i < shader_count; ++i) {
2998bf215546Sopenharmony_ci         nir_variable_mode mask = 0;
2999bf215546Sopenharmony_ci
3000bf215546Sopenharmony_ci         if (ordered_shaders[i]->info.stage != first)
3001bf215546Sopenharmony_ci            mask = mask | nir_var_shader_in;
3002bf215546Sopenharmony_ci
3003bf215546Sopenharmony_ci         if (ordered_shaders[i]->info.stage != last)
3004bf215546Sopenharmony_ci            mask = mask | nir_var_shader_out;
3005bf215546Sopenharmony_ci
3006bf215546Sopenharmony_ci         bool progress = false;
3007bf215546Sopenharmony_ci         NIR_PASS(progress, ordered_shaders[i], nir_lower_io_to_scalar_early, mask);
3008bf215546Sopenharmony_ci         if (progress) {
3009bf215546Sopenharmony_ci            /* Optimize the new vector code and then remove dead vars */
3010bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_copy_prop);
3011bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_opt_shrink_vectors);
3012bf215546Sopenharmony_ci
3013bf215546Sopenharmony_ci            if (ordered_shaders[i]->info.stage != last) {
3014bf215546Sopenharmony_ci               /* Optimize swizzled movs of load_const for
3015bf215546Sopenharmony_ci                * nir_link_opt_varyings's constant propagation
3016bf215546Sopenharmony_ci                */
3017bf215546Sopenharmony_ci               NIR_PASS(_, ordered_shaders[i], nir_opt_constant_folding);
3018bf215546Sopenharmony_ci               /* For nir_link_opt_varyings's duplicate input opt */
3019bf215546Sopenharmony_ci               NIR_PASS(_, ordered_shaders[i], nir_opt_cse);
3020bf215546Sopenharmony_ci            }
3021bf215546Sopenharmony_ci
3022bf215546Sopenharmony_ci            /* Run copy-propagation to help remove dead
3023bf215546Sopenharmony_ci             * output variables (some shaders have useless
3024bf215546Sopenharmony_ci             * copies to/from an output), so compaction
3025bf215546Sopenharmony_ci             * later will be more effective.
3026bf215546Sopenharmony_ci             *
3027bf215546Sopenharmony_ci             * This will have been done earlier but it might
3028bf215546Sopenharmony_ci             * not have worked because the outputs were vector.
3029bf215546Sopenharmony_ci             */
3030bf215546Sopenharmony_ci            if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
3031bf215546Sopenharmony_ci               NIR_PASS(_, ordered_shaders[i], nir_opt_copy_prop_vars);
3032bf215546Sopenharmony_ci
3033bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_opt_dce);
3034bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables,
3035bf215546Sopenharmony_ci                     nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
3036bf215546Sopenharmony_ci         }
3037bf215546Sopenharmony_ci      }
3038bf215546Sopenharmony_ci   }
3039bf215546Sopenharmony_ci
3040bf215546Sopenharmony_ci   /* Export the primitive ID when VS or TES don't export it because it's implicit, while it's
3041bf215546Sopenharmony_ci    * required for GS or MS. The primitive ID is added during lowering for NGG.
3042bf215546Sopenharmony_ci    */
3043bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir &&
3044bf215546Sopenharmony_ci       (stages[MESA_SHADER_FRAGMENT].nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) &&
3045bf215546Sopenharmony_ci       !(stages[last_vgt_api_stage].nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID) &&
3046bf215546Sopenharmony_ci       ((last_vgt_api_stage == MESA_SHADER_VERTEX && !stages[MESA_SHADER_VERTEX].info.is_ngg) ||
3047bf215546Sopenharmony_ci        (last_vgt_api_stage == MESA_SHADER_TESS_EVAL && !stages[MESA_SHADER_TESS_EVAL].info.is_ngg))) {
3048bf215546Sopenharmony_ci      radv_export_implicit_primitive_id(stages[last_vgt_api_stage].nir);
3049bf215546Sopenharmony_ci   }
3050bf215546Sopenharmony_ci
3051bf215546Sopenharmony_ci   if (!optimize_conservatively) {
3052bf215546Sopenharmony_ci      bool uses_xfb = last_vgt_api_stage != -1 &&
3053bf215546Sopenharmony_ci                      stages[last_vgt_api_stage].nir->xfb_info;
3054bf215546Sopenharmony_ci
3055bf215546Sopenharmony_ci      for (unsigned i = 0; i < shader_count; ++i) {
3056bf215546Sopenharmony_ci         shader_info *info = &ordered_shaders[i]->info;
3057bf215546Sopenharmony_ci
3058bf215546Sopenharmony_ci         /* Remove exports without color attachment or writemask. */
3059bf215546Sopenharmony_ci         if (info->stage == MESA_SHADER_FRAGMENT) {
3060bf215546Sopenharmony_ci            bool fixup_derefs = false;
3061bf215546Sopenharmony_ci            nir_foreach_variable_with_modes(var, ordered_shaders[i], nir_var_shader_out) {
3062bf215546Sopenharmony_ci               int idx = var->data.location;
3063bf215546Sopenharmony_ci               idx -= FRAG_RESULT_DATA0;
3064bf215546Sopenharmony_ci               if (idx < 0)
3065bf215546Sopenharmony_ci                  continue;
3066bf215546Sopenharmony_ci
3067bf215546Sopenharmony_ci               unsigned col_format = (pipeline_key->ps.col_format >> (4 * idx)) & 0xf;
3068bf215546Sopenharmony_ci               unsigned cb_target_mask = (pipeline_key->ps.cb_target_mask >> (4 * idx)) & 0xf;
3069bf215546Sopenharmony_ci
3070bf215546Sopenharmony_ci               if (col_format == V_028714_SPI_SHADER_ZERO ||
3071bf215546Sopenharmony_ci                   (col_format == V_028714_SPI_SHADER_32_R && !cb_target_mask &&
3072bf215546Sopenharmony_ci                    !pipeline_key->ps.mrt0_is_dual_src)) {
3073bf215546Sopenharmony_ci                  /* Remove the color export if it's unused or in presence of holes. */
3074bf215546Sopenharmony_ci                  info->outputs_written &= ~BITFIELD64_BIT(var->data.location);
3075bf215546Sopenharmony_ci                  var->data.location = 0;
3076bf215546Sopenharmony_ci                  var->data.mode = nir_var_shader_temp;
3077bf215546Sopenharmony_ci                  fixup_derefs = true;
3078bf215546Sopenharmony_ci               }
3079bf215546Sopenharmony_ci            }
3080bf215546Sopenharmony_ci            if (fixup_derefs) {
3081bf215546Sopenharmony_ci               NIR_PASS_V(ordered_shaders[i], nir_fixup_deref_modes);
3082bf215546Sopenharmony_ci               NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables, nir_var_shader_temp,
3083bf215546Sopenharmony_ci                        NULL);
3084bf215546Sopenharmony_ci               NIR_PASS(_, ordered_shaders[i], nir_opt_dce);
3085bf215546Sopenharmony_ci            }
3086bf215546Sopenharmony_ci            continue;
3087bf215546Sopenharmony_ci         }
3088bf215546Sopenharmony_ci
3089bf215546Sopenharmony_ci         /* Remove PSIZ from shaders when it's not needed.
3090bf215546Sopenharmony_ci          * This is typically produced by translation layers like Zink or D9VK.
3091bf215546Sopenharmony_ci          */
3092bf215546Sopenharmony_ci         if (uses_xfb || !(info->outputs_written & VARYING_BIT_PSIZ))
3093bf215546Sopenharmony_ci            continue;
3094bf215546Sopenharmony_ci
3095bf215546Sopenharmony_ci         bool next_stage_needs_psiz =
3096bf215546Sopenharmony_ci            i != 0 && /* ordered_shaders is backwards, so next stage is: i - 1 */
3097bf215546Sopenharmony_ci            ordered_shaders[i - 1]->info.inputs_read & VARYING_BIT_PSIZ;
3098bf215546Sopenharmony_ci         bool topology_uses_psiz =
3099bf215546Sopenharmony_ci            info->stage == last_vgt_api_stage &&
3100bf215546Sopenharmony_ci            ((info->stage == MESA_SHADER_VERTEX && pipeline_key->vs.topology == V_008958_DI_PT_POINTLIST) ||
3101bf215546Sopenharmony_ci             (info->stage == MESA_SHADER_TESS_EVAL && info->tess.point_mode) ||
3102bf215546Sopenharmony_ci             (info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == SHADER_PRIM_POINTS) ||
3103bf215546Sopenharmony_ci             (info->stage == MESA_SHADER_MESH && info->mesh.primitive_type == SHADER_PRIM_POINTS));
3104bf215546Sopenharmony_ci
3105bf215546Sopenharmony_ci         nir_variable *psiz_var =
3106bf215546Sopenharmony_ci               nir_find_variable_with_location(ordered_shaders[i], nir_var_shader_out, VARYING_SLOT_PSIZ);
3107bf215546Sopenharmony_ci
3108bf215546Sopenharmony_ci         if (!next_stage_needs_psiz && !topology_uses_psiz && psiz_var) {
3109bf215546Sopenharmony_ci            /* Change PSIZ to a global variable which allows it to be DCE'd. */
3110bf215546Sopenharmony_ci            psiz_var->data.location = 0;
3111bf215546Sopenharmony_ci            psiz_var->data.mode = nir_var_shader_temp;
3112bf215546Sopenharmony_ci
3113bf215546Sopenharmony_ci            info->outputs_written &= ~VARYING_BIT_PSIZ;
3114bf215546Sopenharmony_ci            NIR_PASS_V(ordered_shaders[i], nir_fixup_deref_modes);
3115bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables, nir_var_shader_temp, NULL);
3116bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_opt_dce);
3117bf215546Sopenharmony_ci         }
3118bf215546Sopenharmony_ci      }
3119bf215546Sopenharmony_ci   }
3120bf215546Sopenharmony_ci
3121bf215546Sopenharmony_ci   /* Lower the viewport index to zero when the last vertex stage doesn't export it. */
3122bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir &&
3123bf215546Sopenharmony_ci       (stages[MESA_SHADER_FRAGMENT].nir->info.inputs_read & VARYING_BIT_VIEWPORT) &&
3124bf215546Sopenharmony_ci       !(stages[last_vgt_api_stage].nir->info.outputs_written & VARYING_BIT_VIEWPORT)) {
3125bf215546Sopenharmony_ci      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_lower_viewport_to_zero);
3126bf215546Sopenharmony_ci   }
3127bf215546Sopenharmony_ci
3128bf215546Sopenharmony_ci   /* Export the layer in the last VGT stage if multiview is used. */
3129bf215546Sopenharmony_ci   if (pipeline_key->has_multiview_view_index && last_vgt_api_stage != -1 &&
3130bf215546Sopenharmony_ci       !(stages[last_vgt_api_stage].nir->info.outputs_written &
3131bf215546Sopenharmony_ci         VARYING_BIT_LAYER)) {
3132bf215546Sopenharmony_ci      nir_shader *last_vgt_shader = stages[last_vgt_api_stage].nir;
3133bf215546Sopenharmony_ci      NIR_PASS(_, last_vgt_shader, radv_lower_multiview);
3134bf215546Sopenharmony_ci   }
3135bf215546Sopenharmony_ci
3136bf215546Sopenharmony_ci   for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) {
3137bf215546Sopenharmony_ci      if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) {
3138bf215546Sopenharmony_ci         nir_validate_shader(ordered_shaders[i], "after nir_link_opt_varyings");
3139bf215546Sopenharmony_ci         nir_validate_shader(ordered_shaders[i - 1], "after nir_link_opt_varyings");
3140bf215546Sopenharmony_ci
3141bf215546Sopenharmony_ci         NIR_PASS(_, ordered_shaders[i - 1], nir_opt_constant_folding);
3142bf215546Sopenharmony_ci         NIR_PASS(_, ordered_shaders[i - 1], nir_opt_algebraic);
3143bf215546Sopenharmony_ci         NIR_PASS(_, ordered_shaders[i - 1], nir_opt_dce);
3144bf215546Sopenharmony_ci      }
3145bf215546Sopenharmony_ci
3146bf215546Sopenharmony_ci      NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables, nir_var_shader_out, NULL);
3147bf215546Sopenharmony_ci      NIR_PASS(_, ordered_shaders[i - 1], nir_remove_dead_variables, nir_var_shader_in, NULL);
3148bf215546Sopenharmony_ci
3149bf215546Sopenharmony_ci      bool progress = nir_remove_unused_varyings(ordered_shaders[i], ordered_shaders[i - 1]);
3150bf215546Sopenharmony_ci
3151bf215546Sopenharmony_ci      nir_compact_varyings(ordered_shaders[i], ordered_shaders[i - 1], true);
3152bf215546Sopenharmony_ci      nir_validate_shader(ordered_shaders[i], "after nir_compact_varyings");
3153bf215546Sopenharmony_ci      nir_validate_shader(ordered_shaders[i - 1], "after nir_compact_varyings");
3154bf215546Sopenharmony_ci      if (ordered_shaders[i]->info.stage == MESA_SHADER_MESH) {
3155bf215546Sopenharmony_ci         /* nir_compact_varyings can change the location of per-vertex and per-primitive outputs */
3156bf215546Sopenharmony_ci         nir_shader_gather_info(ordered_shaders[i], nir_shader_get_entrypoint(ordered_shaders[i]));
3157bf215546Sopenharmony_ci      }
3158bf215546Sopenharmony_ci
3159bf215546Sopenharmony_ci      if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL ||
3160bf215546Sopenharmony_ci          ordered_shaders[i]->info.stage == MESA_SHADER_MESH ||
3161bf215546Sopenharmony_ci          (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) ||
3162bf215546Sopenharmony_ci          (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
3163bf215546Sopenharmony_ci         NIR_PASS(_, ordered_shaders[i], nir_lower_io_to_vector, nir_var_shader_out);
3164bf215546Sopenharmony_ci         if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
3165bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_vectorize_tess_levels);
3166bf215546Sopenharmony_ci         NIR_PASS(_, ordered_shaders[i], nir_opt_combine_stores, nir_var_shader_out);
3167bf215546Sopenharmony_ci      }
3168bf215546Sopenharmony_ci      if (ordered_shaders[i - 1]->info.stage == MESA_SHADER_GEOMETRY ||
3169bf215546Sopenharmony_ci          ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_CTRL ||
3170bf215546Sopenharmony_ci          ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_EVAL) {
3171bf215546Sopenharmony_ci         NIR_PASS(_, ordered_shaders[i - 1], nir_lower_io_to_vector, nir_var_shader_in);
3172bf215546Sopenharmony_ci      }
3173bf215546Sopenharmony_ci
3174bf215546Sopenharmony_ci      if (progress) {
3175bf215546Sopenharmony_ci         progress = false;
3176bf215546Sopenharmony_ci         NIR_PASS(progress, ordered_shaders[i], nir_lower_global_vars_to_local);
3177bf215546Sopenharmony_ci         if (progress) {
3178bf215546Sopenharmony_ci            ac_nir_lower_indirect_derefs(ordered_shaders[i], pdevice->rad_info.gfx_level);
3179bf215546Sopenharmony_ci            /* remove dead writes, which can remove input loads */
3180bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_lower_vars_to_ssa);
3181bf215546Sopenharmony_ci            NIR_PASS(_, ordered_shaders[i], nir_opt_dce);
3182bf215546Sopenharmony_ci         }
3183bf215546Sopenharmony_ci
3184bf215546Sopenharmony_ci         progress = false;
3185bf215546Sopenharmony_ci         NIR_PASS(progress, ordered_shaders[i - 1], nir_lower_global_vars_to_local);
3186bf215546Sopenharmony_ci         if (progress) {
3187bf215546Sopenharmony_ci            ac_nir_lower_indirect_derefs(ordered_shaders[i - 1], pdevice->rad_info.gfx_level);
3188bf215546Sopenharmony_ci         }
3189bf215546Sopenharmony_ci      }
3190bf215546Sopenharmony_ci   }
3191bf215546Sopenharmony_ci}
3192bf215546Sopenharmony_ci
3193bf215546Sopenharmony_cistatic void
3194bf215546Sopenharmony_ciradv_set_driver_locations(struct radv_pipeline *pipeline, struct radv_pipeline_stage *stages,
3195bf215546Sopenharmony_ci                          gl_shader_stage last_vgt_api_stage)
3196bf215546Sopenharmony_ci{
3197bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->device->physical_device;
3198bf215546Sopenharmony_ci
3199bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir) {
3200bf215546Sopenharmony_ci      nir_foreach_shader_out_variable(var, stages[MESA_SHADER_FRAGMENT].nir)
3201bf215546Sopenharmony_ci      {
3202bf215546Sopenharmony_ci         var->data.driver_location = var->data.location + var->data.index;
3203bf215546Sopenharmony_ci      }
3204bf215546Sopenharmony_ci   }
3205bf215546Sopenharmony_ci
3206bf215546Sopenharmony_ci   if (stages[MESA_SHADER_MESH].nir) {
3207bf215546Sopenharmony_ci      /* ac_nir_lower_ngg ignores driver locations for mesh shaders,
3208bf215546Sopenharmony_ci       * but set them to all zero just to be on the safe side.
3209bf215546Sopenharmony_ci       */
3210bf215546Sopenharmony_ci      nir_foreach_shader_out_variable(var, stages[MESA_SHADER_MESH].nir) {
3211bf215546Sopenharmony_ci         var->data.driver_location = 0;
3212bf215546Sopenharmony_ci      }
3213bf215546Sopenharmony_ci      return;
3214bf215546Sopenharmony_ci   }
3215bf215546Sopenharmony_ci
3216bf215546Sopenharmony_ci   if (!stages[MESA_SHADER_VERTEX].nir)
3217bf215546Sopenharmony_ci      return;
3218bf215546Sopenharmony_ci
3219bf215546Sopenharmony_ci   bool has_tess = stages[MESA_SHADER_TESS_CTRL].nir;
3220bf215546Sopenharmony_ci   bool has_gs = stages[MESA_SHADER_GEOMETRY].nir;
3221bf215546Sopenharmony_ci
3222bf215546Sopenharmony_ci   /* Merged stage for VS and TES */
3223bf215546Sopenharmony_ci   unsigned vs_info_idx = MESA_SHADER_VERTEX;
3224bf215546Sopenharmony_ci   unsigned tes_info_idx = MESA_SHADER_TESS_EVAL;
3225bf215546Sopenharmony_ci
3226bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX9) {
3227bf215546Sopenharmony_ci      /* These are merged into the next stage */
3228bf215546Sopenharmony_ci      vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY;
3229bf215546Sopenharmony_ci      tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL;
3230bf215546Sopenharmony_ci   }
3231bf215546Sopenharmony_ci
3232bf215546Sopenharmony_ci   nir_foreach_shader_in_variable (var, stages[MESA_SHADER_VERTEX].nir) {
3233bf215546Sopenharmony_ci      var->data.driver_location = var->data.location;
3234bf215546Sopenharmony_ci   }
3235bf215546Sopenharmony_ci
3236bf215546Sopenharmony_ci   if (has_tess) {
3237bf215546Sopenharmony_ci      nir_linked_io_var_info vs2tcs = nir_assign_linked_io_var_locations(
3238bf215546Sopenharmony_ci         stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_TESS_CTRL].nir);
3239bf215546Sopenharmony_ci      nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations(
3240bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_CTRL].nir, stages[MESA_SHADER_TESS_EVAL].nir);
3241bf215546Sopenharmony_ci
3242bf215546Sopenharmony_ci      stages[MESA_SHADER_VERTEX].info.vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
3243bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs = vs2tcs.num_linked_io_vars;
3244bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
3245bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars;
3246bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_EVAL].info.tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
3247bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_EVAL].info.tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
3248bf215546Sopenharmony_ci
3249bf215546Sopenharmony_ci      /* Copy data to merged stage */
3250bf215546Sopenharmony_ci      stages[vs_info_idx].info.vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
3251bf215546Sopenharmony_ci      stages[tes_info_idx].info.tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
3252bf215546Sopenharmony_ci      stages[tes_info_idx].info.tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
3253bf215546Sopenharmony_ci
3254bf215546Sopenharmony_ci      if (has_gs) {
3255bf215546Sopenharmony_ci         nir_linked_io_var_info tes2gs = nir_assign_linked_io_var_locations(
3256bf215546Sopenharmony_ci            stages[MESA_SHADER_TESS_EVAL].nir, stages[MESA_SHADER_GEOMETRY].nir);
3257bf215546Sopenharmony_ci
3258bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_EVAL].info.tes.num_linked_outputs = tes2gs.num_linked_io_vars;
3259bf215546Sopenharmony_ci         stages[MESA_SHADER_GEOMETRY].info.gs.num_linked_inputs = tes2gs.num_linked_io_vars;
3260bf215546Sopenharmony_ci
3261bf215546Sopenharmony_ci         /* Copy data to merged stage */
3262bf215546Sopenharmony_ci         stages[tes_info_idx].info.tes.num_linked_outputs = tes2gs.num_linked_io_vars;
3263bf215546Sopenharmony_ci      }
3264bf215546Sopenharmony_ci   } else if (has_gs) {
3265bf215546Sopenharmony_ci      nir_linked_io_var_info vs2gs = nir_assign_linked_io_var_locations(
3266bf215546Sopenharmony_ci         stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_GEOMETRY].nir);
3267bf215546Sopenharmony_ci
3268bf215546Sopenharmony_ci      stages[MESA_SHADER_VERTEX].info.vs.num_linked_outputs = vs2gs.num_linked_io_vars;
3269bf215546Sopenharmony_ci      stages[MESA_SHADER_GEOMETRY].info.gs.num_linked_inputs = vs2gs.num_linked_io_vars;
3270bf215546Sopenharmony_ci
3271bf215546Sopenharmony_ci      /* Copy data to merged stage */
3272bf215546Sopenharmony_ci      stages[vs_info_idx].info.vs.num_linked_outputs = vs2gs.num_linked_io_vars;
3273bf215546Sopenharmony_ci   }
3274bf215546Sopenharmony_ci
3275bf215546Sopenharmony_ci   assert(last_vgt_api_stage != MESA_SHADER_NONE);
3276bf215546Sopenharmony_ci   nir_foreach_shader_out_variable(var, stages[last_vgt_api_stage].nir)
3277bf215546Sopenharmony_ci   {
3278bf215546Sopenharmony_ci      var->data.driver_location = var->data.location;
3279bf215546Sopenharmony_ci   }
3280bf215546Sopenharmony_ci}
3281bf215546Sopenharmony_ci
3282bf215546Sopenharmony_cistatic struct radv_pipeline_key
3283bf215546Sopenharmony_ciradv_generate_pipeline_key(const struct radv_pipeline *pipeline, VkPipelineCreateFlags flags)
3284bf215546Sopenharmony_ci{
3285bf215546Sopenharmony_ci   struct radv_device *device = pipeline->device;
3286bf215546Sopenharmony_ci   struct radv_pipeline_key key;
3287bf215546Sopenharmony_ci
3288bf215546Sopenharmony_ci   memset(&key, 0, sizeof(key));
3289bf215546Sopenharmony_ci
3290bf215546Sopenharmony_ci   if (flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
3291bf215546Sopenharmony_ci      key.optimisations_disabled = 1;
3292bf215546Sopenharmony_ci
3293bf215546Sopenharmony_ci   key.disable_aniso_single_level = device->instance->disable_aniso_single_level &&
3294bf215546Sopenharmony_ci                                    device->physical_device->rad_info.gfx_level < GFX8;
3295bf215546Sopenharmony_ci
3296bf215546Sopenharmony_ci   key.image_2d_view_of_3d = device->image_2d_view_of_3d &&
3297bf215546Sopenharmony_ci                             device->physical_device->rad_info.gfx_level == GFX9;
3298bf215546Sopenharmony_ci
3299bf215546Sopenharmony_ci   return key;
3300bf215546Sopenharmony_ci}
3301bf215546Sopenharmony_ci
3302bf215546Sopenharmony_cistatic struct radv_pipeline_key
3303bf215546Sopenharmony_ciradv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipeline,
3304bf215546Sopenharmony_ci                                    const VkGraphicsPipelineCreateInfo *pCreateInfo,
3305bf215546Sopenharmony_ci                                    const struct radv_graphics_pipeline_info *info,
3306bf215546Sopenharmony_ci                                    const struct radv_blend_state *blend)
3307bf215546Sopenharmony_ci{
3308bf215546Sopenharmony_ci   struct radv_device *device = pipeline->base.device;
3309bf215546Sopenharmony_ci   struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags);
3310bf215546Sopenharmony_ci
3311bf215546Sopenharmony_ci   key.has_multiview_view_index = !!info->ri.view_mask;
3312bf215546Sopenharmony_ci
3313bf215546Sopenharmony_ci   if (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT) {
3314bf215546Sopenharmony_ci      key.vs.dynamic_input_state = true;
3315bf215546Sopenharmony_ci   }
3316bf215546Sopenharmony_ci
3317bf215546Sopenharmony_ci   /* Vertex input state */
3318bf215546Sopenharmony_ci   key.vs.instance_rate_inputs = info->vi.instance_rate_inputs;
3319bf215546Sopenharmony_ci   key.vs.vertex_post_shuffle = info->vi.vertex_post_shuffle;
3320bf215546Sopenharmony_ci
3321bf215546Sopenharmony_ci   for (uint32_t i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
3322bf215546Sopenharmony_ci      key.vs.instance_rate_divisors[i] = info->vi.instance_rate_divisors[i];
3323bf215546Sopenharmony_ci      key.vs.vertex_attribute_formats[i] = info->vi.vertex_attribute_formats[i];
3324bf215546Sopenharmony_ci      key.vs.vertex_attribute_bindings[i] = info->vi.vertex_attribute_bindings[i];
3325bf215546Sopenharmony_ci      key.vs.vertex_attribute_offsets[i] = info->vi.vertex_attribute_offsets[i];
3326bf215546Sopenharmony_ci      key.vs.vertex_attribute_strides[i] = info->vi.vertex_attribute_strides[i];
3327bf215546Sopenharmony_ci      key.vs.vertex_alpha_adjust[i] = info->vi.vertex_alpha_adjust[i];
3328bf215546Sopenharmony_ci   }
3329bf215546Sopenharmony_ci
3330bf215546Sopenharmony_ci   for (uint32_t i = 0; i < MAX_VBS; i++) {
3331bf215546Sopenharmony_ci      key.vs.vertex_binding_align[i] = info->vi.vertex_binding_align[i];
3332bf215546Sopenharmony_ci   }
3333bf215546Sopenharmony_ci
3334bf215546Sopenharmony_ci   key.tcs.tess_input_vertices = info->ts.patch_control_points;
3335bf215546Sopenharmony_ci
3336bf215546Sopenharmony_ci   if (info->ms.raster_samples > 1) {
3337bf215546Sopenharmony_ci      uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(info);
3338bf215546Sopenharmony_ci      key.ps.num_samples = info->ms.raster_samples;
3339bf215546Sopenharmony_ci      key.ps.log2_ps_iter_samples = util_logbase2(ps_iter_samples);
3340bf215546Sopenharmony_ci   }
3341bf215546Sopenharmony_ci
3342bf215546Sopenharmony_ci   key.ps.col_format = blend->spi_shader_col_format;
3343bf215546Sopenharmony_ci   key.ps.cb_target_mask = blend->cb_target_mask;
3344bf215546Sopenharmony_ci   key.ps.mrt0_is_dual_src = blend->mrt0_is_dual_src;
3345bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level < GFX8) {
3346bf215546Sopenharmony_ci      key.ps.is_int8 = blend->col_format_is_int8;
3347bf215546Sopenharmony_ci      key.ps.is_int10 = blend->col_format_is_int10;
3348bf215546Sopenharmony_ci   }
3349bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX11) {
3350bf215546Sopenharmony_ci      key.ps.alpha_to_coverage_via_mrtz = info->ms.alpha_to_coverage_enable;
3351bf215546Sopenharmony_ci   }
3352bf215546Sopenharmony_ci
3353bf215546Sopenharmony_ci   key.vs.topology = info->ia.primitive_topology;
3354bf215546Sopenharmony_ci
3355bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX10) {
3356bf215546Sopenharmony_ci      key.vs.provoking_vtx_last = info->rs.provoking_vtx_last;
3357bf215546Sopenharmony_ci   }
3358bf215546Sopenharmony_ci
3359bf215546Sopenharmony_ci   if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
3360bf215546Sopenharmony_ci      key.ps.lower_discard_to_demote = true;
3361bf215546Sopenharmony_ci
3362bf215546Sopenharmony_ci   if (device->instance->enable_mrt_output_nan_fixup)
3363bf215546Sopenharmony_ci      key.ps.enable_mrt_output_nan_fixup = blend->col_format_is_float32;
3364bf215546Sopenharmony_ci
3365bf215546Sopenharmony_ci
3366bf215546Sopenharmony_ci   key.ps.force_vrs_enabled = device->force_vrs_enabled;
3367bf215546Sopenharmony_ci
3368bf215546Sopenharmony_ci   if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM)
3369bf215546Sopenharmony_ci      key.invariant_geom = true;
3370bf215546Sopenharmony_ci
3371bf215546Sopenharmony_ci   key.use_ngg = device->physical_device->use_ngg;
3372bf215546Sopenharmony_ci
3373bf215546Sopenharmony_ci   if ((radv_is_vrs_enabled(pipeline, info) || device->force_vrs_enabled) &&
3374bf215546Sopenharmony_ci       (device->physical_device->rad_info.family == CHIP_NAVI21 ||
3375bf215546Sopenharmony_ci        device->physical_device->rad_info.family == CHIP_NAVI22 ||
3376bf215546Sopenharmony_ci        device->physical_device->rad_info.family == CHIP_VANGOGH))
3377bf215546Sopenharmony_ci      key.adjust_frag_coord_z = true;
3378bf215546Sopenharmony_ci
3379bf215546Sopenharmony_ci   if (device->instance->disable_sinking_load_input_fs)
3380bf215546Sopenharmony_ci      key.disable_sinking_load_input_fs = true;
3381bf215546Sopenharmony_ci
3382bf215546Sopenharmony_ci   if (device->primitives_generated_query)
3383bf215546Sopenharmony_ci      key.primitives_generated_query = true;
3384bf215546Sopenharmony_ci
3385bf215546Sopenharmony_ci   key.ps.has_epilog = false; /* TODO: hook up PS epilogs */
3386bf215546Sopenharmony_ci
3387bf215546Sopenharmony_ci   return key;
3388bf215546Sopenharmony_ci}
3389bf215546Sopenharmony_ci
3390bf215546Sopenharmony_cistatic uint8_t
3391bf215546Sopenharmony_ciradv_get_wave_size(struct radv_device *device,  gl_shader_stage stage,
3392bf215546Sopenharmony_ci                   const struct radv_shader_info *info)
3393bf215546Sopenharmony_ci{
3394bf215546Sopenharmony_ci   if (stage == MESA_SHADER_GEOMETRY && !info->is_ngg)
3395bf215546Sopenharmony_ci      return 64;
3396bf215546Sopenharmony_ci   else if (stage == MESA_SHADER_COMPUTE) {
3397bf215546Sopenharmony_ci      return info->cs.subgroup_size;
3398bf215546Sopenharmony_ci   } else if (stage == MESA_SHADER_FRAGMENT)
3399bf215546Sopenharmony_ci      return device->physical_device->ps_wave_size;
3400bf215546Sopenharmony_ci   else if (stage == MESA_SHADER_TASK)
3401bf215546Sopenharmony_ci      return device->physical_device->cs_wave_size;
3402bf215546Sopenharmony_ci   else
3403bf215546Sopenharmony_ci      return device->physical_device->ge_wave_size;
3404bf215546Sopenharmony_ci}
3405bf215546Sopenharmony_ci
3406bf215546Sopenharmony_cistatic uint8_t
3407bf215546Sopenharmony_ciradv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage,
3408bf215546Sopenharmony_ci                         const struct radv_shader_info *info)
3409bf215546Sopenharmony_ci{
3410bf215546Sopenharmony_ci   if (stage == MESA_SHADER_COMPUTE && info->cs.subgroup_size)
3411bf215546Sopenharmony_ci      return info->cs.subgroup_size;
3412bf215546Sopenharmony_ci   return 64;
3413bf215546Sopenharmony_ci}
3414bf215546Sopenharmony_ci
3415bf215546Sopenharmony_cistatic void
3416bf215546Sopenharmony_ciradv_determine_ngg_settings(struct radv_pipeline *pipeline,
3417bf215546Sopenharmony_ci                            const struct radv_pipeline_key *pipeline_key,
3418bf215546Sopenharmony_ci                            struct radv_pipeline_stage *stages,
3419bf215546Sopenharmony_ci                            gl_shader_stage last_vgt_api_stage)
3420bf215546Sopenharmony_ci{
3421bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->device->physical_device;
3422bf215546Sopenharmony_ci
3423bf215546Sopenharmony_ci   /* Shader settings for VS or TES without GS. */
3424bf215546Sopenharmony_ci   if (last_vgt_api_stage == MESA_SHADER_VERTEX ||
3425bf215546Sopenharmony_ci       last_vgt_api_stage == MESA_SHADER_TESS_EVAL) {
3426bf215546Sopenharmony_ci      uint64_t ps_inputs_read =
3427bf215546Sopenharmony_ci         stages[MESA_SHADER_FRAGMENT].nir ? stages[MESA_SHADER_FRAGMENT].nir->info.inputs_read : 0;
3428bf215546Sopenharmony_ci      gl_shader_stage es_stage = last_vgt_api_stage;
3429bf215546Sopenharmony_ci
3430bf215546Sopenharmony_ci      unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(pipeline_key->vs.topology) + 1;
3431bf215546Sopenharmony_ci      if (es_stage == MESA_SHADER_TESS_EVAL)
3432bf215546Sopenharmony_ci         num_vertices_per_prim = stages[es_stage].nir->info.tess.point_mode                      ? 1
3433bf215546Sopenharmony_ci                                 : stages[es_stage].nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2
3434bf215546Sopenharmony_ci                                                                                          : 3;
3435bf215546Sopenharmony_ci      /* TODO: Enable culling for LLVM. */
3436bf215546Sopenharmony_ci      stages[es_stage].info.has_ngg_culling = radv_consider_culling(
3437bf215546Sopenharmony_ci         pdevice, stages[es_stage].nir, ps_inputs_read, num_vertices_per_prim, &stages[es_stage].info) &&
3438bf215546Sopenharmony_ci         !radv_use_llvm_for_stage(pipeline->device, es_stage);
3439bf215546Sopenharmony_ci
3440bf215546Sopenharmony_ci      nir_function_impl *impl = nir_shader_get_entrypoint(stages[es_stage].nir);
3441bf215546Sopenharmony_ci      stages[es_stage].info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body);
3442bf215546Sopenharmony_ci
3443bf215546Sopenharmony_ci      /* Invocations that process an input vertex */
3444bf215546Sopenharmony_ci      const struct gfx10_ngg_info *ngg_info = &stages[es_stage].info.ngg_info;
3445bf215546Sopenharmony_ci      unsigned max_vtx_in = MIN2(256, ngg_info->enable_vertex_grouping ? ngg_info->hw_max_esverts : num_vertices_per_prim * ngg_info->max_gsprims);
3446bf215546Sopenharmony_ci
3447bf215546Sopenharmony_ci      unsigned lds_bytes_if_culling_off = 0;
3448bf215546Sopenharmony_ci      /* We need LDS space when VS needs to export the primitive ID. */
3449bf215546Sopenharmony_ci      if (es_stage == MESA_SHADER_VERTEX && stages[es_stage].info.vs.outinfo.export_prim_id)
3450bf215546Sopenharmony_ci         lds_bytes_if_culling_off = max_vtx_in * 4u;
3451bf215546Sopenharmony_ci      stages[es_stage].info.num_lds_blocks_when_not_culling =
3452bf215546Sopenharmony_ci         DIV_ROUND_UP(lds_bytes_if_culling_off, pdevice->rad_info.lds_encode_granularity);
3453bf215546Sopenharmony_ci
3454bf215546Sopenharmony_ci      /* NGG passthrough mode should be disabled when culling and when the vertex shader exports the
3455bf215546Sopenharmony_ci       * primitive ID.
3456bf215546Sopenharmony_ci       */
3457bf215546Sopenharmony_ci      stages[es_stage].info.is_ngg_passthrough = stages[es_stage].info.is_ngg_passthrough &&
3458bf215546Sopenharmony_ci                                                !stages[es_stage].info.has_ngg_culling &&
3459bf215546Sopenharmony_ci                                                 !(es_stage == MESA_SHADER_VERTEX &&
3460bf215546Sopenharmony_ci                                                   stages[es_stage].info.vs.outinfo.export_prim_id);
3461bf215546Sopenharmony_ci   }
3462bf215546Sopenharmony_ci}
3463bf215546Sopenharmony_ci
3464bf215546Sopenharmony_cistatic void
3465bf215546Sopenharmony_ciradv_fill_shader_info_ngg(struct radv_pipeline *pipeline,
3466bf215546Sopenharmony_ci                          const struct radv_pipeline_key *pipeline_key,
3467bf215546Sopenharmony_ci                          struct radv_pipeline_stage *stages)
3468bf215546Sopenharmony_ci{
3469bf215546Sopenharmony_ci   struct radv_device *device = pipeline->device;
3470bf215546Sopenharmony_ci
3471bf215546Sopenharmony_ci   if (pipeline_key->use_ngg) {
3472bf215546Sopenharmony_ci      if (stages[MESA_SHADER_TESS_CTRL].nir) {
3473bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_EVAL].info.is_ngg = true;
3474bf215546Sopenharmony_ci      } else if (stages[MESA_SHADER_VERTEX].nir) {
3475bf215546Sopenharmony_ci         stages[MESA_SHADER_VERTEX].info.is_ngg = true;
3476bf215546Sopenharmony_ci      } else if (stages[MESA_SHADER_MESH].nir) {
3477bf215546Sopenharmony_ci         stages[MESA_SHADER_MESH].info.is_ngg = true;
3478bf215546Sopenharmony_ci      }
3479bf215546Sopenharmony_ci
3480bf215546Sopenharmony_ci      if (stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_GEOMETRY].nir &&
3481bf215546Sopenharmony_ci          stages[MESA_SHADER_GEOMETRY].nir->info.gs.invocations *
3482bf215546Sopenharmony_ci                stages[MESA_SHADER_GEOMETRY].nir->info.gs.vertices_out >
3483bf215546Sopenharmony_ci             256) {
3484bf215546Sopenharmony_ci         /* Fallback to the legacy path if tessellation is
3485bf215546Sopenharmony_ci          * enabled with extreme geometry because
3486bf215546Sopenharmony_ci          * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
3487bf215546Sopenharmony_ci          * might hang.
3488bf215546Sopenharmony_ci          */
3489bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
3490bf215546Sopenharmony_ci
3491bf215546Sopenharmony_ci         /* GFX11+ requires NGG. */
3492bf215546Sopenharmony_ci         assert(device->physical_device->rad_info.gfx_level < GFX11);
3493bf215546Sopenharmony_ci      }
3494bf215546Sopenharmony_ci
3495bf215546Sopenharmony_ci      gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
3496bf215546Sopenharmony_ci
3497bf215546Sopenharmony_ci      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
3498bf215546Sopenharmony_ci         if (stages[i].nir)
3499bf215546Sopenharmony_ci            last_xfb_stage = i;
3500bf215546Sopenharmony_ci      }
3501bf215546Sopenharmony_ci
3502bf215546Sopenharmony_ci      bool uses_xfb = stages[last_xfb_stage].nir &&
3503bf215546Sopenharmony_ci                      stages[last_xfb_stage].nir->xfb_info;
3504bf215546Sopenharmony_ci
3505bf215546Sopenharmony_ci      if (!device->physical_device->use_ngg_streamout && uses_xfb) {
3506bf215546Sopenharmony_ci         /* GFX11+ requires NGG. */
3507bf215546Sopenharmony_ci         assert(device->physical_device->rad_info.gfx_level < GFX11);
3508bf215546Sopenharmony_ci
3509bf215546Sopenharmony_ci         if (stages[MESA_SHADER_TESS_CTRL].nir)
3510bf215546Sopenharmony_ci           stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
3511bf215546Sopenharmony_ci         else
3512bf215546Sopenharmony_ci           stages[MESA_SHADER_VERTEX].info.is_ngg = false;
3513bf215546Sopenharmony_ci      }
3514bf215546Sopenharmony_ci
3515bf215546Sopenharmony_ci      /* Determine if the pipeline is eligible for the NGG passthrough
3516bf215546Sopenharmony_ci       * mode. It can't be enabled for geometry shaders, for NGG
3517bf215546Sopenharmony_ci       * streamout or for vertex shaders that export the primitive ID
3518bf215546Sopenharmony_ci       * (this is checked later because we don't have the info here.)
3519bf215546Sopenharmony_ci       */
3520bf215546Sopenharmony_ci      if (!stages[MESA_SHADER_GEOMETRY].nir && !uses_xfb) {
3521bf215546Sopenharmony_ci         if (stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_TESS_EVAL].info.is_ngg) {
3522bf215546Sopenharmony_ci            stages[MESA_SHADER_TESS_EVAL].info.is_ngg_passthrough = true;
3523bf215546Sopenharmony_ci         } else if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.is_ngg) {
3524bf215546Sopenharmony_ci            stages[MESA_SHADER_VERTEX].info.is_ngg_passthrough = true;
3525bf215546Sopenharmony_ci         }
3526bf215546Sopenharmony_ci      }
3527bf215546Sopenharmony_ci   }
3528bf215546Sopenharmony_ci}
3529bf215546Sopenharmony_ci
3530bf215546Sopenharmony_cistatic void
3531bf215546Sopenharmony_ciradv_fill_shader_info(struct radv_pipeline *pipeline,
3532bf215546Sopenharmony_ci                      struct radv_pipeline_layout *pipeline_layout,
3533bf215546Sopenharmony_ci                      const struct radv_pipeline_key *pipeline_key,
3534bf215546Sopenharmony_ci                      struct radv_pipeline_stage *stages,
3535bf215546Sopenharmony_ci                      gl_shader_stage last_vgt_api_stage)
3536bf215546Sopenharmony_ci{
3537bf215546Sopenharmony_ci   struct radv_device *device = pipeline->device;
3538bf215546Sopenharmony_ci   unsigned active_stages = 0;
3539bf215546Sopenharmony_ci   unsigned filled_stages = 0;
3540bf215546Sopenharmony_ci
3541bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
3542bf215546Sopenharmony_ci      if (stages[i].nir)
3543bf215546Sopenharmony_ci         active_stages |= (1 << i);
3544bf215546Sopenharmony_ci   }
3545bf215546Sopenharmony_ci
3546bf215546Sopenharmony_ci   if (stages[MESA_SHADER_TESS_CTRL].nir) {
3547bf215546Sopenharmony_ci      stages[MESA_SHADER_VERTEX].info.vs.as_ls = true;
3548bf215546Sopenharmony_ci   }
3549bf215546Sopenharmony_ci
3550bf215546Sopenharmony_ci   if (stages[MESA_SHADER_GEOMETRY].nir) {
3551bf215546Sopenharmony_ci      if (stages[MESA_SHADER_TESS_CTRL].nir)
3552bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_EVAL].info.tes.as_es = true;
3553bf215546Sopenharmony_ci      else
3554bf215546Sopenharmony_ci         stages[MESA_SHADER_VERTEX].info.vs.as_es = true;
3555bf215546Sopenharmony_ci   }
3556bf215546Sopenharmony_ci
3557bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir) {
3558bf215546Sopenharmony_ci      radv_nir_shader_info_init(&stages[MESA_SHADER_FRAGMENT].info);
3559bf215546Sopenharmony_ci      radv_nir_shader_info_pass(device, stages[MESA_SHADER_FRAGMENT].nir, pipeline_layout,
3560bf215546Sopenharmony_ci                                pipeline_key, &stages[MESA_SHADER_FRAGMENT].info);
3561bf215546Sopenharmony_ci
3562bf215546Sopenharmony_ci      assert(last_vgt_api_stage != MESA_SHADER_NONE);
3563bf215546Sopenharmony_ci      struct radv_shader_info *pre_ps_info = &stages[last_vgt_api_stage].info;
3564bf215546Sopenharmony_ci      struct radv_vs_output_info *outinfo = NULL;
3565bf215546Sopenharmony_ci      if (last_vgt_api_stage == MESA_SHADER_VERTEX ||
3566bf215546Sopenharmony_ci          last_vgt_api_stage == MESA_SHADER_GEOMETRY) {
3567bf215546Sopenharmony_ci         outinfo = &pre_ps_info->vs.outinfo;
3568bf215546Sopenharmony_ci      } else if (last_vgt_api_stage == MESA_SHADER_TESS_EVAL) {
3569bf215546Sopenharmony_ci         outinfo = &pre_ps_info->tes.outinfo;
3570bf215546Sopenharmony_ci      } else if (last_vgt_api_stage == MESA_SHADER_MESH) {
3571bf215546Sopenharmony_ci         outinfo = &pre_ps_info->ms.outinfo;
3572bf215546Sopenharmony_ci      }
3573bf215546Sopenharmony_ci
3574bf215546Sopenharmony_ci      /* Add PS input requirements to the output of the pre-PS stage. */
3575bf215546Sopenharmony_ci      bool ps_prim_id_in = stages[MESA_SHADER_FRAGMENT].info.ps.prim_id_input;
3576bf215546Sopenharmony_ci      bool ps_clip_dists_in = !!stages[MESA_SHADER_FRAGMENT].info.ps.num_input_clips_culls;
3577bf215546Sopenharmony_ci
3578bf215546Sopenharmony_ci      assert(outinfo);
3579bf215546Sopenharmony_ci      outinfo->export_clip_dists |= ps_clip_dists_in;
3580bf215546Sopenharmony_ci      if (last_vgt_api_stage == MESA_SHADER_VERTEX ||
3581bf215546Sopenharmony_ci          last_vgt_api_stage == MESA_SHADER_TESS_EVAL) {
3582bf215546Sopenharmony_ci         outinfo->export_prim_id |= ps_prim_id_in;
3583bf215546Sopenharmony_ci      }
3584bf215546Sopenharmony_ci
3585bf215546Sopenharmony_ci      filled_stages |= (1 << MESA_SHADER_FRAGMENT);
3586bf215546Sopenharmony_ci   }
3587bf215546Sopenharmony_ci
3588bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX9 &&
3589bf215546Sopenharmony_ci       stages[MESA_SHADER_TESS_CTRL].nir) {
3590bf215546Sopenharmony_ci      struct nir_shader *combined_nir[] = {stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_TESS_CTRL].nir};
3591bf215546Sopenharmony_ci
3592bf215546Sopenharmony_ci      radv_nir_shader_info_init(&stages[MESA_SHADER_TESS_CTRL].info);
3593bf215546Sopenharmony_ci
3594bf215546Sopenharmony_ci      /* Copy data to merged stage. */
3595bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.vs.as_ls = true;
3596bf215546Sopenharmony_ci
3597bf215546Sopenharmony_ci      for (int i = 0; i < 2; i++) {
3598bf215546Sopenharmony_ci         radv_nir_shader_info_pass(device, combined_nir[i], pipeline_layout, pipeline_key,
3599bf215546Sopenharmony_ci                                   &stages[MESA_SHADER_TESS_CTRL].info);
3600bf215546Sopenharmony_ci      }
3601bf215546Sopenharmony_ci
3602bf215546Sopenharmony_ci      filled_stages |= (1 << MESA_SHADER_VERTEX);
3603bf215546Sopenharmony_ci      filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
3604bf215546Sopenharmony_ci   }
3605bf215546Sopenharmony_ci
3606bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX9 &&
3607bf215546Sopenharmony_ci       stages[MESA_SHADER_GEOMETRY].nir) {
3608bf215546Sopenharmony_ci      gl_shader_stage pre_stage =
3609bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
3610bf215546Sopenharmony_ci      struct nir_shader *combined_nir[] = {stages[pre_stage].nir, stages[MESA_SHADER_GEOMETRY].nir};
3611bf215546Sopenharmony_ci
3612bf215546Sopenharmony_ci      radv_nir_shader_info_init(&stages[MESA_SHADER_GEOMETRY].info);
3613bf215546Sopenharmony_ci
3614bf215546Sopenharmony_ci      /* Copy data to merged stage. */
3615bf215546Sopenharmony_ci      if (pre_stage == MESA_SHADER_VERTEX) {
3616bf215546Sopenharmony_ci         stages[MESA_SHADER_GEOMETRY].info.vs.as_es = stages[MESA_SHADER_VERTEX].info.vs.as_es;
3617bf215546Sopenharmony_ci      } else {
3618bf215546Sopenharmony_ci         stages[MESA_SHADER_GEOMETRY].info.tes.as_es = stages[MESA_SHADER_TESS_EVAL].info.tes.as_es;
3619bf215546Sopenharmony_ci      }
3620bf215546Sopenharmony_ci      stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[pre_stage].info.is_ngg;
3621bf215546Sopenharmony_ci      stages[MESA_SHADER_GEOMETRY].info.gs.es_type = pre_stage;
3622bf215546Sopenharmony_ci
3623bf215546Sopenharmony_ci      for (int i = 0; i < 2; i++) {
3624bf215546Sopenharmony_ci         radv_nir_shader_info_pass(device, combined_nir[i], pipeline_layout, pipeline_key,
3625bf215546Sopenharmony_ci                                   &stages[MESA_SHADER_GEOMETRY].info);
3626bf215546Sopenharmony_ci      }
3627bf215546Sopenharmony_ci
3628bf215546Sopenharmony_ci      filled_stages |= (1 << pre_stage);
3629bf215546Sopenharmony_ci      filled_stages |= (1 << MESA_SHADER_GEOMETRY);
3630bf215546Sopenharmony_ci   }
3631bf215546Sopenharmony_ci
3632bf215546Sopenharmony_ci   active_stages ^= filled_stages;
3633bf215546Sopenharmony_ci   while (active_stages) {
3634bf215546Sopenharmony_ci      int i = u_bit_scan(&active_stages);
3635bf215546Sopenharmony_ci      radv_nir_shader_info_init(&stages[i].info);
3636bf215546Sopenharmony_ci      radv_nir_shader_info_pass(device, stages[i].nir, pipeline_layout, pipeline_key,
3637bf215546Sopenharmony_ci                                &stages[i].info);
3638bf215546Sopenharmony_ci   }
3639bf215546Sopenharmony_ci
3640bf215546Sopenharmony_ci   if (stages[MESA_SHADER_COMPUTE].nir) {
3641bf215546Sopenharmony_ci      unsigned subgroup_size = pipeline_key->cs.compute_subgroup_size;
3642bf215546Sopenharmony_ci      unsigned req_subgroup_size = subgroup_size;
3643bf215546Sopenharmony_ci      bool require_full_subgroups = pipeline_key->cs.require_full_subgroups;
3644bf215546Sopenharmony_ci
3645bf215546Sopenharmony_ci      if (!subgroup_size)
3646bf215546Sopenharmony_ci         subgroup_size = device->physical_device->cs_wave_size;
3647bf215546Sopenharmony_ci
3648bf215546Sopenharmony_ci      unsigned local_size = stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size[0] *
3649bf215546Sopenharmony_ci                            stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size[1] *
3650bf215546Sopenharmony_ci                            stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size[2];
3651bf215546Sopenharmony_ci
3652bf215546Sopenharmony_ci      /* Games don't always request full subgroups when they should,
3653bf215546Sopenharmony_ci       * which can cause bugs if cswave32 is enabled.
3654bf215546Sopenharmony_ci       */
3655bf215546Sopenharmony_ci      if (device->physical_device->cs_wave_size == 32 &&
3656bf215546Sopenharmony_ci          stages[MESA_SHADER_COMPUTE].nir->info.cs.uses_wide_subgroup_intrinsics && !req_subgroup_size &&
3657bf215546Sopenharmony_ci          local_size % RADV_SUBGROUP_SIZE == 0)
3658bf215546Sopenharmony_ci         require_full_subgroups = true;
3659bf215546Sopenharmony_ci
3660bf215546Sopenharmony_ci      if (require_full_subgroups && !req_subgroup_size) {
3661bf215546Sopenharmony_ci         /* don't use wave32 pretending to be wave64 */
3662bf215546Sopenharmony_ci         subgroup_size = RADV_SUBGROUP_SIZE;
3663bf215546Sopenharmony_ci      }
3664bf215546Sopenharmony_ci
3665bf215546Sopenharmony_ci      stages[MESA_SHADER_COMPUTE].info.cs.subgroup_size = subgroup_size;
3666bf215546Sopenharmony_ci   }
3667bf215546Sopenharmony_ci
3668bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
3669bf215546Sopenharmony_ci      if (stages[i].nir) {
3670bf215546Sopenharmony_ci         stages[i].info.wave_size = radv_get_wave_size(device, i, &stages[i].info);
3671bf215546Sopenharmony_ci         stages[i].info.ballot_bit_size = radv_get_ballot_bit_size(device, i, &stages[i].info);
3672bf215546Sopenharmony_ci      }
3673bf215546Sopenharmony_ci   }
3674bf215546Sopenharmony_ci
3675bf215546Sopenharmony_ci   /* PS always operates without workgroups. */
3676bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir)
3677bf215546Sopenharmony_ci      stages[MESA_SHADER_FRAGMENT].info.workgroup_size = stages[MESA_SHADER_FRAGMENT].info.wave_size;
3678bf215546Sopenharmony_ci
3679bf215546Sopenharmony_ci   if (stages[MESA_SHADER_COMPUTE].nir) {
3680bf215546Sopenharmony_ci      /* Variable workgroup size is not supported by Vulkan. */
3681bf215546Sopenharmony_ci      assert(!stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size_variable);
3682bf215546Sopenharmony_ci
3683bf215546Sopenharmony_ci      stages[MESA_SHADER_COMPUTE].info.workgroup_size =
3684bf215546Sopenharmony_ci         ac_compute_cs_workgroup_size(
3685bf215546Sopenharmony_ci            stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size, false, UINT32_MAX);
3686bf215546Sopenharmony_ci   }
3687bf215546Sopenharmony_ci
3688bf215546Sopenharmony_ci   if (stages[MESA_SHADER_TASK].nir) {
3689bf215546Sopenharmony_ci      /* Task/mesh I/O uses the task ring buffers. */
3690bf215546Sopenharmony_ci      stages[MESA_SHADER_TASK].info.cs.uses_task_rings = true;
3691bf215546Sopenharmony_ci      stages[MESA_SHADER_MESH].info.cs.uses_task_rings = true;
3692bf215546Sopenharmony_ci
3693bf215546Sopenharmony_ci      stages[MESA_SHADER_TASK].info.workgroup_size =
3694bf215546Sopenharmony_ci         ac_compute_cs_workgroup_size(
3695bf215546Sopenharmony_ci            stages[MESA_SHADER_TASK].nir->info.workgroup_size, false, UINT32_MAX);
3696bf215546Sopenharmony_ci   }
3697bf215546Sopenharmony_ci}
3698bf215546Sopenharmony_ci
3699bf215546Sopenharmony_cistatic void
3700bf215546Sopenharmony_ciradv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stage *stages,
3701bf215546Sopenharmony_ci                           const struct radv_pipeline_key *pipeline_key)
3702bf215546Sopenharmony_ci{
3703bf215546Sopenharmony_ci   enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
3704bf215546Sopenharmony_ci   unsigned active_stages = 0;
3705bf215546Sopenharmony_ci
3706bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
3707bf215546Sopenharmony_ci      if (stages[i].nir)
3708bf215546Sopenharmony_ci         active_stages |= (1 << i);
3709bf215546Sopenharmony_ci   }
3710bf215546Sopenharmony_ci
3711bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
3712bf215546Sopenharmony_ci      stages[i].args.is_gs_copy_shader = false;
3713bf215546Sopenharmony_ci      stages[i].args.explicit_scratch_args = !radv_use_llvm_for_stage(device, i);
3714bf215546Sopenharmony_ci      stages[i].args.remap_spi_ps_input = !radv_use_llvm_for_stage(device, i);
3715bf215546Sopenharmony_ci      stages[i].args.load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr;
3716bf215546Sopenharmony_ci   }
3717bf215546Sopenharmony_ci
3718bf215546Sopenharmony_ci   if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
3719bf215546Sopenharmony_ci      radv_declare_shader_args(gfx_level, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info,
3720bf215546Sopenharmony_ci                               MESA_SHADER_TESS_CTRL, true, MESA_SHADER_VERTEX,
3721bf215546Sopenharmony_ci                               &stages[MESA_SHADER_TESS_CTRL].args);
3722bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
3723bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
3724bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask;
3725bf215546Sopenharmony_ci
3726bf215546Sopenharmony_ci      stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args;
3727bf215546Sopenharmony_ci      active_stages &= ~(1 << MESA_SHADER_VERTEX);
3728bf215546Sopenharmony_ci      active_stages &= ~(1 << MESA_SHADER_TESS_CTRL);
3729bf215546Sopenharmony_ci   }
3730bf215546Sopenharmony_ci
3731bf215546Sopenharmony_ci   if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
3732bf215546Sopenharmony_ci      gl_shader_stage pre_stage =
3733bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
3734bf215546Sopenharmony_ci      radv_declare_shader_args(gfx_level, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info,
3735bf215546Sopenharmony_ci                               MESA_SHADER_GEOMETRY, true, pre_stage,
3736bf215546Sopenharmony_ci                               &stages[MESA_SHADER_GEOMETRY].args);
3737bf215546Sopenharmony_ci      stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
3738bf215546Sopenharmony_ci      stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
3739bf215546Sopenharmony_ci         stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
3740bf215546Sopenharmony_ci
3741bf215546Sopenharmony_ci      stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args;
3742bf215546Sopenharmony_ci      active_stages &= ~(1 << pre_stage);
3743bf215546Sopenharmony_ci      active_stages &= ~(1 << MESA_SHADER_GEOMETRY);
3744bf215546Sopenharmony_ci   }
3745bf215546Sopenharmony_ci
3746bf215546Sopenharmony_ci   u_foreach_bit(i, active_stages) {
3747bf215546Sopenharmony_ci      radv_declare_shader_args(gfx_level, pipeline_key, &stages[i].info, i, false,
3748bf215546Sopenharmony_ci                               MESA_SHADER_VERTEX, &stages[i].args);
3749bf215546Sopenharmony_ci      stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
3750bf215546Sopenharmony_ci      stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
3751bf215546Sopenharmony_ci   }
3752bf215546Sopenharmony_ci}
3753bf215546Sopenharmony_ci
3754bf215546Sopenharmony_cistatic void
3755bf215546Sopenharmony_cimerge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
3756bf215546Sopenharmony_ci{
3757bf215546Sopenharmony_ci   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
3758bf215546Sopenharmony_ci    *
3759bf215546Sopenharmony_ci    *    "PointMode. Controls generation of points rather than triangles
3760bf215546Sopenharmony_ci    *     or lines. This functionality defaults to disabled, and is
3761bf215546Sopenharmony_ci    *     enabled if either shader stage includes the execution mode.
3762bf215546Sopenharmony_ci    *
3763bf215546Sopenharmony_ci    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
3764bf215546Sopenharmony_ci    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
3765bf215546Sopenharmony_ci    * and OutputVertices, it says:
3766bf215546Sopenharmony_ci    *
3767bf215546Sopenharmony_ci    *    "One mode must be set in at least one of the tessellation
3768bf215546Sopenharmony_ci    *     shader stages."
3769bf215546Sopenharmony_ci    *
3770bf215546Sopenharmony_ci    * So, the fields can be set in either the TCS or TES, but they must
3771bf215546Sopenharmony_ci    * agree if set in both.  Our backend looks at TES, so bitwise-or in
3772bf215546Sopenharmony_ci    * the values from the TCS.
3773bf215546Sopenharmony_ci    */
3774bf215546Sopenharmony_ci   assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
3775bf215546Sopenharmony_ci          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
3776bf215546Sopenharmony_ci   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
3777bf215546Sopenharmony_ci
3778bf215546Sopenharmony_ci   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
3779bf215546Sopenharmony_ci          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
3780bf215546Sopenharmony_ci          tcs_info->tess.spacing == tes_info->tess.spacing);
3781bf215546Sopenharmony_ci   tes_info->tess.spacing |= tcs_info->tess.spacing;
3782bf215546Sopenharmony_ci
3783bf215546Sopenharmony_ci   assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
3784bf215546Sopenharmony_ci          tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
3785bf215546Sopenharmony_ci          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
3786bf215546Sopenharmony_ci   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
3787bf215546Sopenharmony_ci   tes_info->tess.ccw |= tcs_info->tess.ccw;
3788bf215546Sopenharmony_ci   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
3789bf215546Sopenharmony_ci
3790bf215546Sopenharmony_ci   /* Copy the merged info back to the TCS */
3791bf215546Sopenharmony_ci   tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
3792bf215546Sopenharmony_ci   tcs_info->tess.spacing = tes_info->tess.spacing;
3793bf215546Sopenharmony_ci   tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode;
3794bf215546Sopenharmony_ci   tcs_info->tess.ccw = tes_info->tess.ccw;
3795bf215546Sopenharmony_ci   tcs_info->tess.point_mode = tes_info->tess.point_mode;
3796bf215546Sopenharmony_ci}
3797bf215546Sopenharmony_ci
3798bf215546Sopenharmony_cistatic void
3799bf215546Sopenharmony_cigather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
3800bf215546Sopenharmony_ci                 const struct radv_pipeline_key *pipeline_key)
3801bf215546Sopenharmony_ci{
3802bf215546Sopenharmony_ci   merge_tess_info(&stages[MESA_SHADER_TESS_EVAL].nir->info,
3803bf215546Sopenharmony_ci                   &stages[MESA_SHADER_TESS_CTRL].nir->info);
3804bf215546Sopenharmony_ci
3805bf215546Sopenharmony_ci   unsigned tess_in_patch_size = pipeline_key->tcs.tess_input_vertices;
3806bf215546Sopenharmony_ci   unsigned tess_out_patch_size = stages[MESA_SHADER_TESS_CTRL].nir->info.tess.tcs_vertices_out;
3807bf215546Sopenharmony_ci
3808bf215546Sopenharmony_ci   /* Number of tessellation patches per workgroup processed by the current pipeline. */
3809bf215546Sopenharmony_ci   unsigned num_patches = get_tcs_num_patches(
3810bf215546Sopenharmony_ci      tess_in_patch_size, tess_out_patch_size,
3811bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs,
3812bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs,
3813bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs,
3814bf215546Sopenharmony_ci      device->physical_device->hs.tess_offchip_block_dw_size, device->physical_device->rad_info.gfx_level,
3815bf215546Sopenharmony_ci      device->physical_device->rad_info.family);
3816bf215546Sopenharmony_ci
3817bf215546Sopenharmony_ci   /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
3818bf215546Sopenharmony_ci   unsigned tcs_lds_size = calculate_tess_lds_size(
3819bf215546Sopenharmony_ci      device->physical_device->rad_info.gfx_level, tess_in_patch_size, tess_out_patch_size,
3820bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs, num_patches,
3821bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs,
3822bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs);
3823bf215546Sopenharmony_ci
3824bf215546Sopenharmony_ci   stages[MESA_SHADER_TESS_CTRL].info.num_tess_patches = num_patches;
3825bf215546Sopenharmony_ci   stages[MESA_SHADER_TESS_CTRL].info.tcs.num_lds_blocks = tcs_lds_size;
3826bf215546Sopenharmony_ci   stages[MESA_SHADER_TESS_CTRL].info.tcs.tes_reads_tess_factors =
3827bf215546Sopenharmony_ci      !!(stages[MESA_SHADER_TESS_EVAL].nir->info.inputs_read &
3828bf215546Sopenharmony_ci         (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
3829bf215546Sopenharmony_ci   stages[MESA_SHADER_TESS_CTRL].info.tcs.tes_inputs_read = stages[MESA_SHADER_TESS_EVAL].nir->info.inputs_read;
3830bf215546Sopenharmony_ci   stages[MESA_SHADER_TESS_CTRL].info.tcs.tes_patch_inputs_read =
3831bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_EVAL].nir->info.patch_inputs_read;
3832bf215546Sopenharmony_ci
3833bf215546Sopenharmony_ci   stages[MESA_SHADER_TESS_EVAL].info.num_tess_patches = num_patches;
3834bf215546Sopenharmony_ci   stages[MESA_SHADER_GEOMETRY].info.num_tess_patches = num_patches;
3835bf215546Sopenharmony_ci   stages[MESA_SHADER_VERTEX].info.num_tess_patches = num_patches;
3836bf215546Sopenharmony_ci   stages[MESA_SHADER_TESS_CTRL].info.tcs.tcs_vertices_out = tess_out_patch_size;
3837bf215546Sopenharmony_ci   stages[MESA_SHADER_VERTEX].info.tcs.tcs_vertices_out = tess_out_patch_size;
3838bf215546Sopenharmony_ci
3839bf215546Sopenharmony_ci   if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
3840bf215546Sopenharmony_ci      /* When the number of TCS input and output vertices are the same (typically 3):
3841bf215546Sopenharmony_ci       * - There is an equal amount of LS and HS invocations
3842bf215546Sopenharmony_ci       * - In case of merged LSHS shaders, the LS and HS halves of the shader
3843bf215546Sopenharmony_ci       *   always process the exact same vertex. We can use this knowledge to optimize them.
3844bf215546Sopenharmony_ci       *
3845bf215546Sopenharmony_ci       * We don't set tcs_in_out_eq if the float controls differ because that might
3846bf215546Sopenharmony_ci       * involve different float modes for the same block and our optimizer
3847bf215546Sopenharmony_ci       * doesn't handle a instruction dominating another with a different mode.
3848bf215546Sopenharmony_ci       */
3849bf215546Sopenharmony_ci      stages[MESA_SHADER_VERTEX].info.vs.tcs_in_out_eq =
3850bf215546Sopenharmony_ci         device->physical_device->rad_info.gfx_level >= GFX9 &&
3851bf215546Sopenharmony_ci         tess_in_patch_size == tess_out_patch_size &&
3852bf215546Sopenharmony_ci         stages[MESA_SHADER_VERTEX].nir->info.float_controls_execution_mode ==
3853bf215546Sopenharmony_ci            stages[MESA_SHADER_TESS_CTRL].nir->info.float_controls_execution_mode;
3854bf215546Sopenharmony_ci
3855bf215546Sopenharmony_ci      if (stages[MESA_SHADER_VERTEX].info.vs.tcs_in_out_eq)
3856bf215546Sopenharmony_ci         stages[MESA_SHADER_VERTEX].info.vs.tcs_temp_only_input_mask =
3857bf215546Sopenharmony_ci            stages[MESA_SHADER_TESS_CTRL].nir->info.inputs_read &
3858bf215546Sopenharmony_ci            stages[MESA_SHADER_VERTEX].nir->info.outputs_written &
3859bf215546Sopenharmony_ci            ~stages[MESA_SHADER_TESS_CTRL].nir->info.tess.tcs_cross_invocation_inputs_read &
3860bf215546Sopenharmony_ci            ~stages[MESA_SHADER_TESS_CTRL].nir->info.inputs_read_indirectly &
3861bf215546Sopenharmony_ci            ~stages[MESA_SHADER_VERTEX].nir->info.outputs_accessed_indirectly;
3862bf215546Sopenharmony_ci
3863bf215546Sopenharmony_ci      /* Copy data to TCS so it can be accessed by the backend if they are merged. */
3864bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.vs.tcs_in_out_eq = stages[MESA_SHADER_VERTEX].info.vs.tcs_in_out_eq;
3865bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_CTRL].info.vs.tcs_temp_only_input_mask =
3866bf215546Sopenharmony_ci         stages[MESA_SHADER_VERTEX].info.vs.tcs_temp_only_input_mask;
3867bf215546Sopenharmony_ci   }
3868bf215546Sopenharmony_ci
3869bf215546Sopenharmony_ci   for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s)
3870bf215546Sopenharmony_ci      stages[s].info.workgroup_size =
3871bf215546Sopenharmony_ci         ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, s, num_patches,
3872bf215546Sopenharmony_ci                                        tess_in_patch_size, tess_out_patch_size);
3873bf215546Sopenharmony_ci}
3874bf215546Sopenharmony_ci
3875bf215546Sopenharmony_cistatic bool
3876bf215546Sopenharmony_cimem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
3877bf215546Sopenharmony_ci                       unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
3878bf215546Sopenharmony_ci                       void *data)
3879bf215546Sopenharmony_ci{
3880bf215546Sopenharmony_ci   if (num_components > 4)
3881bf215546Sopenharmony_ci      return false;
3882bf215546Sopenharmony_ci
3883bf215546Sopenharmony_ci   /* >128 bit loads are split except with SMEM */
3884bf215546Sopenharmony_ci   if (bit_size * num_components > 128)
3885bf215546Sopenharmony_ci      return false;
3886bf215546Sopenharmony_ci
3887bf215546Sopenharmony_ci   uint32_t align;
3888bf215546Sopenharmony_ci   if (align_offset)
3889bf215546Sopenharmony_ci      align = 1 << (ffs(align_offset) - 1);
3890bf215546Sopenharmony_ci   else
3891bf215546Sopenharmony_ci      align = align_mul;
3892bf215546Sopenharmony_ci
3893bf215546Sopenharmony_ci   switch (low->intrinsic) {
3894bf215546Sopenharmony_ci   case nir_intrinsic_load_global:
3895bf215546Sopenharmony_ci   case nir_intrinsic_store_global:
3896bf215546Sopenharmony_ci   case nir_intrinsic_store_ssbo:
3897bf215546Sopenharmony_ci   case nir_intrinsic_load_ssbo:
3898bf215546Sopenharmony_ci   case nir_intrinsic_load_ubo:
3899bf215546Sopenharmony_ci   case nir_intrinsic_load_push_constant: {
3900bf215546Sopenharmony_ci      unsigned max_components;
3901bf215546Sopenharmony_ci      if (align % 4 == 0)
3902bf215546Sopenharmony_ci         max_components = NIR_MAX_VEC_COMPONENTS;
3903bf215546Sopenharmony_ci      else if (align % 2 == 0)
3904bf215546Sopenharmony_ci         max_components = 16u / bit_size;
3905bf215546Sopenharmony_ci      else
3906bf215546Sopenharmony_ci         max_components = 8u / bit_size;
3907bf215546Sopenharmony_ci      return (align % (bit_size / 8u)) == 0 && num_components <= max_components;
3908bf215546Sopenharmony_ci   }
3909bf215546Sopenharmony_ci   case nir_intrinsic_load_deref:
3910bf215546Sopenharmony_ci   case nir_intrinsic_store_deref:
3911bf215546Sopenharmony_ci      assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]), nir_var_mem_shared));
3912bf215546Sopenharmony_ci      FALLTHROUGH;
3913bf215546Sopenharmony_ci   case nir_intrinsic_load_shared:
3914bf215546Sopenharmony_ci   case nir_intrinsic_store_shared:
3915bf215546Sopenharmony_ci      if (bit_size * num_components ==
3916bf215546Sopenharmony_ci          96) { /* 96 bit loads require 128 bit alignment and are split otherwise */
3917bf215546Sopenharmony_ci         return align % 16 == 0;
3918bf215546Sopenharmony_ci      } else if (bit_size == 16 && (align % 4)) {
3919bf215546Sopenharmony_ci         /* AMD hardware can't do 2-byte aligned f16vec2 loads, but they are useful for ALU
3920bf215546Sopenharmony_ci          * vectorization, because our vectorizer requires the scalar IR to already contain vectors.
3921bf215546Sopenharmony_ci          */
3922bf215546Sopenharmony_ci         return (align % 2 == 0) && num_components <= 2;
3923bf215546Sopenharmony_ci      } else {
3924bf215546Sopenharmony_ci         if (num_components == 3) {
3925bf215546Sopenharmony_ci            /* AMD hardware can't do 3-component loads except for 96-bit loads, handled above. */
3926bf215546Sopenharmony_ci            return false;
3927bf215546Sopenharmony_ci         }
3928bf215546Sopenharmony_ci         unsigned req = bit_size * num_components;
3929bf215546Sopenharmony_ci         if (req == 64 || req == 128) /* 64-bit and 128-bit loads can use ds_read2_b{32,64} */
3930bf215546Sopenharmony_ci            req /= 2u;
3931bf215546Sopenharmony_ci         return align % (req / 8u) == 0;
3932bf215546Sopenharmony_ci      }
3933bf215546Sopenharmony_ci   default:
3934bf215546Sopenharmony_ci      return false;
3935bf215546Sopenharmony_ci   }
3936bf215546Sopenharmony_ci   return false;
3937bf215546Sopenharmony_ci}
3938bf215546Sopenharmony_ci
3939bf215546Sopenharmony_cistatic unsigned
3940bf215546Sopenharmony_cilower_bit_size_callback(const nir_instr *instr, void *_)
3941bf215546Sopenharmony_ci{
3942bf215546Sopenharmony_ci   struct radv_device *device = _;
3943bf215546Sopenharmony_ci   enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
3944bf215546Sopenharmony_ci
3945bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_alu)
3946bf215546Sopenharmony_ci      return 0;
3947bf215546Sopenharmony_ci   nir_alu_instr *alu = nir_instr_as_alu(instr);
3948bf215546Sopenharmony_ci
3949bf215546Sopenharmony_ci   /* If an instruction is not scalarized by this point,
3950bf215546Sopenharmony_ci    * it can be emitted as packed instruction */
3951bf215546Sopenharmony_ci   if (alu->dest.dest.ssa.num_components > 1)
3952bf215546Sopenharmony_ci      return 0;
3953bf215546Sopenharmony_ci
3954bf215546Sopenharmony_ci   if (alu->dest.dest.ssa.bit_size & (8 | 16)) {
3955bf215546Sopenharmony_ci      unsigned bit_size = alu->dest.dest.ssa.bit_size;
3956bf215546Sopenharmony_ci      switch (alu->op) {
3957bf215546Sopenharmony_ci      case nir_op_bitfield_select:
3958bf215546Sopenharmony_ci      case nir_op_imul_high:
3959bf215546Sopenharmony_ci      case nir_op_umul_high:
3960bf215546Sopenharmony_ci         return 32;
3961bf215546Sopenharmony_ci      case nir_op_iabs:
3962bf215546Sopenharmony_ci      case nir_op_imax:
3963bf215546Sopenharmony_ci      case nir_op_umax:
3964bf215546Sopenharmony_ci      case nir_op_imin:
3965bf215546Sopenharmony_ci      case nir_op_umin:
3966bf215546Sopenharmony_ci      case nir_op_ishr:
3967bf215546Sopenharmony_ci      case nir_op_ushr:
3968bf215546Sopenharmony_ci      case nir_op_ishl:
3969bf215546Sopenharmony_ci      case nir_op_isign:
3970bf215546Sopenharmony_ci      case nir_op_uadd_sat:
3971bf215546Sopenharmony_ci      case nir_op_usub_sat:
3972bf215546Sopenharmony_ci         return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
3973bf215546Sopenharmony_ci                                                                                            : 0;
3974bf215546Sopenharmony_ci      case nir_op_iadd_sat:
3975bf215546Sopenharmony_ci      case nir_op_isub_sat:
3976bf215546Sopenharmony_ci         return bit_size == 8 || !nir_dest_is_divergent(alu->dest.dest) ? 32 : 0;
3977bf215546Sopenharmony_ci
3978bf215546Sopenharmony_ci      default:
3979bf215546Sopenharmony_ci         return 0;
3980bf215546Sopenharmony_ci      }
3981bf215546Sopenharmony_ci   }
3982bf215546Sopenharmony_ci
3983bf215546Sopenharmony_ci   if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) {
3984bf215546Sopenharmony_ci      unsigned bit_size = nir_src_bit_size(alu->src[0].src);
3985bf215546Sopenharmony_ci      switch (alu->op) {
3986bf215546Sopenharmony_ci      case nir_op_bit_count:
3987bf215546Sopenharmony_ci      case nir_op_find_lsb:
3988bf215546Sopenharmony_ci      case nir_op_ufind_msb:
3989bf215546Sopenharmony_ci      case nir_op_i2b1:
3990bf215546Sopenharmony_ci         return 32;
3991bf215546Sopenharmony_ci      case nir_op_ilt:
3992bf215546Sopenharmony_ci      case nir_op_ige:
3993bf215546Sopenharmony_ci      case nir_op_ieq:
3994bf215546Sopenharmony_ci      case nir_op_ine:
3995bf215546Sopenharmony_ci      case nir_op_ult:
3996bf215546Sopenharmony_ci      case nir_op_uge:
3997bf215546Sopenharmony_ci         return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
3998bf215546Sopenharmony_ci                                                                                            : 0;
3999bf215546Sopenharmony_ci      default:
4000bf215546Sopenharmony_ci         return 0;
4001bf215546Sopenharmony_ci      }
4002bf215546Sopenharmony_ci   }
4003bf215546Sopenharmony_ci
4004bf215546Sopenharmony_ci   return 0;
4005bf215546Sopenharmony_ci}
4006bf215546Sopenharmony_ci
4007bf215546Sopenharmony_cistatic uint8_t
4008bf215546Sopenharmony_ciopt_vectorize_callback(const nir_instr *instr, const void *_)
4009bf215546Sopenharmony_ci{
4010bf215546Sopenharmony_ci   if (instr->type != nir_instr_type_alu)
4011bf215546Sopenharmony_ci      return 0;
4012bf215546Sopenharmony_ci
4013bf215546Sopenharmony_ci   const struct radv_device *device = _;
4014bf215546Sopenharmony_ci   enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level;
4015bf215546Sopenharmony_ci   if (chip < GFX9)
4016bf215546Sopenharmony_ci      return 1;
4017bf215546Sopenharmony_ci
4018bf215546Sopenharmony_ci   const nir_alu_instr *alu = nir_instr_as_alu(instr);
4019bf215546Sopenharmony_ci   const unsigned bit_size = alu->dest.dest.ssa.bit_size;
4020bf215546Sopenharmony_ci   if (bit_size != 16)
4021bf215546Sopenharmony_ci      return 1;
4022bf215546Sopenharmony_ci
4023bf215546Sopenharmony_ci   switch (alu->op) {
4024bf215546Sopenharmony_ci   case nir_op_fadd:
4025bf215546Sopenharmony_ci   case nir_op_fsub:
4026bf215546Sopenharmony_ci   case nir_op_fmul:
4027bf215546Sopenharmony_ci   case nir_op_ffma:
4028bf215546Sopenharmony_ci   case nir_op_fdiv:
4029bf215546Sopenharmony_ci   case nir_op_flrp:
4030bf215546Sopenharmony_ci   case nir_op_fabs:
4031bf215546Sopenharmony_ci   case nir_op_fneg:
4032bf215546Sopenharmony_ci   case nir_op_fsat:
4033bf215546Sopenharmony_ci   case nir_op_fmin:
4034bf215546Sopenharmony_ci   case nir_op_fmax:
4035bf215546Sopenharmony_ci   case nir_op_iabs:
4036bf215546Sopenharmony_ci   case nir_op_iadd:
4037bf215546Sopenharmony_ci   case nir_op_iadd_sat:
4038bf215546Sopenharmony_ci   case nir_op_uadd_sat:
4039bf215546Sopenharmony_ci   case nir_op_isub:
4040bf215546Sopenharmony_ci   case nir_op_isub_sat:
4041bf215546Sopenharmony_ci   case nir_op_usub_sat:
4042bf215546Sopenharmony_ci   case nir_op_ineg:
4043bf215546Sopenharmony_ci   case nir_op_imul:
4044bf215546Sopenharmony_ci   case nir_op_imin:
4045bf215546Sopenharmony_ci   case nir_op_imax:
4046bf215546Sopenharmony_ci   case nir_op_umin:
4047bf215546Sopenharmony_ci   case nir_op_umax:
4048bf215546Sopenharmony_ci      return 2;
4049bf215546Sopenharmony_ci   case nir_op_ishl: /* TODO: in NIR, these have 32bit shift operands */
4050bf215546Sopenharmony_ci   case nir_op_ishr: /* while Radeon needs 16bit operands when vectorized */
4051bf215546Sopenharmony_ci   case nir_op_ushr:
4052bf215546Sopenharmony_ci   default:
4053bf215546Sopenharmony_ci      return 1;
4054bf215546Sopenharmony_ci   }
4055bf215546Sopenharmony_ci}
4056bf215546Sopenharmony_ci
4057bf215546Sopenharmony_cistatic nir_component_mask_t
4058bf215546Sopenharmony_cinon_uniform_access_callback(const nir_src *src, void *_)
4059bf215546Sopenharmony_ci{
4060bf215546Sopenharmony_ci   if (src->ssa->num_components == 1)
4061bf215546Sopenharmony_ci      return 0x1;
4062bf215546Sopenharmony_ci   return nir_chase_binding(*src).success ? 0x2 : 0x3;
4063bf215546Sopenharmony_ci}
4064bf215546Sopenharmony_ci
4065bf215546Sopenharmony_ci
4066bf215546Sopenharmony_ciVkResult
4067bf215546Sopenharmony_ciradv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
4068bf215546Sopenharmony_ci                    struct radv_shader_binary **binaries, struct radv_shader_binary *gs_copy_binary)
4069bf215546Sopenharmony_ci{
4070bf215546Sopenharmony_ci   uint32_t code_size = 0;
4071bf215546Sopenharmony_ci
4072bf215546Sopenharmony_ci   /* Compute the total code size. */
4073bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
4074bf215546Sopenharmony_ci      struct radv_shader *shader = pipeline->shaders[i];
4075bf215546Sopenharmony_ci      if (!shader)
4076bf215546Sopenharmony_ci         continue;
4077bf215546Sopenharmony_ci
4078bf215546Sopenharmony_ci      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
4079bf215546Sopenharmony_ci   }
4080bf215546Sopenharmony_ci
4081bf215546Sopenharmony_ci   if (pipeline->gs_copy_shader) {
4082bf215546Sopenharmony_ci      code_size += align(pipeline->gs_copy_shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
4083bf215546Sopenharmony_ci   }
4084bf215546Sopenharmony_ci
4085bf215546Sopenharmony_ci   /* Allocate memory for all shader binaries. */
4086bf215546Sopenharmony_ci   pipeline->slab = radv_pipeline_slab_create(device, pipeline, code_size);
4087bf215546Sopenharmony_ci   if (!pipeline->slab)
4088bf215546Sopenharmony_ci      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
4089bf215546Sopenharmony_ci
4090bf215546Sopenharmony_ci   pipeline->slab_bo = pipeline->slab->alloc->arena->bo;
4091bf215546Sopenharmony_ci
4092bf215546Sopenharmony_ci   /* Upload shader binaries. */
4093bf215546Sopenharmony_ci   uint64_t slab_va = radv_buffer_get_va(pipeline->slab_bo);
4094bf215546Sopenharmony_ci   uint32_t slab_offset = pipeline->slab->alloc->offset;
4095bf215546Sopenharmony_ci   char *slab_ptr = pipeline->slab->alloc->arena->ptr;
4096bf215546Sopenharmony_ci
4097bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
4098bf215546Sopenharmony_ci      struct radv_shader *shader = pipeline->shaders[i];
4099bf215546Sopenharmony_ci      if (!shader)
4100bf215546Sopenharmony_ci         continue;
4101bf215546Sopenharmony_ci
4102bf215546Sopenharmony_ci      shader->va = slab_va + slab_offset;
4103bf215546Sopenharmony_ci
4104bf215546Sopenharmony_ci      void *dest_ptr = slab_ptr + slab_offset;
4105bf215546Sopenharmony_ci      if (!radv_shader_binary_upload(device, binaries[i], shader, dest_ptr))
4106bf215546Sopenharmony_ci         return VK_ERROR_OUT_OF_HOST_MEMORY;
4107bf215546Sopenharmony_ci
4108bf215546Sopenharmony_ci      slab_offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
4109bf215546Sopenharmony_ci   }
4110bf215546Sopenharmony_ci
4111bf215546Sopenharmony_ci   if (pipeline->gs_copy_shader) {
4112bf215546Sopenharmony_ci      pipeline->gs_copy_shader->va = slab_va + slab_offset;
4113bf215546Sopenharmony_ci
4114bf215546Sopenharmony_ci      void *dest_ptr = slab_ptr + slab_offset;
4115bf215546Sopenharmony_ci      if (!radv_shader_binary_upload(device, gs_copy_binary, pipeline->gs_copy_shader, dest_ptr))
4116bf215546Sopenharmony_ci         return VK_ERROR_OUT_OF_HOST_MEMORY;
4117bf215546Sopenharmony_ci   }
4118bf215546Sopenharmony_ci
4119bf215546Sopenharmony_ci   return VK_SUCCESS;
4120bf215546Sopenharmony_ci}
4121bf215546Sopenharmony_ci
4122bf215546Sopenharmony_cistatic bool
4123bf215546Sopenharmony_ciradv_consider_force_vrs(const struct radv_pipeline *pipeline, bool noop_fs,
4124bf215546Sopenharmony_ci                        const struct radv_pipeline_stage *stages,
4125bf215546Sopenharmony_ci                        gl_shader_stage last_vgt_api_stage)
4126bf215546Sopenharmony_ci{
4127bf215546Sopenharmony_ci   struct radv_device *device = pipeline->device;
4128bf215546Sopenharmony_ci
4129bf215546Sopenharmony_ci   if (!device->force_vrs_enabled)
4130bf215546Sopenharmony_ci      return false;
4131bf215546Sopenharmony_ci
4132bf215546Sopenharmony_ci   if (last_vgt_api_stage != MESA_SHADER_VERTEX &&
4133bf215546Sopenharmony_ci       last_vgt_api_stage != MESA_SHADER_TESS_EVAL &&
4134bf215546Sopenharmony_ci       last_vgt_api_stage != MESA_SHADER_GEOMETRY)
4135bf215546Sopenharmony_ci      return false;
4136bf215546Sopenharmony_ci
4137bf215546Sopenharmony_ci   nir_shader *last_vgt_shader = stages[last_vgt_api_stage].nir;
4138bf215546Sopenharmony_ci   if (last_vgt_shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
4139bf215546Sopenharmony_ci      return false;
4140bf215546Sopenharmony_ci
4141bf215546Sopenharmony_ci   /* VRS has no effect if there is no pixel shader. */
4142bf215546Sopenharmony_ci   if (noop_fs)
4143bf215546Sopenharmony_ci      return false;
4144bf215546Sopenharmony_ci
4145bf215546Sopenharmony_ci   /* Do not enable if the PS uses gl_FragCoord because it breaks postprocessing in some games. */
4146bf215546Sopenharmony_ci   nir_shader *fs_shader = stages[MESA_SHADER_FRAGMENT].nir;
4147bf215546Sopenharmony_ci   if (fs_shader &&
4148bf215546Sopenharmony_ci       BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
4149bf215546Sopenharmony_ci      return false;
4150bf215546Sopenharmony_ci   }
4151bf215546Sopenharmony_ci
4152bf215546Sopenharmony_ci   return true;
4153bf215546Sopenharmony_ci}
4154bf215546Sopenharmony_ci
4155bf215546Sopenharmony_cistatic nir_ssa_def *
4156bf215546Sopenharmony_ciradv_adjust_vertex_fetch_alpha(nir_builder *b,
4157bf215546Sopenharmony_ci                               enum radv_vs_input_alpha_adjust alpha_adjust,
4158bf215546Sopenharmony_ci                               nir_ssa_def *alpha)
4159bf215546Sopenharmony_ci{
4160bf215546Sopenharmony_ci   if (alpha_adjust == ALPHA_ADJUST_SSCALED)
4161bf215546Sopenharmony_ci      alpha = nir_f2u32(b, alpha);
4162bf215546Sopenharmony_ci
4163bf215546Sopenharmony_ci   /* For the integer-like cases, do a natural sign extension.
4164bf215546Sopenharmony_ci    *
4165bf215546Sopenharmony_ci    * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 and happen to contain 0, 1, 2, 3 as
4166bf215546Sopenharmony_ci    * the two LSBs of the exponent.
4167bf215546Sopenharmony_ci    */
4168bf215546Sopenharmony_ci   unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
4169bf215546Sopenharmony_ci
4170bf215546Sopenharmony_ci   alpha = nir_ibfe_imm(b, alpha, offset, 2u);
4171bf215546Sopenharmony_ci
4172bf215546Sopenharmony_ci   /* Convert back to the right type. */
4173bf215546Sopenharmony_ci   if (alpha_adjust == ALPHA_ADJUST_SNORM) {
4174bf215546Sopenharmony_ci      alpha = nir_i2f32(b, alpha);
4175bf215546Sopenharmony_ci      alpha = nir_fmax(b, alpha, nir_imm_float(b, -1.0f));
4176bf215546Sopenharmony_ci   } else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
4177bf215546Sopenharmony_ci      alpha = nir_i2f32(b, alpha);
4178bf215546Sopenharmony_ci   }
4179bf215546Sopenharmony_ci
4180bf215546Sopenharmony_ci   return alpha;
4181bf215546Sopenharmony_ci}
4182bf215546Sopenharmony_ci
4183bf215546Sopenharmony_cistatic bool
4184bf215546Sopenharmony_ciradv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_key)
4185bf215546Sopenharmony_ci{
4186bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4187bf215546Sopenharmony_ci   bool progress = false;
4188bf215546Sopenharmony_ci
4189bf215546Sopenharmony_ci   if (pipeline_key->vs.dynamic_input_state)
4190bf215546Sopenharmony_ci      return false;
4191bf215546Sopenharmony_ci
4192bf215546Sopenharmony_ci   nir_builder b;
4193bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
4194bf215546Sopenharmony_ci
4195bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
4196bf215546Sopenharmony_ci      nir_foreach_instr(instr, block) {
4197bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
4198bf215546Sopenharmony_ci            continue;
4199bf215546Sopenharmony_ci
4200bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4201bf215546Sopenharmony_ci         if (intrin->intrinsic != nir_intrinsic_load_input)
4202bf215546Sopenharmony_ci            continue;
4203bf215546Sopenharmony_ci
4204bf215546Sopenharmony_ci         unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
4205bf215546Sopenharmony_ci         enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
4206bf215546Sopenharmony_ci         bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
4207bf215546Sopenharmony_ci
4208bf215546Sopenharmony_ci         unsigned component = nir_intrinsic_component(intrin);
4209bf215546Sopenharmony_ci         unsigned num_components = intrin->dest.ssa.num_components;
4210bf215546Sopenharmony_ci
4211bf215546Sopenharmony_ci         unsigned attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
4212bf215546Sopenharmony_ci         unsigned dfmt = attrib_format & 0xf;
4213bf215546Sopenharmony_ci         unsigned nfmt = (attrib_format >> 4) & 0x7;
4214bf215546Sopenharmony_ci         const struct ac_data_format_info *vtx_info = ac_get_data_format_info(dfmt);
4215bf215546Sopenharmony_ci         bool is_float =
4216bf215546Sopenharmony_ci            nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
4217bf215546Sopenharmony_ci
4218bf215546Sopenharmony_ci         unsigned mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component;
4219bf215546Sopenharmony_ci         unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
4220bf215546Sopenharmony_ci
4221bf215546Sopenharmony_ci         static const unsigned swizzle_normal[4] = {0, 1, 2, 3};
4222bf215546Sopenharmony_ci         static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3};
4223bf215546Sopenharmony_ci         const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal;
4224bf215546Sopenharmony_ci
4225bf215546Sopenharmony_ci         b.cursor = nir_after_instr(instr);
4226bf215546Sopenharmony_ci         nir_ssa_def *channels[4];
4227bf215546Sopenharmony_ci
4228bf215546Sopenharmony_ci         if (post_shuffle) {
4229bf215546Sopenharmony_ci            /* Expand to load 3 components because it's shuffled like X<->Z. */
4230bf215546Sopenharmony_ci            intrin->num_components = MAX2(component + num_components, 3);
4231bf215546Sopenharmony_ci            intrin->dest.ssa.num_components = intrin->num_components;
4232bf215546Sopenharmony_ci
4233bf215546Sopenharmony_ci            nir_intrinsic_set_component(intrin, 0);
4234bf215546Sopenharmony_ci
4235bf215546Sopenharmony_ci            num_channels = MAX2(num_channels, 3);
4236bf215546Sopenharmony_ci         }
4237bf215546Sopenharmony_ci
4238bf215546Sopenharmony_ci         for (uint32_t i = 0; i < num_components; i++) {
4239bf215546Sopenharmony_ci            unsigned idx = i + (post_shuffle ? component : 0);
4240bf215546Sopenharmony_ci
4241bf215546Sopenharmony_ci            if (swizzle[i + component] < num_channels) {
4242bf215546Sopenharmony_ci               channels[i] = nir_channel(&b, &intrin->dest.ssa, swizzle[idx]);
4243bf215546Sopenharmony_ci            } else if (i + component == 3) {
4244bf215546Sopenharmony_ci               channels[i] = is_float ? nir_imm_floatN_t(&b, 1.0f, intrin->dest.ssa.bit_size)
4245bf215546Sopenharmony_ci                                      : nir_imm_intN_t(&b, 1u, intrin->dest.ssa.bit_size);
4246bf215546Sopenharmony_ci            } else {
4247bf215546Sopenharmony_ci               channels[i] = nir_imm_zero(&b, 1, intrin->dest.ssa.bit_size);
4248bf215546Sopenharmony_ci            }
4249bf215546Sopenharmony_ci         }
4250bf215546Sopenharmony_ci
4251bf215546Sopenharmony_ci         if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) {
4252bf215546Sopenharmony_ci            unsigned idx = num_components - 1;
4253bf215546Sopenharmony_ci            channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
4254bf215546Sopenharmony_ci         }
4255bf215546Sopenharmony_ci
4256bf215546Sopenharmony_ci         nir_ssa_def *new_dest = nir_vec(&b, channels, num_components);
4257bf215546Sopenharmony_ci
4258bf215546Sopenharmony_ci         nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest,
4259bf215546Sopenharmony_ci                                        new_dest->parent_instr);
4260bf215546Sopenharmony_ci
4261bf215546Sopenharmony_ci         progress = true;
4262bf215546Sopenharmony_ci      }
4263bf215546Sopenharmony_ci   }
4264bf215546Sopenharmony_ci
4265bf215546Sopenharmony_ci   if (progress)
4266bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
4267bf215546Sopenharmony_ci   else
4268bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
4269bf215546Sopenharmony_ci
4270bf215546Sopenharmony_ci   return progress;
4271bf215546Sopenharmony_ci}
4272bf215546Sopenharmony_ci
4273bf215546Sopenharmony_cistatic bool
4274bf215546Sopenharmony_ciradv_lower_fs_output(nir_shader *nir, const struct radv_pipeline_key *pipeline_key)
4275bf215546Sopenharmony_ci{
4276bf215546Sopenharmony_ci   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4277bf215546Sopenharmony_ci   bool progress = false;
4278bf215546Sopenharmony_ci
4279bf215546Sopenharmony_ci   nir_builder b;
4280bf215546Sopenharmony_ci   nir_builder_init(&b, impl);
4281bf215546Sopenharmony_ci
4282bf215546Sopenharmony_ci   nir_foreach_block(block, impl) {
4283bf215546Sopenharmony_ci      nir_foreach_instr(instr, block) {
4284bf215546Sopenharmony_ci         if (instr->type != nir_instr_type_intrinsic)
4285bf215546Sopenharmony_ci            continue;
4286bf215546Sopenharmony_ci
4287bf215546Sopenharmony_ci         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4288bf215546Sopenharmony_ci         if (intrin->intrinsic != nir_intrinsic_store_output)
4289bf215546Sopenharmony_ci            continue;
4290bf215546Sopenharmony_ci
4291bf215546Sopenharmony_ci         int slot = nir_intrinsic_base(intrin) - FRAG_RESULT_DATA0;
4292bf215546Sopenharmony_ci         if (slot < 0)
4293bf215546Sopenharmony_ci            continue;
4294bf215546Sopenharmony_ci
4295bf215546Sopenharmony_ci         unsigned write_mask = nir_intrinsic_write_mask(intrin);
4296bf215546Sopenharmony_ci         unsigned col_format = (pipeline_key->ps.col_format >> (4 * slot)) & 0xf;
4297bf215546Sopenharmony_ci         bool is_int8 = (pipeline_key->ps.is_int8 >> slot) & 1;
4298bf215546Sopenharmony_ci         bool is_int10 = (pipeline_key->ps.is_int10 >> slot) & 1;
4299bf215546Sopenharmony_ci         bool enable_mrt_output_nan_fixup = (pipeline_key->ps.enable_mrt_output_nan_fixup >> slot) & 1;
4300bf215546Sopenharmony_ci         bool is_16bit = intrin->src[0].ssa->bit_size == 16;
4301bf215546Sopenharmony_ci
4302bf215546Sopenharmony_ci         if (col_format == V_028714_SPI_SHADER_ZERO)
4303bf215546Sopenharmony_ci            continue;
4304bf215546Sopenharmony_ci
4305bf215546Sopenharmony_ci         b.cursor = nir_before_instr(instr);
4306bf215546Sopenharmony_ci         nir_ssa_def *values[4];
4307bf215546Sopenharmony_ci
4308bf215546Sopenharmony_ci         /* Extract the export values. */
4309bf215546Sopenharmony_ci         for (unsigned i = 0; i < 4; i++) {
4310bf215546Sopenharmony_ci            if (write_mask & (1 << i)) {
4311bf215546Sopenharmony_ci               values[i] = nir_channel(&b, intrin->src[0].ssa, i);
4312bf215546Sopenharmony_ci            } else {
4313bf215546Sopenharmony_ci               values[i] = nir_ssa_undef(&b, 1, 32);
4314bf215546Sopenharmony_ci            }
4315bf215546Sopenharmony_ci         }
4316bf215546Sopenharmony_ci
4317bf215546Sopenharmony_ci         /* Replace NaN by zero (for 32-bit float formats) to fix game bugs if requested. */
4318bf215546Sopenharmony_ci         if (enable_mrt_output_nan_fixup && !nir->info.internal && !is_16bit) {
4319bf215546Sopenharmony_ci            u_foreach_bit(i, write_mask) {
4320bf215546Sopenharmony_ci               const bool save_exact = b.exact;
4321bf215546Sopenharmony_ci
4322bf215546Sopenharmony_ci               b.exact = true;
4323bf215546Sopenharmony_ci               nir_ssa_def *isnan = nir_fneu(&b, values[i], values[i]);
4324bf215546Sopenharmony_ci               b.exact = save_exact;
4325bf215546Sopenharmony_ci
4326bf215546Sopenharmony_ci               values[i] = nir_bcsel(&b, isnan, nir_imm_zero(&b, 1, 32), values[i]);
4327bf215546Sopenharmony_ci            }
4328bf215546Sopenharmony_ci         }
4329bf215546Sopenharmony_ci
4330bf215546Sopenharmony_ci         if (col_format == V_028714_SPI_SHADER_FP16_ABGR ||
4331bf215546Sopenharmony_ci             col_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
4332bf215546Sopenharmony_ci             col_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
4333bf215546Sopenharmony_ci             col_format == V_028714_SPI_SHADER_UINT16_ABGR ||
4334bf215546Sopenharmony_ci             col_format == V_028714_SPI_SHADER_SINT16_ABGR) {
4335bf215546Sopenharmony_ci            /* Convert and/or clamp the export values. */
4336bf215546Sopenharmony_ci            switch (col_format) {
4337bf215546Sopenharmony_ci            case V_028714_SPI_SHADER_UINT16_ABGR: {
4338bf215546Sopenharmony_ci               unsigned max_rgb = is_int8 ? 255 : is_int10 ? 1023 : 0;
4339bf215546Sopenharmony_ci               u_foreach_bit(i, write_mask) {
4340bf215546Sopenharmony_ci                  if (is_int8 || is_int10) {
4341bf215546Sopenharmony_ci                     values[i] = nir_umin(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, 3u)
4342bf215546Sopenharmony_ci                                                                            : nir_imm_int(&b, max_rgb));
4343bf215546Sopenharmony_ci                  } else if (is_16bit) {
4344bf215546Sopenharmony_ci                     values[i] = nir_u2u32(&b, values[i]);
4345bf215546Sopenharmony_ci                  }
4346bf215546Sopenharmony_ci               }
4347bf215546Sopenharmony_ci               break;
4348bf215546Sopenharmony_ci            }
4349bf215546Sopenharmony_ci            case V_028714_SPI_SHADER_SINT16_ABGR: {
4350bf215546Sopenharmony_ci               unsigned max_rgb = is_int8 ? 127 : is_int10 ? 511 : 0;
4351bf215546Sopenharmony_ci               unsigned min_rgb = is_int8 ? -128 : is_int10 ? -512 : 0;
4352bf215546Sopenharmony_ci               u_foreach_bit(i, write_mask) {
4353bf215546Sopenharmony_ci                  if (is_int8 || is_int10) {
4354bf215546Sopenharmony_ci                     values[i] = nir_imin(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, 1u)
4355bf215546Sopenharmony_ci                                                                            : nir_imm_int(&b, max_rgb));
4356bf215546Sopenharmony_ci                     values[i] = nir_imax(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, -2u)
4357bf215546Sopenharmony_ci                                                                            : nir_imm_int(&b, min_rgb));
4358bf215546Sopenharmony_ci                  } else if (is_16bit) {
4359bf215546Sopenharmony_ci                     values[i] = nir_i2i32(&b, values[i]);
4360bf215546Sopenharmony_ci                  }
4361bf215546Sopenharmony_ci               }
4362bf215546Sopenharmony_ci               break;
4363bf215546Sopenharmony_ci            }
4364bf215546Sopenharmony_ci            case V_028714_SPI_SHADER_UNORM16_ABGR:
4365bf215546Sopenharmony_ci            case V_028714_SPI_SHADER_SNORM16_ABGR:
4366bf215546Sopenharmony_ci               u_foreach_bit(i, write_mask) {
4367bf215546Sopenharmony_ci                  if (is_16bit) {
4368bf215546Sopenharmony_ci                     values[i] = nir_f2f32(&b, values[i]);
4369bf215546Sopenharmony_ci                  }
4370bf215546Sopenharmony_ci               }
4371bf215546Sopenharmony_ci               break;
4372bf215546Sopenharmony_ci            default:
4373bf215546Sopenharmony_ci               break;
4374bf215546Sopenharmony_ci            }
4375bf215546Sopenharmony_ci
4376bf215546Sopenharmony_ci            /* Only nir_pack_32_2x16_split needs 16-bit inputs. */
4377bf215546Sopenharmony_ci            bool input_16_bit = col_format == V_028714_SPI_SHADER_FP16_ABGR && is_16bit;
4378bf215546Sopenharmony_ci            unsigned new_write_mask = 0;
4379bf215546Sopenharmony_ci
4380bf215546Sopenharmony_ci            /* Pack the export values. */
4381bf215546Sopenharmony_ci            for (unsigned i = 0; i < 2; i++) {
4382bf215546Sopenharmony_ci               bool enabled = (write_mask >> (i * 2)) & 0x3;
4383bf215546Sopenharmony_ci
4384bf215546Sopenharmony_ci               if (!enabled) {
4385bf215546Sopenharmony_ci                  values[i] = nir_ssa_undef(&b, 1, 32);
4386bf215546Sopenharmony_ci                  continue;
4387bf215546Sopenharmony_ci               }
4388bf215546Sopenharmony_ci
4389bf215546Sopenharmony_ci               nir_ssa_def *src0 = values[i * 2];
4390bf215546Sopenharmony_ci               nir_ssa_def *src1 = values[i * 2 + 1];
4391bf215546Sopenharmony_ci
4392bf215546Sopenharmony_ci               if (!(write_mask & (1 << (i * 2))))
4393bf215546Sopenharmony_ci                  src0 = nir_imm_zero(&b, 1, input_16_bit ? 16 : 32);
4394bf215546Sopenharmony_ci               if (!(write_mask & (1 << (i * 2 + 1))))
4395bf215546Sopenharmony_ci                  src1 = nir_imm_zero(&b, 1, input_16_bit ? 16 : 32);
4396bf215546Sopenharmony_ci
4397bf215546Sopenharmony_ci               if (col_format == V_028714_SPI_SHADER_FP16_ABGR) {
4398bf215546Sopenharmony_ci                  if (is_16bit) {
4399bf215546Sopenharmony_ci                     values[i] = nir_pack_32_2x16_split(&b, src0, src1);
4400bf215546Sopenharmony_ci                  } else {
4401bf215546Sopenharmony_ci                     values[i] = nir_pack_half_2x16_split(&b, src0, src1);
4402bf215546Sopenharmony_ci                  }
4403bf215546Sopenharmony_ci               } else if (col_format == V_028714_SPI_SHADER_UNORM16_ABGR) {
4404bf215546Sopenharmony_ci                  values[i] = nir_pack_unorm_2x16(&b, nir_vec2(&b, src0, src1));
4405bf215546Sopenharmony_ci               } else if (col_format == V_028714_SPI_SHADER_SNORM16_ABGR) {
4406bf215546Sopenharmony_ci                  values[i] = nir_pack_snorm_2x16(&b, nir_vec2(&b, src0, src1));
4407bf215546Sopenharmony_ci               } else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR) {
4408bf215546Sopenharmony_ci                  values[i] = nir_pack_uint_2x16(&b, nir_vec2(&b, src0, src1));
4409bf215546Sopenharmony_ci               } else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR) {
4410bf215546Sopenharmony_ci                  values[i] = nir_pack_sint_2x16(&b, nir_vec2(&b, src0, src1));
4411bf215546Sopenharmony_ci               }
4412bf215546Sopenharmony_ci
4413bf215546Sopenharmony_ci               new_write_mask |= 1 << i;
4414bf215546Sopenharmony_ci            }
4415bf215546Sopenharmony_ci
4416bf215546Sopenharmony_ci            /* Update the write mask for compressed outputs. */
4417bf215546Sopenharmony_ci            nir_intrinsic_set_write_mask(intrin, new_write_mask);
4418bf215546Sopenharmony_ci            intrin->num_components = util_last_bit(new_write_mask);
4419bf215546Sopenharmony_ci         }
4420bf215546Sopenharmony_ci
4421bf215546Sopenharmony_ci         nir_ssa_def *new_src = nir_vec(&b, values, intrin->num_components);
4422bf215546Sopenharmony_ci
4423bf215546Sopenharmony_ci         nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], nir_src_for_ssa(new_src));
4424bf215546Sopenharmony_ci
4425bf215546Sopenharmony_ci         progress = true;
4426bf215546Sopenharmony_ci      }
4427bf215546Sopenharmony_ci   }
4428bf215546Sopenharmony_ci
4429bf215546Sopenharmony_ci   if (progress)
4430bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
4431bf215546Sopenharmony_ci   else
4432bf215546Sopenharmony_ci      nir_metadata_preserve(impl, nir_metadata_all);
4433bf215546Sopenharmony_ci
4434bf215546Sopenharmony_ci   return progress;
4435bf215546Sopenharmony_ci}
4436bf215546Sopenharmony_ci
4437bf215546Sopenharmony_civoid
4438bf215546Sopenharmony_ciradv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo,
4439bf215546Sopenharmony_ci                         struct radv_pipeline_stage *out_stage, gl_shader_stage stage)
4440bf215546Sopenharmony_ci{
4441bf215546Sopenharmony_ci   const VkShaderModuleCreateInfo *minfo =
4442bf215546Sopenharmony_ci      vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
4443bf215546Sopenharmony_ci   const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
4444bf215546Sopenharmony_ci      vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
4445bf215546Sopenharmony_ci
4446bf215546Sopenharmony_ci   if (sinfo->module == VK_NULL_HANDLE && !minfo && !iinfo)
4447bf215546Sopenharmony_ci      return;
4448bf215546Sopenharmony_ci
4449bf215546Sopenharmony_ci   memset(out_stage, 0, sizeof(*out_stage));
4450bf215546Sopenharmony_ci
4451bf215546Sopenharmony_ci   out_stage->stage = stage;
4452bf215546Sopenharmony_ci   out_stage->entrypoint = sinfo->pName;
4453bf215546Sopenharmony_ci   out_stage->spec_info = sinfo->pSpecializationInfo;
4454bf215546Sopenharmony_ci   out_stage->feedback.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
4455bf215546Sopenharmony_ci
4456bf215546Sopenharmony_ci   if (sinfo->module != VK_NULL_HANDLE) {
4457bf215546Sopenharmony_ci      struct vk_shader_module *module = vk_shader_module_from_handle(sinfo->module);
4458bf215546Sopenharmony_ci      STATIC_ASSERT(sizeof(out_stage->spirv.sha1) == sizeof(module->sha1));
4459bf215546Sopenharmony_ci
4460bf215546Sopenharmony_ci      out_stage->spirv.data = module->data;
4461bf215546Sopenharmony_ci      out_stage->spirv.size = module->size;
4462bf215546Sopenharmony_ci      out_stage->spirv.object = &module->base;
4463bf215546Sopenharmony_ci
4464bf215546Sopenharmony_ci      if (module->nir)
4465bf215546Sopenharmony_ci         out_stage->internal_nir = module->nir;
4466bf215546Sopenharmony_ci   } else if (minfo) {
4467bf215546Sopenharmony_ci      out_stage->spirv.data = (const char *) minfo->pCode;
4468bf215546Sopenharmony_ci      out_stage->spirv.size = minfo->codeSize;
4469bf215546Sopenharmony_ci   }
4470bf215546Sopenharmony_ci
4471bf215546Sopenharmony_ci   vk_pipeline_hash_shader_stage(sinfo, out_stage->shader_sha1);
4472bf215546Sopenharmony_ci}
4473bf215546Sopenharmony_ci
4474bf215546Sopenharmony_cistatic struct radv_shader *
4475bf215546Sopenharmony_ciradv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
4476bf215546Sopenharmony_ci                                    struct radv_pipeline_stage *stages,
4477bf215546Sopenharmony_ci                                    const struct radv_pipeline_key *pipeline_key,
4478bf215546Sopenharmony_ci                                    const struct radv_pipeline_layout *pipeline_layout,
4479bf215546Sopenharmony_ci                                    bool keep_executable_info, bool keep_statistic_info,
4480bf215546Sopenharmony_ci                                    struct radv_shader_binary **gs_copy_binary)
4481bf215546Sopenharmony_ci{
4482bf215546Sopenharmony_ci   struct radv_device *device = pipeline->device;
4483bf215546Sopenharmony_ci   struct radv_shader_info info = {0};
4484bf215546Sopenharmony_ci
4485bf215546Sopenharmony_ci   if (stages[MESA_SHADER_GEOMETRY].info.vs.outinfo.export_clip_dists)
4486bf215546Sopenharmony_ci      info.vs.outinfo.export_clip_dists = true;
4487bf215546Sopenharmony_ci
4488bf215546Sopenharmony_ci   radv_nir_shader_info_pass(device, stages[MESA_SHADER_GEOMETRY].nir, pipeline_layout, pipeline_key,
4489bf215546Sopenharmony_ci                             &info);
4490bf215546Sopenharmony_ci   info.wave_size = 64; /* Wave32 not supported. */
4491bf215546Sopenharmony_ci   info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
4492bf215546Sopenharmony_ci   info.ballot_bit_size = 64;
4493bf215546Sopenharmony_ci
4494bf215546Sopenharmony_ci   struct radv_shader_args gs_copy_args = {0};
4495bf215546Sopenharmony_ci   gs_copy_args.is_gs_copy_shader = true;
4496bf215546Sopenharmony_ci   gs_copy_args.explicit_scratch_args = !radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX);
4497bf215546Sopenharmony_ci   radv_declare_shader_args(device->physical_device->rad_info.gfx_level, pipeline_key, &info,
4498bf215546Sopenharmony_ci                            MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX, &gs_copy_args);
4499bf215546Sopenharmony_ci   info.user_sgprs_locs = gs_copy_args.user_sgprs_locs;
4500bf215546Sopenharmony_ci   info.inline_push_constant_mask = gs_copy_args.ac.inline_push_const_mask;
4501bf215546Sopenharmony_ci
4502bf215546Sopenharmony_ci   return radv_create_gs_copy_shader(device, stages[MESA_SHADER_GEOMETRY].nir, &info, &gs_copy_args,
4503bf215546Sopenharmony_ci                                     gs_copy_binary, keep_executable_info, keep_statistic_info,
4504bf215546Sopenharmony_ci                                     pipeline_key->optimisations_disabled);
4505bf215546Sopenharmony_ci}
4506bf215546Sopenharmony_ci
4507bf215546Sopenharmony_cistatic void
4508bf215546Sopenharmony_ciradv_pipeline_nir_to_asm(struct radv_pipeline *pipeline, struct radv_pipeline_stage *stages,
4509bf215546Sopenharmony_ci                         const struct radv_pipeline_key *pipeline_key,
4510bf215546Sopenharmony_ci                         const struct radv_pipeline_layout *pipeline_layout,
4511bf215546Sopenharmony_ci                         bool keep_executable_info, bool keep_statistic_info,
4512bf215546Sopenharmony_ci                         gl_shader_stage last_vgt_api_stage,
4513bf215546Sopenharmony_ci                         struct radv_shader_binary **binaries,
4514bf215546Sopenharmony_ci                         struct radv_shader_binary **gs_copy_binary)
4515bf215546Sopenharmony_ci{
4516bf215546Sopenharmony_ci   struct radv_device *device = pipeline->device;
4517bf215546Sopenharmony_ci   unsigned active_stages = 0;
4518bf215546Sopenharmony_ci
4519bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
4520bf215546Sopenharmony_ci      if (stages[i].nir)
4521bf215546Sopenharmony_ci         active_stages |= (1 << i);
4522bf215546Sopenharmony_ci   }
4523bf215546Sopenharmony_ci
4524bf215546Sopenharmony_ci   bool pipeline_has_ngg = last_vgt_api_stage != MESA_SHADER_NONE &&
4525bf215546Sopenharmony_ci                           stages[last_vgt_api_stage].info.is_ngg;
4526bf215546Sopenharmony_ci
4527bf215546Sopenharmony_ci   if (stages[MESA_SHADER_GEOMETRY].nir && !pipeline_has_ngg) {
4528bf215546Sopenharmony_ci      pipeline->gs_copy_shader =
4529bf215546Sopenharmony_ci         radv_pipeline_create_gs_copy_shader(pipeline, stages, pipeline_key, pipeline_layout,
4530bf215546Sopenharmony_ci                                             keep_executable_info, keep_statistic_info,
4531bf215546Sopenharmony_ci                                             gs_copy_binary);
4532bf215546Sopenharmony_ci   }
4533bf215546Sopenharmony_ci
4534bf215546Sopenharmony_ci   for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
4535bf215546Sopenharmony_ci      if (!(active_stages & (1 << s)) || pipeline->shaders[s])
4536bf215546Sopenharmony_ci         continue;
4537bf215546Sopenharmony_ci
4538bf215546Sopenharmony_ci      nir_shader *shaders[2] = { stages[s].nir, NULL };
4539bf215546Sopenharmony_ci      unsigned shader_count = 1;
4540bf215546Sopenharmony_ci
4541bf215546Sopenharmony_ci      /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
4542bf215546Sopenharmony_ci      if (device->physical_device->rad_info.gfx_level >= GFX9 &&
4543bf215546Sopenharmony_ci          (s == MESA_SHADER_TESS_CTRL || s == MESA_SHADER_GEOMETRY)) {
4544bf215546Sopenharmony_ci         gl_shader_stage pre_stage;
4545bf215546Sopenharmony_ci
4546bf215546Sopenharmony_ci         if (s == MESA_SHADER_GEOMETRY && stages[MESA_SHADER_TESS_EVAL].nir) {
4547bf215546Sopenharmony_ci            pre_stage = MESA_SHADER_TESS_EVAL;
4548bf215546Sopenharmony_ci         } else {
4549bf215546Sopenharmony_ci            pre_stage = MESA_SHADER_VERTEX;
4550bf215546Sopenharmony_ci         }
4551bf215546Sopenharmony_ci
4552bf215546Sopenharmony_ci         shaders[0] = stages[pre_stage].nir;
4553bf215546Sopenharmony_ci         shaders[1] = stages[s].nir;
4554bf215546Sopenharmony_ci         shader_count = 2;
4555bf215546Sopenharmony_ci      }
4556bf215546Sopenharmony_ci
4557bf215546Sopenharmony_ci      int64_t stage_start = os_time_get_nano();
4558bf215546Sopenharmony_ci
4559bf215546Sopenharmony_ci      pipeline->shaders[s] = radv_shader_nir_to_asm(device, &stages[s], shaders, shader_count,
4560bf215546Sopenharmony_ci                                                    pipeline_key, keep_executable_info,
4561bf215546Sopenharmony_ci                                                    keep_statistic_info, &binaries[s]);
4562bf215546Sopenharmony_ci
4563bf215546Sopenharmony_ci      stages[s].feedback.duration += os_time_get_nano() - stage_start;
4564bf215546Sopenharmony_ci
4565bf215546Sopenharmony_ci      active_stages &= ~(1 << shaders[0]->info.stage);
4566bf215546Sopenharmony_ci      if (shaders[1])
4567bf215546Sopenharmony_ci         active_stages &= ~(1 << shaders[1]->info.stage);
4568bf215546Sopenharmony_ci   }
4569bf215546Sopenharmony_ci}
4570bf215546Sopenharmony_ci
4571bf215546Sopenharmony_ciVkResult
4572bf215546Sopenharmony_ciradv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
4573bf215546Sopenharmony_ci                    struct radv_device *device, struct radv_pipeline_cache *cache,
4574bf215546Sopenharmony_ci                    const struct radv_pipeline_key *pipeline_key,
4575bf215546Sopenharmony_ci                    const VkPipelineShaderStageCreateInfo *pStages,
4576bf215546Sopenharmony_ci                    uint32_t stageCount,
4577bf215546Sopenharmony_ci                    const VkPipelineCreateFlags flags, const uint8_t *custom_hash,
4578bf215546Sopenharmony_ci                    const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
4579bf215546Sopenharmony_ci                    struct radv_pipeline_shader_stack_size **stack_sizes,
4580bf215546Sopenharmony_ci                    uint32_t *num_stack_sizes,
4581bf215546Sopenharmony_ci                    gl_shader_stage *last_vgt_api_stage)
4582bf215546Sopenharmony_ci{
4583bf215546Sopenharmony_ci   const char *noop_fs_entrypoint = "noop_fs";
4584bf215546Sopenharmony_ci   struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
4585bf215546Sopenharmony_ci   struct radv_shader_binary *gs_copy_binary = NULL;
4586bf215546Sopenharmony_ci   unsigned char hash[20];
4587bf215546Sopenharmony_ci   bool keep_executable_info =
4588bf215546Sopenharmony_ci      (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
4589bf215546Sopenharmony_ci      device->keep_shader_info;
4590bf215546Sopenharmony_ci   bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
4591bf215546Sopenharmony_ci                              (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
4592bf215546Sopenharmony_ci                              device->keep_shader_info;
4593bf215546Sopenharmony_ci   struct radv_pipeline_stage stages[MESA_VULKAN_SHADER_STAGES] = {0};
4594bf215546Sopenharmony_ci   VkPipelineCreationFeedback pipeline_feedback = {
4595bf215546Sopenharmony_ci      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
4596bf215546Sopenharmony_ci   };
4597bf215546Sopenharmony_ci   bool noop_fs = false;
4598bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
4599bf215546Sopenharmony_ci
4600bf215546Sopenharmony_ci   int64_t pipeline_start = os_time_get_nano();
4601bf215546Sopenharmony_ci
4602bf215546Sopenharmony_ci   for (uint32_t i = 0; i < stageCount; i++) {
4603bf215546Sopenharmony_ci      const VkPipelineShaderStageCreateInfo *sinfo = &pStages[i];
4604bf215546Sopenharmony_ci      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
4605bf215546Sopenharmony_ci
4606bf215546Sopenharmony_ci      radv_pipeline_stage_init(sinfo, &stages[stage], stage);
4607bf215546Sopenharmony_ci   }
4608bf215546Sopenharmony_ci
4609bf215546Sopenharmony_ci   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
4610bf215546Sopenharmony_ci      if (!stages[s].entrypoint)
4611bf215546Sopenharmony_ci         continue;
4612bf215546Sopenharmony_ci
4613bf215546Sopenharmony_ci      if (stages[s].stage < MESA_SHADER_FRAGMENT || stages[s].stage == MESA_SHADER_MESH)
4614bf215546Sopenharmony_ci         *last_vgt_api_stage = stages[s].stage;
4615bf215546Sopenharmony_ci   }
4616bf215546Sopenharmony_ci
4617bf215546Sopenharmony_ci   ASSERTED bool primitive_shading =
4618bf215546Sopenharmony_ci      stages[MESA_SHADER_VERTEX].entrypoint || stages[MESA_SHADER_TESS_CTRL].entrypoint ||
4619bf215546Sopenharmony_ci      stages[MESA_SHADER_TESS_EVAL].entrypoint || stages[MESA_SHADER_GEOMETRY].entrypoint;
4620bf215546Sopenharmony_ci   ASSERTED bool mesh_shading =
4621bf215546Sopenharmony_ci      stages[MESA_SHADER_MESH].entrypoint;
4622bf215546Sopenharmony_ci
4623bf215546Sopenharmony_ci   /* Primitive and mesh shading must not be mixed in the same pipeline. */
4624bf215546Sopenharmony_ci   assert(!primitive_shading || !mesh_shading);
4625bf215546Sopenharmony_ci   /* Mesh shaders are mandatory in mesh shading pipelines. */
4626bf215546Sopenharmony_ci   assert(mesh_shading == !!stages[MESA_SHADER_MESH].entrypoint);
4627bf215546Sopenharmony_ci   /* Mesh shaders always need NGG. */
4628bf215546Sopenharmony_ci   assert(!mesh_shading || pipeline_key->use_ngg);
4629bf215546Sopenharmony_ci
4630bf215546Sopenharmony_ci   if (custom_hash)
4631bf215546Sopenharmony_ci      memcpy(hash, custom_hash, 20);
4632bf215546Sopenharmony_ci   else {
4633bf215546Sopenharmony_ci      radv_hash_shaders(hash, stages, pipeline_layout, pipeline_key,
4634bf215546Sopenharmony_ci                        radv_get_hash_flags(device, keep_statistic_info));
4635bf215546Sopenharmony_ci   }
4636bf215546Sopenharmony_ci
4637bf215546Sopenharmony_ci   pipeline->pipeline_hash = *(uint64_t *)hash;
4638bf215546Sopenharmony_ci
4639bf215546Sopenharmony_ci   bool found_in_application_cache = true;
4640bf215546Sopenharmony_ci   if (!keep_executable_info &&
4641bf215546Sopenharmony_ci       radv_create_shaders_from_pipeline_cache(device, cache, hash, pipeline,
4642bf215546Sopenharmony_ci                                               stack_sizes, num_stack_sizes,
4643bf215546Sopenharmony_ci                                               &found_in_application_cache)) {
4644bf215546Sopenharmony_ci      if (found_in_application_cache)
4645bf215546Sopenharmony_ci         pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
4646bf215546Sopenharmony_ci      result = VK_SUCCESS;
4647bf215546Sopenharmony_ci      goto done;
4648bf215546Sopenharmony_ci   }
4649bf215546Sopenharmony_ci
4650bf215546Sopenharmony_ci   if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
4651bf215546Sopenharmony_ci      if (found_in_application_cache)
4652bf215546Sopenharmony_ci         pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
4653bf215546Sopenharmony_ci      result = VK_PIPELINE_COMPILE_REQUIRED;
4654bf215546Sopenharmony_ci      goto done;
4655bf215546Sopenharmony_ci   }
4656bf215546Sopenharmony_ci
4657bf215546Sopenharmony_ci   if (pipeline->type == RADV_PIPELINE_GRAPHICS && !stages[MESA_SHADER_FRAGMENT].entrypoint) {
4658bf215546Sopenharmony_ci      nir_builder fs_b = radv_meta_init_shader(device, MESA_SHADER_FRAGMENT, "noop_fs");
4659bf215546Sopenharmony_ci
4660bf215546Sopenharmony_ci      stages[MESA_SHADER_FRAGMENT] = (struct radv_pipeline_stage) {
4661bf215546Sopenharmony_ci         .stage = MESA_SHADER_FRAGMENT,
4662bf215546Sopenharmony_ci         .internal_nir = fs_b.shader,
4663bf215546Sopenharmony_ci         .entrypoint = noop_fs_entrypoint,
4664bf215546Sopenharmony_ci         .feedback = {
4665bf215546Sopenharmony_ci            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
4666bf215546Sopenharmony_ci         },
4667bf215546Sopenharmony_ci      };
4668bf215546Sopenharmony_ci
4669bf215546Sopenharmony_ci      noop_fs = true;
4670bf215546Sopenharmony_ci   }
4671bf215546Sopenharmony_ci
4672bf215546Sopenharmony_ci   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
4673bf215546Sopenharmony_ci      if (!stages[s].entrypoint)
4674bf215546Sopenharmony_ci         continue;
4675bf215546Sopenharmony_ci
4676bf215546Sopenharmony_ci      int64_t stage_start = os_time_get_nano();
4677bf215546Sopenharmony_ci
4678bf215546Sopenharmony_ci      stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], pipeline_key);
4679bf215546Sopenharmony_ci
4680bf215546Sopenharmony_ci      stages[s].feedback.duration += os_time_get_nano() - stage_start;
4681bf215546Sopenharmony_ci   }
4682bf215546Sopenharmony_ci
4683bf215546Sopenharmony_ci   /* Force per-vertex VRS. */
4684bf215546Sopenharmony_ci   if (radv_consider_force_vrs(pipeline, noop_fs, stages, *last_vgt_api_stage)) {
4685bf215546Sopenharmony_ci      assert(*last_vgt_api_stage == MESA_SHADER_VERTEX ||
4686bf215546Sopenharmony_ci             *last_vgt_api_stage == MESA_SHADER_TESS_EVAL ||
4687bf215546Sopenharmony_ci             *last_vgt_api_stage == MESA_SHADER_GEOMETRY);
4688bf215546Sopenharmony_ci      nir_shader *last_vgt_shader = stages[*last_vgt_api_stage].nir;
4689bf215546Sopenharmony_ci      NIR_PASS(_, last_vgt_shader, radv_force_primitive_shading_rate, device);
4690bf215546Sopenharmony_ci   }
4691bf215546Sopenharmony_ci
4692bf215546Sopenharmony_ci   bool optimize_conservatively = pipeline_key->optimisations_disabled;
4693bf215546Sopenharmony_ci
4694bf215546Sopenharmony_ci   /* Determine if shaders uses NGG before linking because it's needed for some NIR pass. */
4695bf215546Sopenharmony_ci   radv_fill_shader_info_ngg(pipeline, pipeline_key, stages);
4696bf215546Sopenharmony_ci
4697bf215546Sopenharmony_ci   bool pipeline_has_ngg = (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.is_ngg) ||
4698bf215546Sopenharmony_ci                           (stages[MESA_SHADER_TESS_EVAL].nir && stages[MESA_SHADER_TESS_EVAL].info.is_ngg) ||
4699bf215546Sopenharmony_ci                           (stages[MESA_SHADER_MESH].nir && stages[MESA_SHADER_MESH].info.is_ngg);
4700bf215546Sopenharmony_ci
4701bf215546Sopenharmony_ci   if (stages[MESA_SHADER_GEOMETRY].nir) {
4702bf215546Sopenharmony_ci      unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
4703bf215546Sopenharmony_ci
4704bf215546Sopenharmony_ci      if (pipeline_has_ngg) {
4705bf215546Sopenharmony_ci         nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
4706bf215546Sopenharmony_ci                         nir_lower_gs_intrinsics_count_vertices_per_primitive |
4707bf215546Sopenharmony_ci                         nir_lower_gs_intrinsics_overwrite_incomplete;
4708bf215546Sopenharmony_ci      }
4709bf215546Sopenharmony_ci
4710bf215546Sopenharmony_ci      NIR_PASS(_, stages[MESA_SHADER_GEOMETRY].nir, nir_lower_gs_intrinsics, nir_gs_flags);
4711bf215546Sopenharmony_ci   }
4712bf215546Sopenharmony_ci
4713bf215546Sopenharmony_ci   radv_link_shaders(pipeline, pipeline_key, stages, optimize_conservatively, *last_vgt_api_stage);
4714bf215546Sopenharmony_ci   radv_set_driver_locations(pipeline, stages, *last_vgt_api_stage);
4715bf215546Sopenharmony_ci
4716bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
4717bf215546Sopenharmony_ci      if (stages[i].nir) {
4718bf215546Sopenharmony_ci         int64_t stage_start = os_time_get_nano();
4719bf215546Sopenharmony_ci
4720bf215546Sopenharmony_ci         radv_optimize_nir(stages[i].nir, optimize_conservatively, false);
4721bf215546Sopenharmony_ci
4722bf215546Sopenharmony_ci         /* Gather info again, information such as outputs_read can be out-of-date. */
4723bf215546Sopenharmony_ci         nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir));
4724bf215546Sopenharmony_ci         radv_lower_io(device, stages[i].nir, stages[MESA_SHADER_MESH].nir);
4725bf215546Sopenharmony_ci
4726bf215546Sopenharmony_ci         stages[i].feedback.duration += os_time_get_nano() - stage_start;
4727bf215546Sopenharmony_ci      }
4728bf215546Sopenharmony_ci   }
4729bf215546Sopenharmony_ci
4730bf215546Sopenharmony_ci   if (stages[MESA_SHADER_TESS_CTRL].nir) {
4731bf215546Sopenharmony_ci      nir_lower_patch_vertices(stages[MESA_SHADER_TESS_EVAL].nir,
4732bf215546Sopenharmony_ci                               stages[MESA_SHADER_TESS_CTRL].nir->info.tess.tcs_vertices_out, NULL);
4733bf215546Sopenharmony_ci      gather_tess_info(device, stages, pipeline_key);
4734bf215546Sopenharmony_ci   }
4735bf215546Sopenharmony_ci
4736bf215546Sopenharmony_ci   if (stages[MESA_SHADER_VERTEX].nir) {
4737bf215546Sopenharmony_ci      NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, pipeline_key);
4738bf215546Sopenharmony_ci   }
4739bf215546Sopenharmony_ci
4740bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir && !radv_use_llvm_for_stage(device, MESA_SHADER_FRAGMENT)) {
4741bf215546Sopenharmony_ci      /* TODO: Convert the LLVM backend. */
4742bf215546Sopenharmony_ci      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_lower_fs_output, pipeline_key);
4743bf215546Sopenharmony_ci   }
4744bf215546Sopenharmony_ci
4745bf215546Sopenharmony_ci   radv_fill_shader_info(pipeline, pipeline_layout, pipeline_key, stages, *last_vgt_api_stage);
4746bf215546Sopenharmony_ci
4747bf215546Sopenharmony_ci   if (pipeline_has_ngg) {
4748bf215546Sopenharmony_ci      struct gfx10_ngg_info *ngg_info;
4749bf215546Sopenharmony_ci
4750bf215546Sopenharmony_ci      if (stages[MESA_SHADER_GEOMETRY].nir)
4751bf215546Sopenharmony_ci         ngg_info = &stages[MESA_SHADER_GEOMETRY].info.ngg_info;
4752bf215546Sopenharmony_ci      else if (stages[MESA_SHADER_TESS_CTRL].nir)
4753bf215546Sopenharmony_ci         ngg_info = &stages[MESA_SHADER_TESS_EVAL].info.ngg_info;
4754bf215546Sopenharmony_ci      else if (stages[MESA_SHADER_VERTEX].nir)
4755bf215546Sopenharmony_ci         ngg_info = &stages[MESA_SHADER_VERTEX].info.ngg_info;
4756bf215546Sopenharmony_ci      else if (stages[MESA_SHADER_MESH].nir)
4757bf215546Sopenharmony_ci         ngg_info = &stages[MESA_SHADER_MESH].info.ngg_info;
4758bf215546Sopenharmony_ci      else
4759bf215546Sopenharmony_ci         unreachable("Missing NGG shader stage.");
4760bf215546Sopenharmony_ci
4761bf215546Sopenharmony_ci      if (*last_vgt_api_stage == MESA_SHADER_MESH)
4762bf215546Sopenharmony_ci         gfx10_get_ngg_ms_info(&stages[MESA_SHADER_MESH], ngg_info);
4763bf215546Sopenharmony_ci      else
4764bf215546Sopenharmony_ci         gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info);
4765bf215546Sopenharmony_ci   } else if (stages[MESA_SHADER_GEOMETRY].nir) {
4766bf215546Sopenharmony_ci      struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info;
4767bf215546Sopenharmony_ci
4768bf215546Sopenharmony_ci      gfx9_get_gs_info(pipeline_key, pipeline, stages, gs_info);
4769bf215546Sopenharmony_ci   } else {
4770bf215546Sopenharmony_ci      gl_shader_stage hw_vs_api_stage =
4771bf215546Sopenharmony_ci         stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
4772bf215546Sopenharmony_ci      stages[hw_vs_api_stage].info.workgroup_size = stages[hw_vs_api_stage].info.wave_size;
4773bf215546Sopenharmony_ci   }
4774bf215546Sopenharmony_ci
4775bf215546Sopenharmony_ci   radv_determine_ngg_settings(pipeline, pipeline_key, stages, *last_vgt_api_stage);
4776bf215546Sopenharmony_ci
4777bf215546Sopenharmony_ci   radv_declare_pipeline_args(device, stages, pipeline_key);
4778bf215546Sopenharmony_ci
4779bf215546Sopenharmony_ci   if (stages[MESA_SHADER_FRAGMENT].nir) {
4780bf215546Sopenharmony_ci      NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_lower_fs_intrinsics,
4781bf215546Sopenharmony_ci               &stages[MESA_SHADER_FRAGMENT], pipeline_key);
4782bf215546Sopenharmony_ci   }
4783bf215546Sopenharmony_ci
4784bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
4785bf215546Sopenharmony_ci      if (stages[i].nir) {
4786bf215546Sopenharmony_ci         int64_t stage_start = os_time_get_nano();
4787bf215546Sopenharmony_ci
4788bf215546Sopenharmony_ci         /* Wave and workgroup size should already be filled. */
4789bf215546Sopenharmony_ci         assert(stages[i].info.wave_size && stages[i].info.workgroup_size);
4790bf215546Sopenharmony_ci
4791bf215546Sopenharmony_ci         if (!radv_use_llvm_for_stage(device, i)) {
4792bf215546Sopenharmony_ci            nir_lower_non_uniform_access_options options = {
4793bf215546Sopenharmony_ci               .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access |
4794bf215546Sopenharmony_ci                        nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access,
4795bf215546Sopenharmony_ci               .callback = &non_uniform_access_callback,
4796bf215546Sopenharmony_ci               .callback_data = NULL,
4797bf215546Sopenharmony_ci            };
4798bf215546Sopenharmony_ci            NIR_PASS(_, stages[i].nir, nir_lower_non_uniform_access, &options);
4799bf215546Sopenharmony_ci         }
4800bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_lower_memory_model);
4801bf215546Sopenharmony_ci
4802bf215546Sopenharmony_ci         nir_load_store_vectorize_options vectorize_opts = {
4803bf215546Sopenharmony_ci            .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const |
4804bf215546Sopenharmony_ci                     nir_var_mem_shared | nir_var_mem_global,
4805bf215546Sopenharmony_ci            .callback = mem_vectorize_callback,
4806bf215546Sopenharmony_ci            .robust_modes = 0,
4807bf215546Sopenharmony_ci            /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
4808bf215546Sopenharmony_ci             * the final offset is not.
4809bf215546Sopenharmony_ci             */
4810bf215546Sopenharmony_ci            .has_shared2_amd = device->physical_device->rad_info.gfx_level >= GFX7,
4811bf215546Sopenharmony_ci         };
4812bf215546Sopenharmony_ci
4813bf215546Sopenharmony_ci         if (device->robust_buffer_access2) {
4814bf215546Sopenharmony_ci            vectorize_opts.robust_modes =
4815bf215546Sopenharmony_ci               nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_push_const;
4816bf215546Sopenharmony_ci         }
4817bf215546Sopenharmony_ci
4818bf215546Sopenharmony_ci         bool progress = false;
4819bf215546Sopenharmony_ci         NIR_PASS(progress, stages[i].nir, nir_opt_load_store_vectorize, &vectorize_opts);
4820bf215546Sopenharmony_ci         if (progress) {
4821bf215546Sopenharmony_ci            NIR_PASS(_, stages[i].nir, nir_copy_prop);
4822bf215546Sopenharmony_ci            NIR_PASS(_, stages[i].nir, nir_opt_shrink_stores,
4823bf215546Sopenharmony_ci                     !device->instance->disable_shrink_image_store);
4824bf215546Sopenharmony_ci
4825bf215546Sopenharmony_ci            /* Gather info again, to update whether 8/16-bit are used. */
4826bf215546Sopenharmony_ci            nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir));
4827bf215546Sopenharmony_ci         }
4828bf215546Sopenharmony_ci
4829bf215546Sopenharmony_ci         struct radv_shader_info *info = &stages[i].info;
4830bf215546Sopenharmony_ci         if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9) {
4831bf215546Sopenharmony_ci            if (i == MESA_SHADER_VERTEX && stages[MESA_SHADER_TESS_CTRL].nir)
4832bf215546Sopenharmony_ci               info = &stages[MESA_SHADER_TESS_CTRL].info;
4833bf215546Sopenharmony_ci            else if (i == MESA_SHADER_VERTEX && stages[MESA_SHADER_GEOMETRY].nir)
4834bf215546Sopenharmony_ci               info = &stages[MESA_SHADER_GEOMETRY].info;
4835bf215546Sopenharmony_ci            else if (i == MESA_SHADER_TESS_EVAL && stages[MESA_SHADER_GEOMETRY].nir)
4836bf215546Sopenharmony_ci               info = &stages[MESA_SHADER_GEOMETRY].info;
4837bf215546Sopenharmony_ci         }
4838bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, radv_nir_lower_ycbcr_textures, pipeline_layout);
4839bf215546Sopenharmony_ci         NIR_PASS_V(stages[i].nir, radv_nir_apply_pipeline_layout, device, pipeline_layout, info,
4840bf215546Sopenharmony_ci                    &stages[i].args);
4841bf215546Sopenharmony_ci
4842bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_opt_shrink_vectors);
4843bf215546Sopenharmony_ci
4844bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_lower_alu_width, opt_vectorize_callback, device);
4845bf215546Sopenharmony_ci
4846bf215546Sopenharmony_ci         /* lower ALU operations */
4847bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_lower_int64);
4848bf215546Sopenharmony_ci
4849bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_opt_idiv_const, 8);
4850bf215546Sopenharmony_ci
4851bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_lower_idiv,
4852bf215546Sopenharmony_ci                  &(nir_lower_idiv_options){
4853bf215546Sopenharmony_ci                     .imprecise_32bit_lowering = false,
4854bf215546Sopenharmony_ci                     .allow_fp16 = device->physical_device->rad_info.gfx_level >= GFX9,
4855bf215546Sopenharmony_ci                  });
4856bf215546Sopenharmony_ci
4857bf215546Sopenharmony_ci         nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;
4858bf215546Sopenharmony_ci         if (i != MESA_SHADER_FRAGMENT || !pipeline_key->disable_sinking_load_input_fs)
4859bf215546Sopenharmony_ci            sink_opts |= nir_move_load_input;
4860bf215546Sopenharmony_ci
4861bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_opt_sink, sink_opts);
4862bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_opt_move,
4863bf215546Sopenharmony_ci                  nir_move_load_input | nir_move_const_undef | nir_move_copies);
4864bf215546Sopenharmony_ci
4865bf215546Sopenharmony_ci         /* Lower I/O intrinsics to memory instructions. */
4866bf215546Sopenharmony_ci         bool io_to_mem = radv_lower_io_to_mem(device, &stages[i], pipeline_key);
4867bf215546Sopenharmony_ci         bool lowered_ngg = pipeline_has_ngg && i == *last_vgt_api_stage;
4868bf215546Sopenharmony_ci         if (lowered_ngg)
4869bf215546Sopenharmony_ci            radv_lower_ngg(device, &stages[i], pipeline_key);
4870bf215546Sopenharmony_ci
4871bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, ac_nir_lower_global_access);
4872bf215546Sopenharmony_ci         NIR_PASS_V(stages[i].nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level,
4873bf215546Sopenharmony_ci                    &stages[i].info, &stages[i].args, pipeline_key,
4874bf215546Sopenharmony_ci                    radv_use_llvm_for_stage(device, i));
4875bf215546Sopenharmony_ci         radv_optimize_nir_algebraic(
4876bf215546Sopenharmony_ci            stages[i].nir, io_to_mem || lowered_ngg || i == MESA_SHADER_COMPUTE || i == MESA_SHADER_TASK);
4877bf215546Sopenharmony_ci
4878bf215546Sopenharmony_ci         if (stages[i].nir->info.bit_sizes_int & (8 | 16)) {
4879bf215546Sopenharmony_ci            if (device->physical_device->rad_info.gfx_level >= GFX8) {
4880bf215546Sopenharmony_ci               NIR_PASS(_, stages[i].nir, nir_convert_to_lcssa, true, true);
4881bf215546Sopenharmony_ci               nir_divergence_analysis(stages[i].nir);
4882bf215546Sopenharmony_ci            }
4883bf215546Sopenharmony_ci
4884bf215546Sopenharmony_ci            if (nir_lower_bit_size(stages[i].nir, lower_bit_size_callback, device)) {
4885bf215546Sopenharmony_ci               NIR_PASS(_, stages[i].nir, nir_opt_constant_folding);
4886bf215546Sopenharmony_ci            }
4887bf215546Sopenharmony_ci
4888bf215546Sopenharmony_ci            if (device->physical_device->rad_info.gfx_level >= GFX8)
4889bf215546Sopenharmony_ci               NIR_PASS(_, stages[i].nir, nir_opt_remove_phis); /* cleanup LCSSA phis */
4890bf215546Sopenharmony_ci         }
4891bf215546Sopenharmony_ci         if (((stages[i].nir->info.bit_sizes_int | stages[i].nir->info.bit_sizes_float) & 16) &&
4892bf215546Sopenharmony_ci             device->physical_device->rad_info.gfx_level >= GFX9) {
4893bf215546Sopenharmony_ci            bool separate_g16 = device->physical_device->rad_info.gfx_level >= GFX10;
4894bf215546Sopenharmony_ci            struct nir_fold_tex_srcs_options fold_srcs_options[] = {
4895bf215546Sopenharmony_ci               {
4896bf215546Sopenharmony_ci                  .sampler_dims =
4897bf215546Sopenharmony_ci                     ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
4898bf215546Sopenharmony_ci                  .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) |
4899bf215546Sopenharmony_ci                               (1 << nir_tex_src_bias) | (1 << nir_tex_src_min_lod) |
4900bf215546Sopenharmony_ci                               (1 << nir_tex_src_ms_index) |
4901bf215546Sopenharmony_ci                               (separate_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)),
4902bf215546Sopenharmony_ci               },
4903bf215546Sopenharmony_ci               {
4904bf215546Sopenharmony_ci                  .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE),
4905bf215546Sopenharmony_ci                  .src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
4906bf215546Sopenharmony_ci               },
4907bf215546Sopenharmony_ci            };
4908bf215546Sopenharmony_ci            struct nir_fold_16bit_tex_image_options fold_16bit_options = {
4909bf215546Sopenharmony_ci               .rounding_mode = nir_rounding_mode_rtne,
4910bf215546Sopenharmony_ci               .fold_tex_dest = true,
4911bf215546Sopenharmony_ci               .fold_image_load_store_data = true,
4912bf215546Sopenharmony_ci               .fold_srcs_options_count = separate_g16 ? 2 : 1,
4913bf215546Sopenharmony_ci               .fold_srcs_options = fold_srcs_options,
4914bf215546Sopenharmony_ci            };
4915bf215546Sopenharmony_ci            NIR_PASS(_, stages[i].nir, nir_fold_16bit_tex_image, &fold_16bit_options);
4916bf215546Sopenharmony_ci
4917bf215546Sopenharmony_ci            NIR_PASS(_, stages[i].nir, nir_opt_vectorize, opt_vectorize_callback, device);
4918bf215546Sopenharmony_ci         }
4919bf215546Sopenharmony_ci
4920bf215546Sopenharmony_ci         /* cleanup passes */
4921bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_lower_alu_width, opt_vectorize_callback, device);
4922bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_lower_load_const_to_scalar);
4923bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_copy_prop);
4924bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_opt_dce);
4925bf215546Sopenharmony_ci
4926bf215546Sopenharmony_ci         sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo;
4927bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_opt_sink, sink_opts);
4928bf215546Sopenharmony_ci
4929bf215546Sopenharmony_ci         nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo |
4930bf215546Sopenharmony_ci                                      nir_move_load_input | nir_move_comparisons | nir_move_copies;
4931bf215546Sopenharmony_ci         NIR_PASS(_, stages[i].nir, nir_opt_move, move_opts);
4932bf215546Sopenharmony_ci
4933bf215546Sopenharmony_ci         stages[i].feedback.duration += os_time_get_nano() - stage_start;
4934bf215546Sopenharmony_ci      }
4935bf215546Sopenharmony_ci   }
4936bf215546Sopenharmony_ci
4937bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
4938bf215546Sopenharmony_ci      if (stages[i].nir) {
4939bf215546Sopenharmony_ci         if (radv_can_dump_shader(device, stages[i].nir, false))
4940bf215546Sopenharmony_ci            nir_print_shader(stages[i].nir, stderr);
4941bf215546Sopenharmony_ci      }
4942bf215546Sopenharmony_ci   }
4943bf215546Sopenharmony_ci
4944bf215546Sopenharmony_ci   /* Compile NIR shaders to AMD assembly. */
4945bf215546Sopenharmony_ci   radv_pipeline_nir_to_asm(pipeline, stages, pipeline_key, pipeline_layout, keep_executable_info,
4946bf215546Sopenharmony_ci                            keep_statistic_info, *last_vgt_api_stage, binaries, &gs_copy_binary);
4947bf215546Sopenharmony_ci
4948bf215546Sopenharmony_ci   if (keep_executable_info) {
4949bf215546Sopenharmony_ci      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
4950bf215546Sopenharmony_ci         struct radv_shader *shader = pipeline->shaders[i];
4951bf215546Sopenharmony_ci         if (!shader)
4952bf215546Sopenharmony_ci            continue;
4953bf215546Sopenharmony_ci
4954bf215546Sopenharmony_ci         if (!stages[i].spirv.size)
4955bf215546Sopenharmony_ci            continue;
4956bf215546Sopenharmony_ci
4957bf215546Sopenharmony_ci         shader->spirv = malloc(stages[i].spirv.size);
4958bf215546Sopenharmony_ci         memcpy(shader->spirv, stages[i].spirv.data, stages[i].spirv.size);
4959bf215546Sopenharmony_ci         shader->spirv_size = stages[i].spirv.size;
4960bf215546Sopenharmony_ci      }
4961bf215546Sopenharmony_ci   }
4962bf215546Sopenharmony_ci
4963bf215546Sopenharmony_ci   /* Upload shader binaries. */
4964bf215546Sopenharmony_ci   radv_upload_shaders(device, pipeline, binaries, gs_copy_binary);
4965bf215546Sopenharmony_ci
4966bf215546Sopenharmony_ci   if (!keep_executable_info) {
4967bf215546Sopenharmony_ci      if (pipeline->gs_copy_shader) {
4968bf215546Sopenharmony_ci         assert(!binaries[MESA_SHADER_COMPUTE] && !pipeline->shaders[MESA_SHADER_COMPUTE]);
4969bf215546Sopenharmony_ci         binaries[MESA_SHADER_COMPUTE] = gs_copy_binary;
4970bf215546Sopenharmony_ci         pipeline->shaders[MESA_SHADER_COMPUTE] = pipeline->gs_copy_shader;
4971bf215546Sopenharmony_ci      }
4972bf215546Sopenharmony_ci
4973bf215546Sopenharmony_ci      radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline, binaries,
4974bf215546Sopenharmony_ci                                         stack_sizes ? *stack_sizes : NULL,
4975bf215546Sopenharmony_ci                                         num_stack_sizes ? *num_stack_sizes : 0);
4976bf215546Sopenharmony_ci
4977bf215546Sopenharmony_ci      if (pipeline->gs_copy_shader) {
4978bf215546Sopenharmony_ci         pipeline->gs_copy_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
4979bf215546Sopenharmony_ci         pipeline->shaders[MESA_SHADER_COMPUTE] = NULL;
4980bf215546Sopenharmony_ci         binaries[MESA_SHADER_COMPUTE] = NULL;
4981bf215546Sopenharmony_ci      }
4982bf215546Sopenharmony_ci   }
4983bf215546Sopenharmony_ci
4984bf215546Sopenharmony_ci   free(gs_copy_binary);
4985bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
4986bf215546Sopenharmony_ci      free(binaries[i]);
4987bf215546Sopenharmony_ci      if (stages[i].nir) {
4988bf215546Sopenharmony_ci         if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->shaders[i]) {
4989bf215546Sopenharmony_ci            radv_dump_shader_stats(device, pipeline, i, stderr);
4990bf215546Sopenharmony_ci         }
4991bf215546Sopenharmony_ci
4992bf215546Sopenharmony_ci         ralloc_free(stages[i].nir);
4993bf215546Sopenharmony_ci      }
4994bf215546Sopenharmony_ci   }
4995bf215546Sopenharmony_ci
4996bf215546Sopenharmony_cidone:
4997bf215546Sopenharmony_ci   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
4998bf215546Sopenharmony_ci
4999bf215546Sopenharmony_ci   if (creation_feedback) {
5000bf215546Sopenharmony_ci      *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
5001bf215546Sopenharmony_ci
5002bf215546Sopenharmony_ci      uint32_t stage_count = creation_feedback->pipelineStageCreationFeedbackCount;
5003bf215546Sopenharmony_ci      assert(stage_count == 0 || stageCount == stage_count);
5004bf215546Sopenharmony_ci      for (uint32_t i = 0; i < stage_count; i++) {
5005bf215546Sopenharmony_ci         gl_shader_stage s = vk_to_mesa_shader_stage(pStages[i].stage);
5006bf215546Sopenharmony_ci         creation_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
5007bf215546Sopenharmony_ci      }
5008bf215546Sopenharmony_ci   }
5009bf215546Sopenharmony_ci
5010bf215546Sopenharmony_ci   return result;
5011bf215546Sopenharmony_ci}
5012bf215546Sopenharmony_ci
5013bf215546Sopenharmony_cistatic uint32_t
5014bf215546Sopenharmony_ciradv_pipeline_stage_to_user_data_0(struct radv_graphics_pipeline *pipeline, gl_shader_stage stage,
5015bf215546Sopenharmony_ci                                   enum amd_gfx_level gfx_level)
5016bf215546Sopenharmony_ci{
5017bf215546Sopenharmony_ci   bool has_gs = radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY);
5018bf215546Sopenharmony_ci   bool has_tess = radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL);
5019bf215546Sopenharmony_ci   bool has_ngg = radv_pipeline_has_ngg(pipeline);
5020bf215546Sopenharmony_ci
5021bf215546Sopenharmony_ci   switch (stage) {
5022bf215546Sopenharmony_ci   case MESA_SHADER_FRAGMENT:
5023bf215546Sopenharmony_ci      return R_00B030_SPI_SHADER_USER_DATA_PS_0;
5024bf215546Sopenharmony_ci   case MESA_SHADER_VERTEX:
5025bf215546Sopenharmony_ci      if (has_tess) {
5026bf215546Sopenharmony_ci         if (gfx_level >= GFX10) {
5027bf215546Sopenharmony_ci            return R_00B430_SPI_SHADER_USER_DATA_HS_0;
5028bf215546Sopenharmony_ci         } else if (gfx_level == GFX9) {
5029bf215546Sopenharmony_ci            return R_00B430_SPI_SHADER_USER_DATA_LS_0;
5030bf215546Sopenharmony_ci         } else {
5031bf215546Sopenharmony_ci            return R_00B530_SPI_SHADER_USER_DATA_LS_0;
5032bf215546Sopenharmony_ci         }
5033bf215546Sopenharmony_ci      }
5034bf215546Sopenharmony_ci
5035bf215546Sopenharmony_ci      if (has_gs) {
5036bf215546Sopenharmony_ci         if (gfx_level >= GFX10) {
5037bf215546Sopenharmony_ci            return R_00B230_SPI_SHADER_USER_DATA_GS_0;
5038bf215546Sopenharmony_ci         } else {
5039bf215546Sopenharmony_ci            return R_00B330_SPI_SHADER_USER_DATA_ES_0;
5040bf215546Sopenharmony_ci         }
5041bf215546Sopenharmony_ci      }
5042bf215546Sopenharmony_ci
5043bf215546Sopenharmony_ci      if (has_ngg)
5044bf215546Sopenharmony_ci         return R_00B230_SPI_SHADER_USER_DATA_GS_0;
5045bf215546Sopenharmony_ci
5046bf215546Sopenharmony_ci      return R_00B130_SPI_SHADER_USER_DATA_VS_0;
5047bf215546Sopenharmony_ci   case MESA_SHADER_GEOMETRY:
5048bf215546Sopenharmony_ci      return gfx_level == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0
5049bf215546Sopenharmony_ci                               : R_00B230_SPI_SHADER_USER_DATA_GS_0;
5050bf215546Sopenharmony_ci   case MESA_SHADER_COMPUTE:
5051bf215546Sopenharmony_ci   case MESA_SHADER_TASK:
5052bf215546Sopenharmony_ci      return R_00B900_COMPUTE_USER_DATA_0;
5053bf215546Sopenharmony_ci   case MESA_SHADER_TESS_CTRL:
5054bf215546Sopenharmony_ci      return gfx_level == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0
5055bf215546Sopenharmony_ci                               : R_00B430_SPI_SHADER_USER_DATA_HS_0;
5056bf215546Sopenharmony_ci   case MESA_SHADER_TESS_EVAL:
5057bf215546Sopenharmony_ci      if (has_gs) {
5058bf215546Sopenharmony_ci         return gfx_level >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0
5059bf215546Sopenharmony_ci                                   : R_00B330_SPI_SHADER_USER_DATA_ES_0;
5060bf215546Sopenharmony_ci      } else if (has_ngg) {
5061bf215546Sopenharmony_ci         return R_00B230_SPI_SHADER_USER_DATA_GS_0;
5062bf215546Sopenharmony_ci      } else {
5063bf215546Sopenharmony_ci         return R_00B130_SPI_SHADER_USER_DATA_VS_0;
5064bf215546Sopenharmony_ci      }
5065bf215546Sopenharmony_ci   case MESA_SHADER_MESH:
5066bf215546Sopenharmony_ci      assert(has_ngg);
5067bf215546Sopenharmony_ci      return R_00B230_SPI_SHADER_USER_DATA_GS_0;
5068bf215546Sopenharmony_ci   default:
5069bf215546Sopenharmony_ci      unreachable("unknown shader");
5070bf215546Sopenharmony_ci   }
5071bf215546Sopenharmony_ci}
5072bf215546Sopenharmony_ci
5073bf215546Sopenharmony_cistruct radv_bin_size_entry {
5074bf215546Sopenharmony_ci   unsigned bpp;
5075bf215546Sopenharmony_ci   VkExtent2D extent;
5076bf215546Sopenharmony_ci};
5077bf215546Sopenharmony_ci
5078bf215546Sopenharmony_cistatic VkExtent2D
5079bf215546Sopenharmony_ciradv_gfx9_compute_bin_size(const struct radv_graphics_pipeline *pipeline,
5080bf215546Sopenharmony_ci                           const struct radv_graphics_pipeline_info *info)
5081bf215546Sopenharmony_ci{
5082bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5083bf215546Sopenharmony_ci   static const struct radv_bin_size_entry color_size_table[][3][9] = {
5084bf215546Sopenharmony_ci      {
5085bf215546Sopenharmony_ci         /* One RB / SE */
5086bf215546Sopenharmony_ci         {
5087bf215546Sopenharmony_ci            /* One shader engine */
5088bf215546Sopenharmony_ci            {0, {128, 128}},
5089bf215546Sopenharmony_ci            {1, {64, 128}},
5090bf215546Sopenharmony_ci            {2, {32, 128}},
5091bf215546Sopenharmony_ci            {3, {16, 128}},
5092bf215546Sopenharmony_ci            {17, {0, 0}},
5093bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5094bf215546Sopenharmony_ci         },
5095bf215546Sopenharmony_ci         {
5096bf215546Sopenharmony_ci            /* Two shader engines */
5097bf215546Sopenharmony_ci            {0, {128, 128}},
5098bf215546Sopenharmony_ci            {2, {64, 128}},
5099bf215546Sopenharmony_ci            {3, {32, 128}},
5100bf215546Sopenharmony_ci            {5, {16, 128}},
5101bf215546Sopenharmony_ci            {17, {0, 0}},
5102bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5103bf215546Sopenharmony_ci         },
5104bf215546Sopenharmony_ci         {
5105bf215546Sopenharmony_ci            /* Four shader engines */
5106bf215546Sopenharmony_ci            {0, {128, 128}},
5107bf215546Sopenharmony_ci            {3, {64, 128}},
5108bf215546Sopenharmony_ci            {5, {16, 128}},
5109bf215546Sopenharmony_ci            {17, {0, 0}},
5110bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5111bf215546Sopenharmony_ci         },
5112bf215546Sopenharmony_ci      },
5113bf215546Sopenharmony_ci      {
5114bf215546Sopenharmony_ci         /* Two RB / SE */
5115bf215546Sopenharmony_ci         {
5116bf215546Sopenharmony_ci            /* One shader engine */
5117bf215546Sopenharmony_ci            {0, {128, 128}},
5118bf215546Sopenharmony_ci            {2, {64, 128}},
5119bf215546Sopenharmony_ci            {3, {32, 128}},
5120bf215546Sopenharmony_ci            {5, {16, 128}},
5121bf215546Sopenharmony_ci            {33, {0, 0}},
5122bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5123bf215546Sopenharmony_ci         },
5124bf215546Sopenharmony_ci         {
5125bf215546Sopenharmony_ci            /* Two shader engines */
5126bf215546Sopenharmony_ci            {0, {128, 128}},
5127bf215546Sopenharmony_ci            {3, {64, 128}},
5128bf215546Sopenharmony_ci            {5, {32, 128}},
5129bf215546Sopenharmony_ci            {9, {16, 128}},
5130bf215546Sopenharmony_ci            {33, {0, 0}},
5131bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5132bf215546Sopenharmony_ci         },
5133bf215546Sopenharmony_ci         {
5134bf215546Sopenharmony_ci            /* Four shader engines */
5135bf215546Sopenharmony_ci            {0, {256, 256}},
5136bf215546Sopenharmony_ci            {2, {128, 256}},
5137bf215546Sopenharmony_ci            {3, {128, 128}},
5138bf215546Sopenharmony_ci            {5, {64, 128}},
5139bf215546Sopenharmony_ci            {9, {16, 128}},
5140bf215546Sopenharmony_ci            {33, {0, 0}},
5141bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5142bf215546Sopenharmony_ci         },
5143bf215546Sopenharmony_ci      },
5144bf215546Sopenharmony_ci      {
5145bf215546Sopenharmony_ci         /* Four RB / SE */
5146bf215546Sopenharmony_ci         {
5147bf215546Sopenharmony_ci            /* One shader engine */
5148bf215546Sopenharmony_ci            {0, {128, 256}},
5149bf215546Sopenharmony_ci            {2, {128, 128}},
5150bf215546Sopenharmony_ci            {3, {64, 128}},
5151bf215546Sopenharmony_ci            {5, {32, 128}},
5152bf215546Sopenharmony_ci            {9, {16, 128}},
5153bf215546Sopenharmony_ci            {33, {0, 0}},
5154bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5155bf215546Sopenharmony_ci         },
5156bf215546Sopenharmony_ci         {
5157bf215546Sopenharmony_ci            /* Two shader engines */
5158bf215546Sopenharmony_ci            {0, {256, 256}},
5159bf215546Sopenharmony_ci            {2, {128, 256}},
5160bf215546Sopenharmony_ci            {3, {128, 128}},
5161bf215546Sopenharmony_ci            {5, {64, 128}},
5162bf215546Sopenharmony_ci            {9, {32, 128}},
5163bf215546Sopenharmony_ci            {17, {16, 128}},
5164bf215546Sopenharmony_ci            {33, {0, 0}},
5165bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5166bf215546Sopenharmony_ci         },
5167bf215546Sopenharmony_ci         {
5168bf215546Sopenharmony_ci            /* Four shader engines */
5169bf215546Sopenharmony_ci            {0, {256, 512}},
5170bf215546Sopenharmony_ci            {2, {256, 256}},
5171bf215546Sopenharmony_ci            {3, {128, 256}},
5172bf215546Sopenharmony_ci            {5, {128, 128}},
5173bf215546Sopenharmony_ci            {9, {64, 128}},
5174bf215546Sopenharmony_ci            {17, {16, 128}},
5175bf215546Sopenharmony_ci            {33, {0, 0}},
5176bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5177bf215546Sopenharmony_ci         },
5178bf215546Sopenharmony_ci      },
5179bf215546Sopenharmony_ci   };
5180bf215546Sopenharmony_ci   static const struct radv_bin_size_entry ds_size_table[][3][9] = {
5181bf215546Sopenharmony_ci      {
5182bf215546Sopenharmony_ci         // One RB / SE
5183bf215546Sopenharmony_ci         {
5184bf215546Sopenharmony_ci            // One shader engine
5185bf215546Sopenharmony_ci            {0, {128, 256}},
5186bf215546Sopenharmony_ci            {2, {128, 128}},
5187bf215546Sopenharmony_ci            {4, {64, 128}},
5188bf215546Sopenharmony_ci            {7, {32, 128}},
5189bf215546Sopenharmony_ci            {13, {16, 128}},
5190bf215546Sopenharmony_ci            {49, {0, 0}},
5191bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5192bf215546Sopenharmony_ci         },
5193bf215546Sopenharmony_ci         {
5194bf215546Sopenharmony_ci            // Two shader engines
5195bf215546Sopenharmony_ci            {0, {256, 256}},
5196bf215546Sopenharmony_ci            {2, {128, 256}},
5197bf215546Sopenharmony_ci            {4, {128, 128}},
5198bf215546Sopenharmony_ci            {7, {64, 128}},
5199bf215546Sopenharmony_ci            {13, {32, 128}},
5200bf215546Sopenharmony_ci            {25, {16, 128}},
5201bf215546Sopenharmony_ci            {49, {0, 0}},
5202bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5203bf215546Sopenharmony_ci         },
5204bf215546Sopenharmony_ci         {
5205bf215546Sopenharmony_ci            // Four shader engines
5206bf215546Sopenharmony_ci            {0, {256, 512}},
5207bf215546Sopenharmony_ci            {2, {256, 256}},
5208bf215546Sopenharmony_ci            {4, {128, 256}},
5209bf215546Sopenharmony_ci            {7, {128, 128}},
5210bf215546Sopenharmony_ci            {13, {64, 128}},
5211bf215546Sopenharmony_ci            {25, {16, 128}},
5212bf215546Sopenharmony_ci            {49, {0, 0}},
5213bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5214bf215546Sopenharmony_ci         },
5215bf215546Sopenharmony_ci      },
5216bf215546Sopenharmony_ci      {
5217bf215546Sopenharmony_ci         // Two RB / SE
5218bf215546Sopenharmony_ci         {
5219bf215546Sopenharmony_ci            // One shader engine
5220bf215546Sopenharmony_ci            {0, {256, 256}},
5221bf215546Sopenharmony_ci            {2, {128, 256}},
5222bf215546Sopenharmony_ci            {4, {128, 128}},
5223bf215546Sopenharmony_ci            {7, {64, 128}},
5224bf215546Sopenharmony_ci            {13, {32, 128}},
5225bf215546Sopenharmony_ci            {25, {16, 128}},
5226bf215546Sopenharmony_ci            {97, {0, 0}},
5227bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5228bf215546Sopenharmony_ci         },
5229bf215546Sopenharmony_ci         {
5230bf215546Sopenharmony_ci            // Two shader engines
5231bf215546Sopenharmony_ci            {0, {256, 512}},
5232bf215546Sopenharmony_ci            {2, {256, 256}},
5233bf215546Sopenharmony_ci            {4, {128, 256}},
5234bf215546Sopenharmony_ci            {7, {128, 128}},
5235bf215546Sopenharmony_ci            {13, {64, 128}},
5236bf215546Sopenharmony_ci            {25, {32, 128}},
5237bf215546Sopenharmony_ci            {49, {16, 128}},
5238bf215546Sopenharmony_ci            {97, {0, 0}},
5239bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5240bf215546Sopenharmony_ci         },
5241bf215546Sopenharmony_ci         {
5242bf215546Sopenharmony_ci            // Four shader engines
5243bf215546Sopenharmony_ci            {0, {512, 512}},
5244bf215546Sopenharmony_ci            {2, {256, 512}},
5245bf215546Sopenharmony_ci            {4, {256, 256}},
5246bf215546Sopenharmony_ci            {7, {128, 256}},
5247bf215546Sopenharmony_ci            {13, {128, 128}},
5248bf215546Sopenharmony_ci            {25, {64, 128}},
5249bf215546Sopenharmony_ci            {49, {16, 128}},
5250bf215546Sopenharmony_ci            {97, {0, 0}},
5251bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5252bf215546Sopenharmony_ci         },
5253bf215546Sopenharmony_ci      },
5254bf215546Sopenharmony_ci      {
5255bf215546Sopenharmony_ci         // Four RB / SE
5256bf215546Sopenharmony_ci         {
5257bf215546Sopenharmony_ci            // One shader engine
5258bf215546Sopenharmony_ci            {0, {256, 512}},
5259bf215546Sopenharmony_ci            {2, {256, 256}},
5260bf215546Sopenharmony_ci            {4, {128, 256}},
5261bf215546Sopenharmony_ci            {7, {128, 128}},
5262bf215546Sopenharmony_ci            {13, {64, 128}},
5263bf215546Sopenharmony_ci            {25, {32, 128}},
5264bf215546Sopenharmony_ci            {49, {16, 128}},
5265bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5266bf215546Sopenharmony_ci         },
5267bf215546Sopenharmony_ci         {
5268bf215546Sopenharmony_ci            // Two shader engines
5269bf215546Sopenharmony_ci            {0, {512, 512}},
5270bf215546Sopenharmony_ci            {2, {256, 512}},
5271bf215546Sopenharmony_ci            {4, {256, 256}},
5272bf215546Sopenharmony_ci            {7, {128, 256}},
5273bf215546Sopenharmony_ci            {13, {128, 128}},
5274bf215546Sopenharmony_ci            {25, {64, 128}},
5275bf215546Sopenharmony_ci            {49, {32, 128}},
5276bf215546Sopenharmony_ci            {97, {16, 128}},
5277bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5278bf215546Sopenharmony_ci         },
5279bf215546Sopenharmony_ci         {
5280bf215546Sopenharmony_ci            // Four shader engines
5281bf215546Sopenharmony_ci            {0, {512, 512}},
5282bf215546Sopenharmony_ci            {4, {256, 512}},
5283bf215546Sopenharmony_ci            {7, {256, 256}},
5284bf215546Sopenharmony_ci            {13, {128, 256}},
5285bf215546Sopenharmony_ci            {25, {128, 128}},
5286bf215546Sopenharmony_ci            {49, {64, 128}},
5287bf215546Sopenharmony_ci            {97, {16, 128}},
5288bf215546Sopenharmony_ci            {UINT_MAX, {0, 0}},
5289bf215546Sopenharmony_ci         },
5290bf215546Sopenharmony_ci      },
5291bf215546Sopenharmony_ci   };
5292bf215546Sopenharmony_ci
5293bf215546Sopenharmony_ci   VkExtent2D extent = {512, 512};
5294bf215546Sopenharmony_ci
5295bf215546Sopenharmony_ci   unsigned log_num_rb_per_se =
5296bf215546Sopenharmony_ci      util_logbase2_ceil(pdevice->rad_info.max_render_backends / pdevice->rad_info.max_se);
5297bf215546Sopenharmony_ci   unsigned log_num_se = util_logbase2_ceil(pdevice->rad_info.max_se);
5298bf215546Sopenharmony_ci
5299bf215546Sopenharmony_ci   unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->ms.pa_sc_aa_config);
5300bf215546Sopenharmony_ci   unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->ms.db_eqaa);
5301bf215546Sopenharmony_ci   unsigned effective_samples = total_samples;
5302bf215546Sopenharmony_ci   unsigned color_bytes_per_pixel = 0;
5303bf215546Sopenharmony_ci
5304bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->ri.color_att_count; i++) {
5305bf215546Sopenharmony_ci      if (!info->cb.att[i].color_write_mask)
5306bf215546Sopenharmony_ci         continue;
5307bf215546Sopenharmony_ci
5308bf215546Sopenharmony_ci      if (info->ri.color_att_formats[i] == VK_FORMAT_UNDEFINED)
5309bf215546Sopenharmony_ci         continue;
5310bf215546Sopenharmony_ci
5311bf215546Sopenharmony_ci      color_bytes_per_pixel += vk_format_get_blocksize(info->ri.color_att_formats[i]);
5312bf215546Sopenharmony_ci   }
5313bf215546Sopenharmony_ci
5314bf215546Sopenharmony_ci   /* MSAA images typically don't use all samples all the time. */
5315bf215546Sopenharmony_ci   if (effective_samples >= 2 && ps_iter_samples <= 1)
5316bf215546Sopenharmony_ci      effective_samples = 2;
5317bf215546Sopenharmony_ci   color_bytes_per_pixel *= effective_samples;
5318bf215546Sopenharmony_ci
5319bf215546Sopenharmony_ci   const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
5320bf215546Sopenharmony_ci   while (color_entry[1].bpp <= color_bytes_per_pixel)
5321bf215546Sopenharmony_ci      ++color_entry;
5322bf215546Sopenharmony_ci
5323bf215546Sopenharmony_ci   extent = color_entry->extent;
5324bf215546Sopenharmony_ci
5325bf215546Sopenharmony_ci   if (radv_pipeline_has_ds_attachments(&info->ri)) {
5326bf215546Sopenharmony_ci      /* Coefficients taken from AMDVLK */
5327bf215546Sopenharmony_ci      unsigned depth_coeff = info->ri.depth_att_format != VK_FORMAT_UNDEFINED ? 5 : 0;
5328bf215546Sopenharmony_ci      unsigned stencil_coeff = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED ? 1 : 0;
5329bf215546Sopenharmony_ci      unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
5330bf215546Sopenharmony_ci
5331bf215546Sopenharmony_ci      const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
5332bf215546Sopenharmony_ci      while (ds_entry[1].bpp <= ds_bytes_per_pixel)
5333bf215546Sopenharmony_ci         ++ds_entry;
5334bf215546Sopenharmony_ci
5335bf215546Sopenharmony_ci      if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height)
5336bf215546Sopenharmony_ci         extent = ds_entry->extent;
5337bf215546Sopenharmony_ci   }
5338bf215546Sopenharmony_ci
5339bf215546Sopenharmony_ci   return extent;
5340bf215546Sopenharmony_ci}
5341bf215546Sopenharmony_ci
5342bf215546Sopenharmony_cistatic VkExtent2D
5343bf215546Sopenharmony_ciradv_gfx10_compute_bin_size(const struct radv_graphics_pipeline *pipeline,
5344bf215546Sopenharmony_ci                            const struct radv_graphics_pipeline_info *info)
5345bf215546Sopenharmony_ci{
5346bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5347bf215546Sopenharmony_ci   VkExtent2D extent = {512, 512};
5348bf215546Sopenharmony_ci
5349bf215546Sopenharmony_ci   const unsigned db_tag_size = 64;
5350bf215546Sopenharmony_ci   const unsigned db_tag_count = 312;
5351bf215546Sopenharmony_ci   const unsigned color_tag_size = 1024;
5352bf215546Sopenharmony_ci   const unsigned color_tag_count = 31;
5353bf215546Sopenharmony_ci   const unsigned fmask_tag_size = 256;
5354bf215546Sopenharmony_ci   const unsigned fmask_tag_count = 44;
5355bf215546Sopenharmony_ci
5356bf215546Sopenharmony_ci   const unsigned rb_count = pdevice->rad_info.max_render_backends;
5357bf215546Sopenharmony_ci   const unsigned pipe_count = MAX2(rb_count, pdevice->rad_info.num_tcc_blocks);
5358bf215546Sopenharmony_ci
5359bf215546Sopenharmony_ci   const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
5360bf215546Sopenharmony_ci   const unsigned color_tag_part =
5361bf215546Sopenharmony_ci      (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
5362bf215546Sopenharmony_ci   const unsigned fmask_tag_part =
5363bf215546Sopenharmony_ci      (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
5364bf215546Sopenharmony_ci
5365bf215546Sopenharmony_ci   const unsigned total_samples =
5366bf215546Sopenharmony_ci      1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->ms.pa_sc_aa_config);
5367bf215546Sopenharmony_ci   const unsigned samples_log = util_logbase2_ceil(total_samples);
5368bf215546Sopenharmony_ci
5369bf215546Sopenharmony_ci   unsigned color_bytes_per_pixel = 0;
5370bf215546Sopenharmony_ci   unsigned fmask_bytes_per_pixel = 0;
5371bf215546Sopenharmony_ci
5372bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->ri.color_att_count; i++) {
5373bf215546Sopenharmony_ci      if (!info->cb.att[i].color_write_mask)
5374bf215546Sopenharmony_ci         continue;
5375bf215546Sopenharmony_ci
5376bf215546Sopenharmony_ci      if (info->ri.color_att_formats[i] == VK_FORMAT_UNDEFINED)
5377bf215546Sopenharmony_ci         continue;
5378bf215546Sopenharmony_ci
5379bf215546Sopenharmony_ci      color_bytes_per_pixel += vk_format_get_blocksize(info->ri.color_att_formats[i]);
5380bf215546Sopenharmony_ci
5381bf215546Sopenharmony_ci      if (total_samples > 1) {
5382bf215546Sopenharmony_ci         assert(samples_log <= 3);
5383bf215546Sopenharmony_ci         const unsigned fmask_array[] = {0, 1, 1, 4};
5384bf215546Sopenharmony_ci         fmask_bytes_per_pixel += fmask_array[samples_log];
5385bf215546Sopenharmony_ci      }
5386bf215546Sopenharmony_ci   }
5387bf215546Sopenharmony_ci
5388bf215546Sopenharmony_ci   color_bytes_per_pixel *= total_samples;
5389bf215546Sopenharmony_ci   color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);
5390bf215546Sopenharmony_ci
5391bf215546Sopenharmony_ci   const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
5392bf215546Sopenharmony_ci   extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
5393bf215546Sopenharmony_ci   extent.height = 1ull << (color_pixel_count_log / 2);
5394bf215546Sopenharmony_ci
5395bf215546Sopenharmony_ci   if (fmask_bytes_per_pixel) {
5396bf215546Sopenharmony_ci      const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
5397bf215546Sopenharmony_ci
5398bf215546Sopenharmony_ci      const VkExtent2D fmask_extent =
5399bf215546Sopenharmony_ci         (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2),
5400bf215546Sopenharmony_ci                      .height = 1ull << (color_pixel_count_log / 2)};
5401bf215546Sopenharmony_ci
5402bf215546Sopenharmony_ci      if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
5403bf215546Sopenharmony_ci         extent = fmask_extent;
5404bf215546Sopenharmony_ci   }
5405bf215546Sopenharmony_ci
5406bf215546Sopenharmony_ci   if (radv_pipeline_has_ds_attachments(&info->ri)) {
5407bf215546Sopenharmony_ci      /* Coefficients taken from AMDVLK */
5408bf215546Sopenharmony_ci      unsigned depth_coeff = info->ri.depth_att_format != VK_FORMAT_UNDEFINED ? 5 : 0;
5409bf215546Sopenharmony_ci      unsigned stencil_coeff = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED ? 1 : 0;
5410bf215546Sopenharmony_ci      unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;
5411bf215546Sopenharmony_ci
5412bf215546Sopenharmony_ci      const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
5413bf215546Sopenharmony_ci
5414bf215546Sopenharmony_ci      const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2),
5415bf215546Sopenharmony_ci                                                .height = 1ull << (color_pixel_count_log / 2)};
5416bf215546Sopenharmony_ci
5417bf215546Sopenharmony_ci      if (db_extent.width * db_extent.height < extent.width * extent.height)
5418bf215546Sopenharmony_ci         extent = db_extent;
5419bf215546Sopenharmony_ci   }
5420bf215546Sopenharmony_ci
5421bf215546Sopenharmony_ci   extent.width = MAX2(extent.width, 128);
5422bf215546Sopenharmony_ci   extent.height = MAX2(extent.width, 64);
5423bf215546Sopenharmony_ci
5424bf215546Sopenharmony_ci   return extent;
5425bf215546Sopenharmony_ci}
5426bf215546Sopenharmony_ci
5427bf215546Sopenharmony_cistatic void
5428bf215546Sopenharmony_ciradv_pipeline_init_disabled_binning_state(struct radv_graphics_pipeline *pipeline,
5429bf215546Sopenharmony_ci                                          const struct radv_graphics_pipeline_info *info)
5430bf215546Sopenharmony_ci{
5431bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5432bf215546Sopenharmony_ci   uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
5433bf215546Sopenharmony_ci                                  S_028C44_DISABLE_START_OF_PRIM(1);
5434bf215546Sopenharmony_ci
5435bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10) {
5436bf215546Sopenharmony_ci      unsigned min_bytes_per_pixel = 0;
5437bf215546Sopenharmony_ci
5438bf215546Sopenharmony_ci      for (unsigned i = 0; i < info->ri.color_att_count; i++) {
5439bf215546Sopenharmony_ci         if (!info->cb.att[i].color_write_mask)
5440bf215546Sopenharmony_ci            continue;
5441bf215546Sopenharmony_ci
5442bf215546Sopenharmony_ci         if (info->ri.color_att_formats[i] == VK_FORMAT_UNDEFINED)
5443bf215546Sopenharmony_ci            continue;
5444bf215546Sopenharmony_ci
5445bf215546Sopenharmony_ci         unsigned bytes = vk_format_get_blocksize(info->ri.color_att_formats[i]);
5446bf215546Sopenharmony_ci         if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel)
5447bf215546Sopenharmony_ci            min_bytes_per_pixel = bytes;
5448bf215546Sopenharmony_ci      }
5449bf215546Sopenharmony_ci
5450bf215546Sopenharmony_ci      pa_sc_binner_cntl_0 =
5451bf215546Sopenharmony_ci         S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) |
5452bf215546Sopenharmony_ci         S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) |       /* 128 */
5453bf215546Sopenharmony_ci         S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */
5454bf215546Sopenharmony_ci         S_028C44_DISABLE_START_OF_PRIM(1);
5455bf215546Sopenharmony_ci   }
5456bf215546Sopenharmony_ci
5457bf215546Sopenharmony_ci   pipeline->binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
5458bf215546Sopenharmony_ci}
5459bf215546Sopenharmony_ci
5460bf215546Sopenharmony_cistruct radv_binning_settings
5461bf215546Sopenharmony_ciradv_get_binning_settings(const struct radv_physical_device *pdev)
5462bf215546Sopenharmony_ci{
5463bf215546Sopenharmony_ci   struct radv_binning_settings settings;
5464bf215546Sopenharmony_ci   if (pdev->rad_info.has_dedicated_vram) {
5465bf215546Sopenharmony_ci      if (pdev->rad_info.max_render_backends > 4) {
5466bf215546Sopenharmony_ci         settings.context_states_per_bin = 1;
5467bf215546Sopenharmony_ci         settings.persistent_states_per_bin = 1;
5468bf215546Sopenharmony_ci      } else {
5469bf215546Sopenharmony_ci         settings.context_states_per_bin = 3;
5470bf215546Sopenharmony_ci         settings.persistent_states_per_bin = 8;
5471bf215546Sopenharmony_ci      }
5472bf215546Sopenharmony_ci      settings.fpovs_per_batch = 63;
5473bf215546Sopenharmony_ci   } else {
5474bf215546Sopenharmony_ci      /* The context states are affected by the scissor bug. */
5475bf215546Sopenharmony_ci      settings.context_states_per_bin = 6;
5476bf215546Sopenharmony_ci      /* 32 causes hangs for RAVEN. */
5477bf215546Sopenharmony_ci      settings.persistent_states_per_bin = 16;
5478bf215546Sopenharmony_ci      settings.fpovs_per_batch = 63;
5479bf215546Sopenharmony_ci   }
5480bf215546Sopenharmony_ci
5481bf215546Sopenharmony_ci   if (pdev->rad_info.has_gfx9_scissor_bug)
5482bf215546Sopenharmony_ci      settings.context_states_per_bin = 1;
5483bf215546Sopenharmony_ci
5484bf215546Sopenharmony_ci   return settings;
5485bf215546Sopenharmony_ci}
5486bf215546Sopenharmony_ci
5487bf215546Sopenharmony_cistatic void
5488bf215546Sopenharmony_ciradv_pipeline_init_binning_state(struct radv_graphics_pipeline *pipeline,
5489bf215546Sopenharmony_ci                                 const struct radv_blend_state *blend,
5490bf215546Sopenharmony_ci                                 const struct radv_graphics_pipeline_info *info)
5491bf215546Sopenharmony_ci{
5492bf215546Sopenharmony_ci   const struct radv_device *device = pipeline->base.device;
5493bf215546Sopenharmony_ci
5494bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level < GFX9)
5495bf215546Sopenharmony_ci      return;
5496bf215546Sopenharmony_ci
5497bf215546Sopenharmony_ci   VkExtent2D bin_size;
5498bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX10) {
5499bf215546Sopenharmony_ci      bin_size = radv_gfx10_compute_bin_size(pipeline, info);
5500bf215546Sopenharmony_ci   } else if (device->physical_device->rad_info.gfx_level == GFX9) {
5501bf215546Sopenharmony_ci      bin_size = radv_gfx9_compute_bin_size(pipeline, info);
5502bf215546Sopenharmony_ci   } else
5503bf215546Sopenharmony_ci      unreachable("Unhandled generation for binning bin size calculation");
5504bf215546Sopenharmony_ci
5505bf215546Sopenharmony_ci   if (device->pbb_allowed && bin_size.width && bin_size.height) {
5506bf215546Sopenharmony_ci      struct radv_binning_settings settings = radv_get_binning_settings(device->physical_device);
5507bf215546Sopenharmony_ci
5508bf215546Sopenharmony_ci      const uint32_t pa_sc_binner_cntl_0 =
5509bf215546Sopenharmony_ci         S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
5510bf215546Sopenharmony_ci         S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
5511bf215546Sopenharmony_ci         S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
5512bf215546Sopenharmony_ci         S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
5513bf215546Sopenharmony_ci         S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
5514bf215546Sopenharmony_ci         S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
5515bf215546Sopenharmony_ci         S_028C44_DISABLE_START_OF_PRIM(1) |
5516bf215546Sopenharmony_ci         S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
5517bf215546Sopenharmony_ci
5518bf215546Sopenharmony_ci      pipeline->binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
5519bf215546Sopenharmony_ci   } else
5520bf215546Sopenharmony_ci      radv_pipeline_init_disabled_binning_state(pipeline, info);
5521bf215546Sopenharmony_ci}
5522bf215546Sopenharmony_ci
5523bf215546Sopenharmony_cistatic void
5524bf215546Sopenharmony_ciradv_pipeline_emit_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
5525bf215546Sopenharmony_ci                                       const struct radv_depth_stencil_state *ds_state)
5526bf215546Sopenharmony_ci{
5527bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, ds_state->db_render_control);
5528bf215546Sopenharmony_ci
5529bf215546Sopenharmony_ci   radeon_set_context_reg_seq(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, 2);
5530bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ds_state->db_render_override);
5531bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ds_state->db_render_override2);
5532bf215546Sopenharmony_ci}
5533bf215546Sopenharmony_ci
5534bf215546Sopenharmony_cistatic void
5535bf215546Sopenharmony_ciradv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs,
5536bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline *pipeline,
5537bf215546Sopenharmony_ci                               const struct radv_blend_state *blend)
5538bf215546Sopenharmony_ci{
5539bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5540bf215546Sopenharmony_ci
5541bf215546Sopenharmony_ci   radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
5542bf215546Sopenharmony_ci   radeon_emit_array(ctx_cs, blend->cb_blend_control, 8);
5543bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
5544bf215546Sopenharmony_ci
5545bf215546Sopenharmony_ci   if (pdevice->rad_info.has_rbplus) {
5546bf215546Sopenharmony_ci
5547bf215546Sopenharmony_ci      radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
5548bf215546Sopenharmony_ci      radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
5549bf215546Sopenharmony_ci   }
5550bf215546Sopenharmony_ci
5551bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
5552bf215546Sopenharmony_ci
5553bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
5554bf215546Sopenharmony_ci}
5555bf215546Sopenharmony_ci
5556bf215546Sopenharmony_cistatic void
5557bf215546Sopenharmony_ciradv_pipeline_emit_raster_state(struct radeon_cmdbuf *ctx_cs,
5558bf215546Sopenharmony_ci                                const struct radv_graphics_pipeline *pipeline,
5559bf215546Sopenharmony_ci                                const struct radv_graphics_pipeline_info *info)
5560bf215546Sopenharmony_ci{
5561bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5562bf215546Sopenharmony_ci   const VkConservativeRasterizationModeEXT mode = info->rs.conservative_mode;
5563bf215546Sopenharmony_ci   uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
5564bf215546Sopenharmony_ci
5565bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX9) {
5566bf215546Sopenharmony_ci      /* Conservative rasterization. */
5567bf215546Sopenharmony_ci      if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
5568bf215546Sopenharmony_ci         pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
5569bf215546Sopenharmony_ci                                   S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
5570bf215546Sopenharmony_ci
5571bf215546Sopenharmony_ci         if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) {
5572bf215546Sopenharmony_ci            pa_sc_conservative_rast |=
5573bf215546Sopenharmony_ci               S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) |
5574bf215546Sopenharmony_ci               S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) |
5575bf215546Sopenharmony_ci               S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1);
5576bf215546Sopenharmony_ci         } else {
5577bf215546Sopenharmony_ci            assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT);
5578bf215546Sopenharmony_ci            pa_sc_conservative_rast |=
5579bf215546Sopenharmony_ci               S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) |
5580bf215546Sopenharmony_ci               S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) |
5581bf215546Sopenharmony_ci               S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0);
5582bf215546Sopenharmony_ci         }
5583bf215546Sopenharmony_ci      }
5584bf215546Sopenharmony_ci
5585bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
5586bf215546Sopenharmony_ci                             pa_sc_conservative_rast);
5587bf215546Sopenharmony_ci   }
5588bf215546Sopenharmony_ci}
5589bf215546Sopenharmony_ci
5590bf215546Sopenharmony_cistatic void
5591bf215546Sopenharmony_ciradv_pipeline_emit_multisample_state(struct radeon_cmdbuf *ctx_cs,
5592bf215546Sopenharmony_ci                                     const struct radv_graphics_pipeline *pipeline)
5593bf215546Sopenharmony_ci{
5594bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5595bf215546Sopenharmony_ci   const struct radv_multisample_state *ms = &pipeline->ms;
5596bf215546Sopenharmony_ci
5597bf215546Sopenharmony_ci   radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
5598bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]);
5599bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]);
5600bf215546Sopenharmony_ci
5601bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa);
5602bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config);
5603bf215546Sopenharmony_ci
5604bf215546Sopenharmony_ci   radeon_set_context_reg_seq(ctx_cs, R_028A48_PA_SC_MODE_CNTL_0, 2);
5605bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ms->pa_sc_mode_cntl_0);
5606bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ms->pa_sc_mode_cntl_1);
5607bf215546Sopenharmony_ci
5608bf215546Sopenharmony_ci   /* The exclusion bits can be set to improve rasterization efficiency
5609bf215546Sopenharmony_ci    * if no sample lies on the pixel boundary (-8 sample offset). It's
5610bf215546Sopenharmony_ci    * currently always TRUE because the driver doesn't support 16 samples.
5611bf215546Sopenharmony_ci    */
5612bf215546Sopenharmony_ci   bool exclusion = pdevice->rad_info.gfx_level >= GFX7;
5613bf215546Sopenharmony_ci   radeon_set_context_reg(
5614bf215546Sopenharmony_ci      ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
5615bf215546Sopenharmony_ci      S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
5616bf215546Sopenharmony_ci}
5617bf215546Sopenharmony_ci
5618bf215546Sopenharmony_cistatic void
5619bf215546Sopenharmony_ciradv_pipeline_emit_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
5620bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline *pipeline)
5621bf215546Sopenharmony_ci{
5622bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5623bf215546Sopenharmony_ci   const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
5624bf215546Sopenharmony_ci   const struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]
5625bf215546Sopenharmony_ci                                  ? pipeline->base.shaders[MESA_SHADER_TESS_EVAL]
5626bf215546Sopenharmony_ci                                  : pipeline->base.shaders[MESA_SHADER_VERTEX];
5627bf215546Sopenharmony_ci   unsigned vgt_primitiveid_en = 0;
5628bf215546Sopenharmony_ci   uint32_t vgt_gs_mode = 0;
5629bf215546Sopenharmony_ci
5630bf215546Sopenharmony_ci   if (radv_pipeline_has_ngg(pipeline))
5631bf215546Sopenharmony_ci      return;
5632bf215546Sopenharmony_ci
5633bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
5634bf215546Sopenharmony_ci      const struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];
5635bf215546Sopenharmony_ci
5636bf215546Sopenharmony_ci      vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out, pdevice->rad_info.gfx_level);
5637bf215546Sopenharmony_ci   } else if (outinfo->export_prim_id || vs->info.uses_prim_id) {
5638bf215546Sopenharmony_ci      vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
5639bf215546Sopenharmony_ci      vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
5640bf215546Sopenharmony_ci   }
5641bf215546Sopenharmony_ci
5642bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en);
5643bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode);
5644bf215546Sopenharmony_ci}
5645bf215546Sopenharmony_ci
5646bf215546Sopenharmony_cistatic void
5647bf215546Sopenharmony_ciradv_pipeline_emit_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
5648bf215546Sopenharmony_ci                         const struct radv_graphics_pipeline *pipeline, const struct radv_shader *shader)
5649bf215546Sopenharmony_ci{
5650bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5651bf215546Sopenharmony_ci   uint64_t va = radv_shader_get_va(shader);
5652bf215546Sopenharmony_ci
5653bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
5654bf215546Sopenharmony_ci   radeon_emit(cs, va >> 8);
5655bf215546Sopenharmony_ci   radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
5656bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc1);
5657bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc2);
5658bf215546Sopenharmony_ci
5659bf215546Sopenharmony_ci   const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
5660bf215546Sopenharmony_ci   unsigned clip_dist_mask, cull_dist_mask, total_mask;
5661bf215546Sopenharmony_ci   clip_dist_mask = outinfo->clip_dist_mask;
5662bf215546Sopenharmony_ci   cull_dist_mask = outinfo->cull_dist_mask;
5663bf215546Sopenharmony_ci   total_mask = clip_dist_mask | cull_dist_mask;
5664bf215546Sopenharmony_ci
5665bf215546Sopenharmony_ci   bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
5666bf215546Sopenharmony_ci                       outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
5667bf215546Sopenharmony_ci   unsigned spi_vs_out_config, nparams;
5668bf215546Sopenharmony_ci
5669bf215546Sopenharmony_ci   /* VS is required to export at least one param. */
5670bf215546Sopenharmony_ci   nparams = MAX2(outinfo->param_exports, 1);
5671bf215546Sopenharmony_ci   spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
5672bf215546Sopenharmony_ci
5673bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10) {
5674bf215546Sopenharmony_ci      spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0);
5675bf215546Sopenharmony_ci   }
5676bf215546Sopenharmony_ci
5677bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, spi_vs_out_config);
5678bf215546Sopenharmony_ci
5679bf215546Sopenharmony_ci   radeon_set_context_reg(
5680bf215546Sopenharmony_ci      ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
5681bf215546Sopenharmony_ci      S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
5682bf215546Sopenharmony_ci         S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
5683bf215546Sopenharmony_ci                                                              : V_02870C_SPI_SHADER_NONE) |
5684bf215546Sopenharmony_ci         S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
5685bf215546Sopenharmony_ci                                                              : V_02870C_SPI_SHADER_NONE) |
5686bf215546Sopenharmony_ci         S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
5687bf215546Sopenharmony_ci                                                              : V_02870C_SPI_SHADER_NONE));
5688bf215546Sopenharmony_ci
5689bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
5690bf215546Sopenharmony_ci                          S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
5691bf215546Sopenharmony_ci                             S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
5692bf215546Sopenharmony_ci                             S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
5693bf215546Sopenharmony_ci                             S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
5694bf215546Sopenharmony_ci                             S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
5695bf215546Sopenharmony_ci                             S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
5696bf215546Sopenharmony_ci                             S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
5697bf215546Sopenharmony_ci                             S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
5698bf215546Sopenharmony_ci                             total_mask << 8 | clip_dist_mask);
5699bf215546Sopenharmony_ci
5700bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level <= GFX8)
5701bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index);
5702bf215546Sopenharmony_ci
5703bf215546Sopenharmony_ci   unsigned late_alloc_wave64, cu_mask;
5704bf215546Sopenharmony_ci   ac_compute_late_alloc(&pdevice->rad_info, false, false, shader->config.scratch_bytes_per_wave > 0,
5705bf215546Sopenharmony_ci                         &late_alloc_wave64, &cu_mask);
5706bf215546Sopenharmony_ci
5707bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX7) {
5708bf215546Sopenharmony_ci      if (pdevice->rad_info.gfx_level >= GFX10) {
5709bf215546Sopenharmony_ci         ac_set_reg_cu_en(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
5710bf215546Sopenharmony_ci                          S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F),
5711bf215546Sopenharmony_ci                          C_00B118_CU_EN, 0, &pdevice->rad_info,
5712bf215546Sopenharmony_ci                          (void*)gfx10_set_sh_reg_idx3);
5713bf215546Sopenharmony_ci      } else {
5714bf215546Sopenharmony_ci         radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
5715bf215546Sopenharmony_ci                               S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F));
5716bf215546Sopenharmony_ci      }
5717bf215546Sopenharmony_ci      radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
5718bf215546Sopenharmony_ci   }
5719bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10) {
5720bf215546Sopenharmony_ci      uint32_t oversub_pc_lines = late_alloc_wave64 ? pdevice->rad_info.pc_lines / 4 : 0;
5721bf215546Sopenharmony_ci      gfx10_emit_ge_pc_alloc(cs, pdevice->rad_info.gfx_level, oversub_pc_lines);
5722bf215546Sopenharmony_ci   }
5723bf215546Sopenharmony_ci}
5724bf215546Sopenharmony_ci
5725bf215546Sopenharmony_cistatic void
5726bf215546Sopenharmony_ciradv_pipeline_emit_hw_es(struct radeon_cmdbuf *cs, const struct radv_graphics_pipeline *pipeline,
5727bf215546Sopenharmony_ci                         const struct radv_shader *shader)
5728bf215546Sopenharmony_ci{
5729bf215546Sopenharmony_ci   uint64_t va = radv_shader_get_va(shader);
5730bf215546Sopenharmony_ci
5731bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
5732bf215546Sopenharmony_ci   radeon_emit(cs, va >> 8);
5733bf215546Sopenharmony_ci   radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
5734bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc1);
5735bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc2);
5736bf215546Sopenharmony_ci}
5737bf215546Sopenharmony_ci
5738bf215546Sopenharmony_cistatic void
5739bf215546Sopenharmony_ciradv_pipeline_emit_hw_ls(struct radeon_cmdbuf *cs, const struct radv_graphics_pipeline *pipeline,
5740bf215546Sopenharmony_ci                         const struct radv_shader *shader)
5741bf215546Sopenharmony_ci{
5742bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5743bf215546Sopenharmony_ci   unsigned num_lds_blocks = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
5744bf215546Sopenharmony_ci   uint64_t va = radv_shader_get_va(shader);
5745bf215546Sopenharmony_ci   uint32_t rsrc2 = shader->config.rsrc2;
5746bf215546Sopenharmony_ci
5747bf215546Sopenharmony_ci   radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
5748bf215546Sopenharmony_ci
5749bf215546Sopenharmony_ci   rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
5750bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level == GFX7 && pdevice->rad_info.family != CHIP_HAWAII)
5751bf215546Sopenharmony_ci      radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
5752bf215546Sopenharmony_ci
5753bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
5754bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc1);
5755bf215546Sopenharmony_ci   radeon_emit(cs, rsrc2);
5756bf215546Sopenharmony_ci}
5757bf215546Sopenharmony_ci
5758bf215546Sopenharmony_cistatic void
5759bf215546Sopenharmony_ciradv_pipeline_emit_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
5760bf215546Sopenharmony_ci                          const struct radv_graphics_pipeline *pipeline,
5761bf215546Sopenharmony_ci                          const struct radv_shader *shader)
5762bf215546Sopenharmony_ci{
5763bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5764bf215546Sopenharmony_ci   uint64_t va = radv_shader_get_va(shader);
5765bf215546Sopenharmony_ci   gl_shader_stage es_type =
5766bf215546Sopenharmony_ci      radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH :
5767bf215546Sopenharmony_ci      radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
5768bf215546Sopenharmony_ci   struct radv_shader *es = pipeline->base.shaders[es_type];
5769bf215546Sopenharmony_ci   const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
5770bf215546Sopenharmony_ci
5771bf215546Sopenharmony_ci   radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
5772bf215546Sopenharmony_ci
5773bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
5774bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc1);
5775bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc2);
5776bf215546Sopenharmony_ci
5777bf215546Sopenharmony_ci   const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
5778bf215546Sopenharmony_ci   unsigned clip_dist_mask, cull_dist_mask, total_mask;
5779bf215546Sopenharmony_ci   clip_dist_mask = outinfo->clip_dist_mask;
5780bf215546Sopenharmony_ci   cull_dist_mask = outinfo->cull_dist_mask;
5781bf215546Sopenharmony_ci   total_mask = clip_dist_mask | cull_dist_mask;
5782bf215546Sopenharmony_ci
5783bf215546Sopenharmony_ci   bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
5784bf215546Sopenharmony_ci                       outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
5785bf215546Sopenharmony_ci   bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
5786bf215546Sopenharmony_ci   bool break_wave_at_eoi = false;
5787bf215546Sopenharmony_ci   unsigned ge_cntl;
5788bf215546Sopenharmony_ci
5789bf215546Sopenharmony_ci   if (es_type == MESA_SHADER_TESS_EVAL) {
5790bf215546Sopenharmony_ci      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];
5791bf215546Sopenharmony_ci
5792bf215546Sopenharmony_ci      if (es_enable_prim_id || (gs && gs->info.uses_prim_id))
5793bf215546Sopenharmony_ci         break_wave_at_eoi = true;
5794bf215546Sopenharmony_ci   }
5795bf215546Sopenharmony_ci
5796bf215546Sopenharmony_ci   bool no_pc_export = outinfo->param_exports == 0 && outinfo->prim_param_exports == 0;
5797bf215546Sopenharmony_ci   unsigned num_params = MAX2(outinfo->param_exports, 1);
5798bf215546Sopenharmony_ci   unsigned num_prim_params = outinfo->prim_param_exports;
5799bf215546Sopenharmony_ci   radeon_set_context_reg(
5800bf215546Sopenharmony_ci      ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG,
5801bf215546Sopenharmony_ci      S_0286C4_VS_EXPORT_COUNT(num_params - 1) |
5802bf215546Sopenharmony_ci      S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) |
5803bf215546Sopenharmony_ci      S_0286C4_NO_PC_EXPORT(no_pc_export));
5804bf215546Sopenharmony_ci
5805bf215546Sopenharmony_ci   unsigned idx_format = V_028708_SPI_SHADER_1COMP;
5806bf215546Sopenharmony_ci   if (outinfo->writes_layer_per_primitive ||
5807bf215546Sopenharmony_ci       outinfo->writes_viewport_index_per_primitive ||
5808bf215546Sopenharmony_ci       outinfo->writes_primitive_shading_rate_per_primitive)
5809bf215546Sopenharmony_ci      idx_format = V_028708_SPI_SHADER_2COMP;
5810bf215546Sopenharmony_ci
5811bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT,
5812bf215546Sopenharmony_ci                          S_028708_IDX0_EXPORT_FORMAT(idx_format));
5813bf215546Sopenharmony_ci   radeon_set_context_reg(
5814bf215546Sopenharmony_ci      ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
5815bf215546Sopenharmony_ci      S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
5816bf215546Sopenharmony_ci         S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
5817bf215546Sopenharmony_ci                                                              : V_02870C_SPI_SHADER_NONE) |
5818bf215546Sopenharmony_ci         S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
5819bf215546Sopenharmony_ci                                                              : V_02870C_SPI_SHADER_NONE) |
5820bf215546Sopenharmony_ci         S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
5821bf215546Sopenharmony_ci                                                              : V_02870C_SPI_SHADER_NONE));
5822bf215546Sopenharmony_ci
5823bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
5824bf215546Sopenharmony_ci                          S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
5825bf215546Sopenharmony_ci                             S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
5826bf215546Sopenharmony_ci                             S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
5827bf215546Sopenharmony_ci                             S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
5828bf215546Sopenharmony_ci                             S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
5829bf215546Sopenharmony_ci                             S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
5830bf215546Sopenharmony_ci                             S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
5831bf215546Sopenharmony_ci                             S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
5832bf215546Sopenharmony_ci                             total_mask << 8 | clip_dist_mask);
5833bf215546Sopenharmony_ci
5834bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
5835bf215546Sopenharmony_ci                          S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
5836bf215546Sopenharmony_ci                             S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
5837bf215546Sopenharmony_ci
5838bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
5839bf215546Sopenharmony_ci                          ngg_state->vgt_esgs_ring_itemsize);
5840bf215546Sopenharmony_ci
5841bf215546Sopenharmony_ci   /* NGG specific registers. */
5842bf215546Sopenharmony_ci   struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];
5843bf215546Sopenharmony_ci   uint32_t gs_num_invocations = gs ? gs->info.gs.invocations : 1;
5844bf215546Sopenharmony_ci
5845bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level < GFX11) {
5846bf215546Sopenharmony_ci      radeon_set_context_reg(
5847bf215546Sopenharmony_ci         ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
5848bf215546Sopenharmony_ci         S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
5849bf215546Sopenharmony_ci            S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
5850bf215546Sopenharmony_ci            S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
5851bf215546Sopenharmony_ci   }
5852bf215546Sopenharmony_ci
5853bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
5854bf215546Sopenharmony_ci                          S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts));
5855bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
5856bf215546Sopenharmony_ci                          S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) |
5857bf215546Sopenharmony_ci                             S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */
5858bf215546Sopenharmony_ci   radeon_set_context_reg(
5859bf215546Sopenharmony_ci      ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
5860bf215546Sopenharmony_ci      S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
5861bf215546Sopenharmony_ci         S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance));
5862bf215546Sopenharmony_ci
5863bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX11) {
5864bf215546Sopenharmony_ci      ge_cntl = S_03096C_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
5865bf215546Sopenharmony_ci                S_03096C_VERTS_PER_SUBGRP(ngg_state->enable_vertex_grouping
5866bf215546Sopenharmony_ci                                          ? ngg_state->hw_max_esverts
5867bf215546Sopenharmony_ci                                          : 256) | /* 256 = disable vertex grouping */
5868bf215546Sopenharmony_ci                S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) |
5869bf215546Sopenharmony_ci                S_03096C_PRIM_GRP_SIZE_GFX11(256);
5870bf215546Sopenharmony_ci   } else {
5871bf215546Sopenharmony_ci      ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(ngg_state->max_gsprims) |
5872bf215546Sopenharmony_ci                S_03096C_VERT_GRP_SIZE(ngg_state->enable_vertex_grouping
5873bf215546Sopenharmony_ci                                          ? ngg_state->hw_max_esverts
5874bf215546Sopenharmony_ci                                          : 256) | /* 256 = disable vertex grouping */
5875bf215546Sopenharmony_ci                S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
5876bf215546Sopenharmony_ci   }
5877bf215546Sopenharmony_ci
5878bf215546Sopenharmony_ci   /* Bug workaround for a possible hang with non-tessellation cases.
5879bf215546Sopenharmony_ci    * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
5880bf215546Sopenharmony_ci    *
5881bf215546Sopenharmony_ci    * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
5882bf215546Sopenharmony_ci    */
5883bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level == GFX10 &&
5884bf215546Sopenharmony_ci       !radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) && ngg_state->hw_max_esverts != 256) {
5885bf215546Sopenharmony_ci      ge_cntl &= C_03096C_VERT_GRP_SIZE;
5886bf215546Sopenharmony_ci
5887bf215546Sopenharmony_ci      if (ngg_state->hw_max_esverts > 5) {
5888bf215546Sopenharmony_ci         ge_cntl |= S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5);
5889bf215546Sopenharmony_ci      }
5890bf215546Sopenharmony_ci   }
5891bf215546Sopenharmony_ci
5892bf215546Sopenharmony_ci   radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, ge_cntl);
5893bf215546Sopenharmony_ci
5894bf215546Sopenharmony_ci   unsigned late_alloc_wave64, cu_mask;
5895bf215546Sopenharmony_ci   ac_compute_late_alloc(&pdevice->rad_info, true, shader->info.has_ngg_culling,
5896bf215546Sopenharmony_ci                         shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask);
5897bf215546Sopenharmony_ci
5898bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX11) {
5899bf215546Sopenharmony_ci      /* TODO: figure out how S_00B204_CU_EN_GFX11 interacts with ac_set_reg_cu_en */
5900bf215546Sopenharmony_ci      gfx10_set_sh_reg_idx3(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
5901bf215546Sopenharmony_ci                            S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F));
5902bf215546Sopenharmony_ci      gfx10_set_sh_reg_idx3(
5903bf215546Sopenharmony_ci         cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
5904bf215546Sopenharmony_ci         S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
5905bf215546Sopenharmony_ci   } else if (pdevice->rad_info.gfx_level >= GFX10) {
5906bf215546Sopenharmony_ci      ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
5907bf215546Sopenharmony_ci                       S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F),
5908bf215546Sopenharmony_ci                       C_00B21C_CU_EN, 0, &pdevice->rad_info, (void*)gfx10_set_sh_reg_idx3);
5909bf215546Sopenharmony_ci      ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
5910bf215546Sopenharmony_ci                       S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64),
5911bf215546Sopenharmony_ci                       C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info,
5912bf215546Sopenharmony_ci                       (void*)gfx10_set_sh_reg_idx3);
5913bf215546Sopenharmony_ci   } else {
5914bf215546Sopenharmony_ci      radeon_set_sh_reg_idx(
5915bf215546Sopenharmony_ci         pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
5916bf215546Sopenharmony_ci         S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F));
5917bf215546Sopenharmony_ci      radeon_set_sh_reg_idx(
5918bf215546Sopenharmony_ci         pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
5919bf215546Sopenharmony_ci         S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
5920bf215546Sopenharmony_ci   }
5921bf215546Sopenharmony_ci
5922bf215546Sopenharmony_ci   uint32_t oversub_pc_lines = late_alloc_wave64 ? pdevice->rad_info.pc_lines / 4 : 0;
5923bf215546Sopenharmony_ci   if (shader->info.has_ngg_culling) {
5924bf215546Sopenharmony_ci      unsigned oversub_factor = 2;
5925bf215546Sopenharmony_ci
5926bf215546Sopenharmony_ci      if (outinfo->param_exports > 4)
5927bf215546Sopenharmony_ci         oversub_factor = 4;
5928bf215546Sopenharmony_ci      else if (outinfo->param_exports > 2)
5929bf215546Sopenharmony_ci         oversub_factor = 3;
5930bf215546Sopenharmony_ci
5931bf215546Sopenharmony_ci      oversub_pc_lines *= oversub_factor;
5932bf215546Sopenharmony_ci   }
5933bf215546Sopenharmony_ci
5934bf215546Sopenharmony_ci   gfx10_emit_ge_pc_alloc(cs, pdevice->rad_info.gfx_level, oversub_pc_lines);
5935bf215546Sopenharmony_ci}
5936bf215546Sopenharmony_ci
5937bf215546Sopenharmony_cistatic void
5938bf215546Sopenharmony_ciradv_pipeline_emit_hw_hs(struct radeon_cmdbuf *cs, const struct radv_graphics_pipeline *pipeline,
5939bf215546Sopenharmony_ci                         const struct radv_shader *shader)
5940bf215546Sopenharmony_ci{
5941bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5942bf215546Sopenharmony_ci   uint64_t va = radv_shader_get_va(shader);
5943bf215546Sopenharmony_ci
5944bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX9) {
5945bf215546Sopenharmony_ci      if (pdevice->rad_info.gfx_level >= GFX10) {
5946bf215546Sopenharmony_ci         radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
5947bf215546Sopenharmony_ci      } else {
5948bf215546Sopenharmony_ci         radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
5949bf215546Sopenharmony_ci      }
5950bf215546Sopenharmony_ci
5951bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
5952bf215546Sopenharmony_ci      radeon_emit(cs, shader->config.rsrc1);
5953bf215546Sopenharmony_ci      radeon_emit(cs, shader->config.rsrc2);
5954bf215546Sopenharmony_ci   } else {
5955bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
5956bf215546Sopenharmony_ci      radeon_emit(cs, va >> 8);
5957bf215546Sopenharmony_ci      radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
5958bf215546Sopenharmony_ci      radeon_emit(cs, shader->config.rsrc1);
5959bf215546Sopenharmony_ci      radeon_emit(cs, shader->config.rsrc2);
5960bf215546Sopenharmony_ci   }
5961bf215546Sopenharmony_ci}
5962bf215546Sopenharmony_ci
5963bf215546Sopenharmony_cistatic void
5964bf215546Sopenharmony_ciradv_pipeline_emit_vertex_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
5965bf215546Sopenharmony_ci                                 const struct radv_graphics_pipeline *pipeline)
5966bf215546Sopenharmony_ci{
5967bf215546Sopenharmony_ci   struct radv_shader *vs;
5968bf215546Sopenharmony_ci
5969bf215546Sopenharmony_ci   /* Skip shaders merged into HS/GS */
5970bf215546Sopenharmony_ci   vs = pipeline->base.shaders[MESA_SHADER_VERTEX];
5971bf215546Sopenharmony_ci   if (!vs)
5972bf215546Sopenharmony_ci      return;
5973bf215546Sopenharmony_ci
5974bf215546Sopenharmony_ci   if (vs->info.vs.as_ls)
5975bf215546Sopenharmony_ci      radv_pipeline_emit_hw_ls(cs, pipeline, vs);
5976bf215546Sopenharmony_ci   else if (vs->info.vs.as_es)
5977bf215546Sopenharmony_ci      radv_pipeline_emit_hw_es(cs, pipeline, vs);
5978bf215546Sopenharmony_ci   else if (vs->info.is_ngg)
5979bf215546Sopenharmony_ci      radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, vs);
5980bf215546Sopenharmony_ci   else
5981bf215546Sopenharmony_ci      radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, vs);
5982bf215546Sopenharmony_ci}
5983bf215546Sopenharmony_ci
5984bf215546Sopenharmony_cistatic void
5985bf215546Sopenharmony_ciradv_pipeline_emit_tess_shaders(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
5986bf215546Sopenharmony_ci                                const struct radv_graphics_pipeline *pipeline)
5987bf215546Sopenharmony_ci{
5988bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
5989bf215546Sopenharmony_ci   struct radv_shader *tes, *tcs;
5990bf215546Sopenharmony_ci
5991bf215546Sopenharmony_ci   tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
5992bf215546Sopenharmony_ci   tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];
5993bf215546Sopenharmony_ci
5994bf215546Sopenharmony_ci   if (tes) {
5995bf215546Sopenharmony_ci      if (tes->info.is_ngg) {
5996bf215546Sopenharmony_ci         radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, tes);
5997bf215546Sopenharmony_ci      } else if (tes->info.tes.as_es)
5998bf215546Sopenharmony_ci         radv_pipeline_emit_hw_es(cs, pipeline, tes);
5999bf215546Sopenharmony_ci      else
6000bf215546Sopenharmony_ci         radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, tes);
6001bf215546Sopenharmony_ci   }
6002bf215546Sopenharmony_ci
6003bf215546Sopenharmony_ci   radv_pipeline_emit_hw_hs(cs, pipeline, tcs);
6004bf215546Sopenharmony_ci
6005bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10 &&
6006bf215546Sopenharmony_ci       !radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && !radv_pipeline_has_ngg(pipeline)) {
6007bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
6008bf215546Sopenharmony_ci                             S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
6009bf215546Sopenharmony_ci                                S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
6010bf215546Sopenharmony_ci   }
6011bf215546Sopenharmony_ci}
6012bf215546Sopenharmony_ci
6013bf215546Sopenharmony_cistatic void
6014bf215546Sopenharmony_ciradv_pipeline_emit_tess_state(struct radeon_cmdbuf *ctx_cs,
6015bf215546Sopenharmony_ci                              const struct radv_graphics_pipeline *pipeline,
6016bf215546Sopenharmony_ci                              const struct radv_graphics_pipeline_info *info)
6017bf215546Sopenharmony_ci{
6018bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6019bf215546Sopenharmony_ci   struct radv_shader *tes = radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL);
6020bf215546Sopenharmony_ci   unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
6021bf215546Sopenharmony_ci   unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches;
6022bf215546Sopenharmony_ci   unsigned ls_hs_config;
6023bf215546Sopenharmony_ci
6024bf215546Sopenharmony_ci   num_tcs_input_cp = info->ts.patch_control_points;
6025bf215546Sopenharmony_ci   num_tcs_output_cp =
6026bf215546Sopenharmony_ci      pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; // TCS VERTICES OUT
6027bf215546Sopenharmony_ci   num_patches = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
6028bf215546Sopenharmony_ci
6029bf215546Sopenharmony_ci   ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
6030bf215546Sopenharmony_ci                  S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
6031bf215546Sopenharmony_ci
6032bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX7) {
6033bf215546Sopenharmony_ci      radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
6034bf215546Sopenharmony_ci   } else {
6035bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
6036bf215546Sopenharmony_ci   }
6037bf215546Sopenharmony_ci
6038bf215546Sopenharmony_ci   switch (tes->info.tes._primitive_mode) {
6039bf215546Sopenharmony_ci   case TESS_PRIMITIVE_TRIANGLES:
6040bf215546Sopenharmony_ci      type = V_028B6C_TESS_TRIANGLE;
6041bf215546Sopenharmony_ci      break;
6042bf215546Sopenharmony_ci   case TESS_PRIMITIVE_QUADS:
6043bf215546Sopenharmony_ci      type = V_028B6C_TESS_QUAD;
6044bf215546Sopenharmony_ci      break;
6045bf215546Sopenharmony_ci   case TESS_PRIMITIVE_ISOLINES:
6046bf215546Sopenharmony_ci      type = V_028B6C_TESS_ISOLINE;
6047bf215546Sopenharmony_ci      break;
6048bf215546Sopenharmony_ci   default:
6049bf215546Sopenharmony_ci      break;
6050bf215546Sopenharmony_ci   }
6051bf215546Sopenharmony_ci
6052bf215546Sopenharmony_ci   switch (tes->info.tes.spacing) {
6053bf215546Sopenharmony_ci   case TESS_SPACING_EQUAL:
6054bf215546Sopenharmony_ci      partitioning = V_028B6C_PART_INTEGER;
6055bf215546Sopenharmony_ci      break;
6056bf215546Sopenharmony_ci   case TESS_SPACING_FRACTIONAL_ODD:
6057bf215546Sopenharmony_ci      partitioning = V_028B6C_PART_FRAC_ODD;
6058bf215546Sopenharmony_ci      break;
6059bf215546Sopenharmony_ci   case TESS_SPACING_FRACTIONAL_EVEN:
6060bf215546Sopenharmony_ci      partitioning = V_028B6C_PART_FRAC_EVEN;
6061bf215546Sopenharmony_ci      break;
6062bf215546Sopenharmony_ci   default:
6063bf215546Sopenharmony_ci      break;
6064bf215546Sopenharmony_ci   }
6065bf215546Sopenharmony_ci
6066bf215546Sopenharmony_ci   bool ccw = tes->info.tes.ccw;
6067bf215546Sopenharmony_ci   if (info->ts.domain_origin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
6068bf215546Sopenharmony_ci      ccw = !ccw;
6069bf215546Sopenharmony_ci
6070bf215546Sopenharmony_ci   if (tes->info.tes.point_mode)
6071bf215546Sopenharmony_ci      topology = V_028B6C_OUTPUT_POINT;
6072bf215546Sopenharmony_ci   else if (tes->info.tes._primitive_mode == TESS_PRIMITIVE_ISOLINES)
6073bf215546Sopenharmony_ci      topology = V_028B6C_OUTPUT_LINE;
6074bf215546Sopenharmony_ci   else if (ccw)
6075bf215546Sopenharmony_ci      topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
6076bf215546Sopenharmony_ci   else
6077bf215546Sopenharmony_ci      topology = V_028B6C_OUTPUT_TRIANGLE_CW;
6078bf215546Sopenharmony_ci
6079bf215546Sopenharmony_ci   if (pdevice->rad_info.has_distributed_tess) {
6080bf215546Sopenharmony_ci      if (pdevice->rad_info.family == CHIP_FIJI || pdevice->rad_info.family >= CHIP_POLARIS10)
6081bf215546Sopenharmony_ci         distribution_mode = V_028B6C_TRAPEZOIDS;
6082bf215546Sopenharmony_ci      else
6083bf215546Sopenharmony_ci         distribution_mode = V_028B6C_DONUTS;
6084bf215546Sopenharmony_ci   } else
6085bf215546Sopenharmony_ci      distribution_mode = V_028B6C_NO_DIST;
6086bf215546Sopenharmony_ci
6087bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
6088bf215546Sopenharmony_ci                          S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) |
6089bf215546Sopenharmony_ci                             S_028B6C_TOPOLOGY(topology) |
6090bf215546Sopenharmony_ci                             S_028B6C_DISTRIBUTION_MODE(distribution_mode));
6091bf215546Sopenharmony_ci}
6092bf215546Sopenharmony_ci
6093bf215546Sopenharmony_cistatic void
6094bf215546Sopenharmony_ciradv_pipeline_emit_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
6095bf215546Sopenharmony_ci                         const struct radv_graphics_pipeline *pipeline, const struct radv_shader *gs)
6096bf215546Sopenharmony_ci{
6097bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6098bf215546Sopenharmony_ci   const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
6099bf215546Sopenharmony_ci   unsigned gs_max_out_vertices;
6100bf215546Sopenharmony_ci   const uint8_t *num_components;
6101bf215546Sopenharmony_ci   uint8_t max_stream;
6102bf215546Sopenharmony_ci   unsigned offset;
6103bf215546Sopenharmony_ci   uint64_t va;
6104bf215546Sopenharmony_ci
6105bf215546Sopenharmony_ci   gs_max_out_vertices = gs->info.gs.vertices_out;
6106bf215546Sopenharmony_ci   max_stream = gs->info.gs.max_stream;
6107bf215546Sopenharmony_ci   num_components = gs->info.gs.num_stream_output_components;
6108bf215546Sopenharmony_ci
6109bf215546Sopenharmony_ci   offset = num_components[0] * gs_max_out_vertices;
6110bf215546Sopenharmony_ci
6111bf215546Sopenharmony_ci   radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
6112bf215546Sopenharmony_ci   radeon_emit(ctx_cs, offset);
6113bf215546Sopenharmony_ci   if (max_stream >= 1)
6114bf215546Sopenharmony_ci      offset += num_components[1] * gs_max_out_vertices;
6115bf215546Sopenharmony_ci   radeon_emit(ctx_cs, offset);
6116bf215546Sopenharmony_ci   if (max_stream >= 2)
6117bf215546Sopenharmony_ci      offset += num_components[2] * gs_max_out_vertices;
6118bf215546Sopenharmony_ci   radeon_emit(ctx_cs, offset);
6119bf215546Sopenharmony_ci   if (max_stream >= 3)
6120bf215546Sopenharmony_ci      offset += num_components[3] * gs_max_out_vertices;
6121bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset);
6122bf215546Sopenharmony_ci
6123bf215546Sopenharmony_ci   radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
6124bf215546Sopenharmony_ci   radeon_emit(ctx_cs, num_components[0]);
6125bf215546Sopenharmony_ci   radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0);
6126bf215546Sopenharmony_ci   radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0);
6127bf215546Sopenharmony_ci   radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0);
6128bf215546Sopenharmony_ci
6129bf215546Sopenharmony_ci   uint32_t gs_num_invocations = gs->info.gs.invocations;
6130bf215546Sopenharmony_ci   radeon_set_context_reg(
6131bf215546Sopenharmony_ci      ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
6132bf215546Sopenharmony_ci      S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0));
6133bf215546Sopenharmony_ci
6134bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
6135bf215546Sopenharmony_ci                          gs_state->vgt_esgs_ring_itemsize);
6136bf215546Sopenharmony_ci
6137bf215546Sopenharmony_ci   va = radv_shader_get_va(gs);
6138bf215546Sopenharmony_ci
6139bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX9) {
6140bf215546Sopenharmony_ci      if (pdevice->rad_info.gfx_level >= GFX10) {
6141bf215546Sopenharmony_ci         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
6142bf215546Sopenharmony_ci      } else {
6143bf215546Sopenharmony_ci         radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
6144bf215546Sopenharmony_ci      }
6145bf215546Sopenharmony_ci
6146bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
6147bf215546Sopenharmony_ci      radeon_emit(cs, gs->config.rsrc1);
6148bf215546Sopenharmony_ci      radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
6149bf215546Sopenharmony_ci
6150bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
6151bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
6152bf215546Sopenharmony_ci                             gs_state->vgt_gs_max_prims_per_subgroup);
6153bf215546Sopenharmony_ci   } else {
6154bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
6155bf215546Sopenharmony_ci      radeon_emit(cs, va >> 8);
6156bf215546Sopenharmony_ci      radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
6157bf215546Sopenharmony_ci      radeon_emit(cs, gs->config.rsrc1);
6158bf215546Sopenharmony_ci      radeon_emit(cs, gs->config.rsrc2);
6159bf215546Sopenharmony_ci   }
6160bf215546Sopenharmony_ci
6161bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10) {
6162bf215546Sopenharmony_ci      ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
6163bf215546Sopenharmony_ci                       S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F),
6164bf215546Sopenharmony_ci                       C_00B21C_CU_EN, 0, &pdevice->rad_info,
6165bf215546Sopenharmony_ci                       (void*)gfx10_set_sh_reg_idx3);
6166bf215546Sopenharmony_ci      ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
6167bf215546Sopenharmony_ci                       S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0),
6168bf215546Sopenharmony_ci                       C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info,
6169bf215546Sopenharmony_ci                       (void*)gfx10_set_sh_reg_idx3);
6170bf215546Sopenharmony_ci   } else if (pdevice->rad_info.gfx_level >= GFX7) {
6171bf215546Sopenharmony_ci      radeon_set_sh_reg_idx(
6172bf215546Sopenharmony_ci         pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
6173bf215546Sopenharmony_ci         S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
6174bf215546Sopenharmony_ci
6175bf215546Sopenharmony_ci      if (pdevice->rad_info.gfx_level >= GFX10) {
6176bf215546Sopenharmony_ci         radeon_set_sh_reg_idx(
6177bf215546Sopenharmony_ci            pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
6178bf215546Sopenharmony_ci            S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0));
6179bf215546Sopenharmony_ci      }
6180bf215546Sopenharmony_ci   }
6181bf215546Sopenharmony_ci
6182bf215546Sopenharmony_ci   radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, pipeline->base.gs_copy_shader);
6183bf215546Sopenharmony_ci}
6184bf215546Sopenharmony_ci
6185bf215546Sopenharmony_cistatic void
6186bf215546Sopenharmony_ciradv_pipeline_emit_geometry_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
6187bf215546Sopenharmony_ci                                   const struct radv_graphics_pipeline *pipeline)
6188bf215546Sopenharmony_ci{
6189bf215546Sopenharmony_ci   struct radv_shader *gs;
6190bf215546Sopenharmony_ci
6191bf215546Sopenharmony_ci   gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];
6192bf215546Sopenharmony_ci   if (!gs)
6193bf215546Sopenharmony_ci      return;
6194bf215546Sopenharmony_ci
6195bf215546Sopenharmony_ci   if (gs->info.is_ngg)
6196bf215546Sopenharmony_ci      radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, gs);
6197bf215546Sopenharmony_ci   else
6198bf215546Sopenharmony_ci      radv_pipeline_emit_hw_gs(ctx_cs, cs, pipeline, gs);
6199bf215546Sopenharmony_ci
6200bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
6201bf215546Sopenharmony_ci}
6202bf215546Sopenharmony_ci
6203bf215546Sopenharmony_cistatic void
6204bf215546Sopenharmony_ciradv_pipeline_emit_mesh_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
6205bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline *pipeline)
6206bf215546Sopenharmony_ci{
6207bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6208bf215546Sopenharmony_ci   struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH];
6209bf215546Sopenharmony_ci   if (!ms)
6210bf215546Sopenharmony_ci      return;
6211bf215546Sopenharmony_ci
6212bf215546Sopenharmony_ci   radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, ms);
6213bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, ms->info.workgroup_size);
6214bf215546Sopenharmony_ci   radeon_set_uconfig_reg_idx(pdevice, ctx_cs,
6215bf215546Sopenharmony_ci                              R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);
6216bf215546Sopenharmony_ci}
6217bf215546Sopenharmony_ci
6218bf215546Sopenharmony_cistatic uint32_t
6219bf215546Sopenharmony_cioffset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool float16)
6220bf215546Sopenharmony_ci{
6221bf215546Sopenharmony_ci   uint32_t ps_input_cntl;
6222bf215546Sopenharmony_ci   if (offset <= AC_EXP_PARAM_OFFSET_31) {
6223bf215546Sopenharmony_ci      ps_input_cntl = S_028644_OFFSET(offset);
6224bf215546Sopenharmony_ci      if (flat_shade || explicit)
6225bf215546Sopenharmony_ci         ps_input_cntl |= S_028644_FLAT_SHADE(1);
6226bf215546Sopenharmony_ci      if (explicit) {
6227bf215546Sopenharmony_ci         /* Force parameter cache to be read in passthrough
6228bf215546Sopenharmony_ci          * mode.
6229bf215546Sopenharmony_ci          */
6230bf215546Sopenharmony_ci         ps_input_cntl |= S_028644_OFFSET(1 << 5);
6231bf215546Sopenharmony_ci      }
6232bf215546Sopenharmony_ci      if (float16) {
6233bf215546Sopenharmony_ci         ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | S_028644_ATTR0_VALID(1);
6234bf215546Sopenharmony_ci      }
6235bf215546Sopenharmony_ci   } else {
6236bf215546Sopenharmony_ci      /* The input is a DEFAULT_VAL constant. */
6237bf215546Sopenharmony_ci      assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
6238bf215546Sopenharmony_ci      offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
6239bf215546Sopenharmony_ci      ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
6240bf215546Sopenharmony_ci   }
6241bf215546Sopenharmony_ci   return ps_input_cntl;
6242bf215546Sopenharmony_ci}
6243bf215546Sopenharmony_ci
6244bf215546Sopenharmony_cistatic void
6245bf215546Sopenharmony_cisingle_slot_to_ps_input(const struct radv_vs_output_info *outinfo,
6246bf215546Sopenharmony_ci                        unsigned slot, uint32_t *ps_input_cntl, unsigned *ps_offset,
6247bf215546Sopenharmony_ci                        bool skip_undef, bool use_default_0, bool flat_shade)
6248bf215546Sopenharmony_ci{
6249bf215546Sopenharmony_ci   unsigned vs_offset = outinfo->vs_output_param_offset[slot];
6250bf215546Sopenharmony_ci
6251bf215546Sopenharmony_ci   if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
6252bf215546Sopenharmony_ci      if (skip_undef)
6253bf215546Sopenharmony_ci         return;
6254bf215546Sopenharmony_ci      else if (use_default_0)
6255bf215546Sopenharmony_ci         vs_offset = AC_EXP_PARAM_DEFAULT_VAL_0000;
6256bf215546Sopenharmony_ci      else
6257bf215546Sopenharmony_ci         unreachable("vs_offset should not be AC_EXP_PARAM_UNDEFINED.");
6258bf215546Sopenharmony_ci   }
6259bf215546Sopenharmony_ci
6260bf215546Sopenharmony_ci   ps_input_cntl[*ps_offset] = offset_to_ps_input(vs_offset, flat_shade, false, false);
6261bf215546Sopenharmony_ci   ++(*ps_offset);
6262bf215546Sopenharmony_ci}
6263bf215546Sopenharmony_ci
6264bf215546Sopenharmony_cistatic void
6265bf215546Sopenharmony_ciinput_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct radv_shader *ps,
6266bf215546Sopenharmony_ci                        uint32_t input_mask, uint32_t *ps_input_cntl, unsigned *ps_offset)
6267bf215546Sopenharmony_ci{
6268bf215546Sopenharmony_ci   u_foreach_bit(i, input_mask) {
6269bf215546Sopenharmony_ci      unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
6270bf215546Sopenharmony_ci      if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
6271bf215546Sopenharmony_ci         ps_input_cntl[*ps_offset] = S_028644_OFFSET(0x20);
6272bf215546Sopenharmony_ci         ++(*ps_offset);
6273bf215546Sopenharmony_ci         continue;
6274bf215546Sopenharmony_ci      }
6275bf215546Sopenharmony_ci
6276bf215546Sopenharmony_ci      bool flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << *ps_offset));
6277bf215546Sopenharmony_ci      bool explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << *ps_offset));
6278bf215546Sopenharmony_ci      bool float16 = !!(ps->info.ps.float16_shaded_mask & (1u << *ps_offset));
6279bf215546Sopenharmony_ci
6280bf215546Sopenharmony_ci      ps_input_cntl[*ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16);
6281bf215546Sopenharmony_ci      ++(*ps_offset);
6282bf215546Sopenharmony_ci   }
6283bf215546Sopenharmony_ci}
6284bf215546Sopenharmony_ci
6285bf215546Sopenharmony_cistatic void
6286bf215546Sopenharmony_ciradv_pipeline_emit_ps_inputs(struct radeon_cmdbuf *ctx_cs,
6287bf215546Sopenharmony_ci                             const struct radv_graphics_pipeline *pipeline)
6288bf215546Sopenharmony_ci{
6289bf215546Sopenharmony_ci   struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
6290bf215546Sopenharmony_ci   const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
6291bf215546Sopenharmony_ci   bool mesh = radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
6292bf215546Sopenharmony_ci   uint32_t ps_input_cntl[32];
6293bf215546Sopenharmony_ci
6294bf215546Sopenharmony_ci   unsigned ps_offset = 0;
6295bf215546Sopenharmony_ci
6296bf215546Sopenharmony_ci   if (ps->info.ps.prim_id_input && !mesh)
6297bf215546Sopenharmony_ci      single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset,
6298bf215546Sopenharmony_ci                              true, false, true);
6299bf215546Sopenharmony_ci
6300bf215546Sopenharmony_ci   if (ps->info.ps.layer_input && !mesh)
6301bf215546Sopenharmony_ci      single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset,
6302bf215546Sopenharmony_ci                              false, true, true);
6303bf215546Sopenharmony_ci
6304bf215546Sopenharmony_ci   if (ps->info.ps.viewport_index_input && !mesh)
6305bf215546Sopenharmony_ci      single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset,
6306bf215546Sopenharmony_ci                              false, false, true);
6307bf215546Sopenharmony_ci
6308bf215546Sopenharmony_ci   if (ps->info.ps.has_pcoord)
6309bf215546Sopenharmony_ci      ps_input_cntl[ps_offset++] = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
6310bf215546Sopenharmony_ci
6311bf215546Sopenharmony_ci   if (ps->info.ps.num_input_clips_culls) {
6312bf215546Sopenharmony_ci      single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST0, ps_input_cntl, &ps_offset,
6313bf215546Sopenharmony_ci                              true, false, false);
6314bf215546Sopenharmony_ci
6315bf215546Sopenharmony_ci      if (ps->info.ps.num_input_clips_culls > 4)
6316bf215546Sopenharmony_ci         single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST1, ps_input_cntl, &ps_offset,
6317bf215546Sopenharmony_ci                                 true, false, false);
6318bf215546Sopenharmony_ci   }
6319bf215546Sopenharmony_ci
6320bf215546Sopenharmony_ci   input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask,
6321bf215546Sopenharmony_ci                           ps_input_cntl, &ps_offset);
6322bf215546Sopenharmony_ci
6323bf215546Sopenharmony_ci   /* Per-primitive PS inputs: the HW needs these to be last. */
6324bf215546Sopenharmony_ci
6325bf215546Sopenharmony_ci   if (ps->info.ps.prim_id_input && mesh)
6326bf215546Sopenharmony_ci      single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset,
6327bf215546Sopenharmony_ci                              true, false, false);
6328bf215546Sopenharmony_ci
6329bf215546Sopenharmony_ci   if (ps->info.ps.layer_input && mesh)
6330bf215546Sopenharmony_ci      single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset,
6331bf215546Sopenharmony_ci                              false, true, false);
6332bf215546Sopenharmony_ci
6333bf215546Sopenharmony_ci   if (ps->info.ps.viewport_index_input && mesh)
6334bf215546Sopenharmony_ci      single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset,
6335bf215546Sopenharmony_ci                              false, false, false);
6336bf215546Sopenharmony_ci
6337bf215546Sopenharmony_ci   input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask,
6338bf215546Sopenharmony_ci                           ps_input_cntl, &ps_offset);
6339bf215546Sopenharmony_ci
6340bf215546Sopenharmony_ci   if (ps_offset) {
6341bf215546Sopenharmony_ci      radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset);
6342bf215546Sopenharmony_ci      for (unsigned i = 0; i < ps_offset; i++) {
6343bf215546Sopenharmony_ci         radeon_emit(ctx_cs, ps_input_cntl[i]);
6344bf215546Sopenharmony_ci      }
6345bf215546Sopenharmony_ci   }
6346bf215546Sopenharmony_ci}
6347bf215546Sopenharmony_ci
6348bf215546Sopenharmony_cistatic uint32_t
6349bf215546Sopenharmony_ciradv_compute_db_shader_control(const struct radv_physical_device *pdevice,
6350bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline *pipeline,
6351bf215546Sopenharmony_ci                               const struct radv_shader *ps)
6352bf215546Sopenharmony_ci{
6353bf215546Sopenharmony_ci   unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z;
6354bf215546Sopenharmony_ci   unsigned z_order;
6355bf215546Sopenharmony_ci   if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory)
6356bf215546Sopenharmony_ci      z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
6357bf215546Sopenharmony_ci   else
6358bf215546Sopenharmony_ci      z_order = V_02880C_LATE_Z;
6359bf215546Sopenharmony_ci
6360bf215546Sopenharmony_ci   if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
6361bf215546Sopenharmony_ci      conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z;
6362bf215546Sopenharmony_ci   else if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS)
6363bf215546Sopenharmony_ci      conservative_z_export = V_02880C_EXPORT_LESS_THAN_Z;
6364bf215546Sopenharmony_ci
6365bf215546Sopenharmony_ci   bool disable_rbplus = pdevice->rad_info.has_rbplus && !pdevice->rad_info.rbplus_allowed;
6366bf215546Sopenharmony_ci
6367bf215546Sopenharmony_ci   /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
6368bf215546Sopenharmony_ci    * but this appears to break Project Cars (DXVK). See
6369bf215546Sopenharmony_ci    * https://bugs.freedesktop.org/show_bug.cgi?id=109401
6370bf215546Sopenharmony_ci    */
6371bf215546Sopenharmony_ci   bool mask_export_enable = ps->info.ps.writes_sample_mask;
6372bf215546Sopenharmony_ci
6373bf215546Sopenharmony_ci   return S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) |
6374bf215546Sopenharmony_ci          S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) |
6375bf215546Sopenharmony_ci          S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) |
6376bf215546Sopenharmony_ci          S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
6377bf215546Sopenharmony_ci          S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_Z_ORDER(z_order) |
6378bf215546Sopenharmony_ci          S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) |
6379bf215546Sopenharmony_ci          S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) |
6380bf215546Sopenharmony_ci          S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) |
6381bf215546Sopenharmony_ci          S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) |
6382bf215546Sopenharmony_ci          S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
6383bf215546Sopenharmony_ci}
6384bf215546Sopenharmony_ci
6385bf215546Sopenharmony_cistatic void
6386bf215546Sopenharmony_ciradv_pipeline_emit_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
6387bf215546Sopenharmony_ci                                   const struct radv_graphics_pipeline *pipeline)
6388bf215546Sopenharmony_ci{
6389bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6390bf215546Sopenharmony_ci   struct radv_shader *ps;
6391bf215546Sopenharmony_ci   bool param_gen;
6392bf215546Sopenharmony_ci   uint64_t va;
6393bf215546Sopenharmony_ci   assert(pipeline->base.shaders[MESA_SHADER_FRAGMENT]);
6394bf215546Sopenharmony_ci
6395bf215546Sopenharmony_ci   ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
6396bf215546Sopenharmony_ci   va = radv_shader_get_va(ps);
6397bf215546Sopenharmony_ci
6398bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
6399bf215546Sopenharmony_ci   radeon_emit(cs, va >> 8);
6400bf215546Sopenharmony_ci   radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
6401bf215546Sopenharmony_ci   radeon_emit(cs, ps->config.rsrc1);
6402bf215546Sopenharmony_ci   radeon_emit(cs, ps->config.rsrc2);
6403bf215546Sopenharmony_ci
6404bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL,
6405bf215546Sopenharmony_ci                          radv_compute_db_shader_control(pdevice, pipeline, ps));
6406bf215546Sopenharmony_ci
6407bf215546Sopenharmony_ci   radeon_set_context_reg_seq(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, 2);
6408bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ps->config.spi_ps_input_ena);
6409bf215546Sopenharmony_ci   radeon_emit(ctx_cs, ps->config.spi_ps_input_addr);
6410bf215546Sopenharmony_ci
6411bf215546Sopenharmony_ci   /* Workaround when there are no PS inputs but LDS is used. */
6412bf215546Sopenharmony_ci   param_gen = pdevice->rad_info.gfx_level >= GFX11 &&
6413bf215546Sopenharmony_ci               !ps->info.ps.num_interp && ps->config.lds_size;
6414bf215546Sopenharmony_ci
6415bf215546Sopenharmony_ci   radeon_set_context_reg(
6416bf215546Sopenharmony_ci      ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
6417bf215546Sopenharmony_ci      S_0286D8_NUM_INTERP(ps->info.ps.num_interp) |
6418bf215546Sopenharmony_ci      S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) |
6419bf215546Sopenharmony_ci      S_0286D8_PS_W32_EN(ps->info.wave_size == 32) |
6420bf215546Sopenharmony_ci      S_0286D8_PARAM_GEN(param_gen));
6421bf215546Sopenharmony_ci
6422bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->spi_baryc_cntl);
6423bf215546Sopenharmony_ci
6424bf215546Sopenharmony_ci   radeon_set_context_reg(
6425bf215546Sopenharmony_ci      ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
6426bf215546Sopenharmony_ci      ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
6427bf215546Sopenharmony_ci                                 ps->info.ps.writes_sample_mask, false));
6428bf215546Sopenharmony_ci}
6429bf215546Sopenharmony_ci
6430bf215546Sopenharmony_cistatic void
6431bf215546Sopenharmony_ciradv_pipeline_emit_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs,
6432bf215546Sopenharmony_ci                                    const struct radv_graphics_pipeline *pipeline)
6433bf215546Sopenharmony_ci{
6434bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6435bf215546Sopenharmony_ci
6436bf215546Sopenharmony_ci   if (pdevice->rad_info.family < CHIP_POLARIS10 || pdevice->rad_info.gfx_level >= GFX10)
6437bf215546Sopenharmony_ci      return;
6438bf215546Sopenharmony_ci
6439bf215546Sopenharmony_ci   unsigned vtx_reuse_depth = 30;
6440bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) &&
6441bf215546Sopenharmony_ci       radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.tes.spacing ==
6442bf215546Sopenharmony_ci          TESS_SPACING_FRACTIONAL_ODD) {
6443bf215546Sopenharmony_ci      vtx_reuse_depth = 14;
6444bf215546Sopenharmony_ci   }
6445bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
6446bf215546Sopenharmony_ci                          S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
6447bf215546Sopenharmony_ci}
6448bf215546Sopenharmony_ci
6449bf215546Sopenharmony_cistatic void
6450bf215546Sopenharmony_ciradv_pipeline_emit_vgt_shader_config(struct radeon_cmdbuf *ctx_cs,
6451bf215546Sopenharmony_ci                                     const struct radv_graphics_pipeline *pipeline)
6452bf215546Sopenharmony_ci{
6453bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6454bf215546Sopenharmony_ci   uint32_t stages = 0;
6455bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
6456bf215546Sopenharmony_ci      stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
6457bf215546Sopenharmony_ci
6458bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
6459bf215546Sopenharmony_ci         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1);
6460bf215546Sopenharmony_ci      else if (radv_pipeline_has_ngg(pipeline))
6461bf215546Sopenharmony_ci         stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
6462bf215546Sopenharmony_ci      else
6463bf215546Sopenharmony_ci         stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
6464bf215546Sopenharmony_ci   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
6465bf215546Sopenharmony_ci      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
6466bf215546Sopenharmony_ci   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
6467bf215546Sopenharmony_ci      assert(!radv_pipeline_has_ngg_passthrough(pipeline));
6468bf215546Sopenharmony_ci      stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1);
6469bf215546Sopenharmony_ci
6470bf215546Sopenharmony_ci      if (pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring)
6471bf215546Sopenharmony_ci         stages |= S_028B54_NGG_WAVE_ID_EN(1);
6472bf215546Sopenharmony_ci   } else if (radv_pipeline_has_ngg(pipeline)) {
6473bf215546Sopenharmony_ci      stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
6474bf215546Sopenharmony_ci   }
6475bf215546Sopenharmony_ci
6476bf215546Sopenharmony_ci   if (radv_pipeline_has_ngg(pipeline)) {
6477bf215546Sopenharmony_ci      stages |= S_028B54_PRIMGEN_EN(1);
6478bf215546Sopenharmony_ci      if (pipeline->streamout_shader)
6479bf215546Sopenharmony_ci         stages |= S_028B54_NGG_WAVE_ID_EN(1);
6480bf215546Sopenharmony_ci      if (radv_pipeline_has_ngg_passthrough(pipeline)) {
6481bf215546Sopenharmony_ci         stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
6482bf215546Sopenharmony_ci         if (pdevice->rad_info.family >= CHIP_NAVI23)
6483bf215546Sopenharmony_ci            stages |= S_028B54_PRIMGEN_PASSTHRU_NO_MSG(1);
6484bf215546Sopenharmony_ci      }
6485bf215546Sopenharmony_ci   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
6486bf215546Sopenharmony_ci      stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
6487bf215546Sopenharmony_ci   }
6488bf215546Sopenharmony_ci
6489bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX9)
6490bf215546Sopenharmony_ci      stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
6491bf215546Sopenharmony_ci
6492bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10) {
6493bf215546Sopenharmony_ci      uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
6494bf215546Sopenharmony_ci
6495bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
6496bf215546Sopenharmony_ci         hs_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
6497bf215546Sopenharmony_ci
6498bf215546Sopenharmony_ci      if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
6499bf215546Sopenharmony_ci         vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
6500bf215546Sopenharmony_ci         if (radv_pipeline_has_gs_copy_shader(&pipeline->base))
6501bf215546Sopenharmony_ci            vs_size = pipeline->base.gs_copy_shader->info.wave_size;
6502bf215546Sopenharmony_ci      } else if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL])
6503bf215546Sopenharmony_ci         vs_size = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
6504bf215546Sopenharmony_ci      else if (pipeline->base.shaders[MESA_SHADER_VERTEX])
6505bf215546Sopenharmony_ci         vs_size = pipeline->base.shaders[MESA_SHADER_VERTEX]->info.wave_size;
6506bf215546Sopenharmony_ci      else if (pipeline->base.shaders[MESA_SHADER_MESH])
6507bf215546Sopenharmony_ci         vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_MESH]->info.wave_size;
6508bf215546Sopenharmony_ci
6509bf215546Sopenharmony_ci      if (radv_pipeline_has_ngg(pipeline)) {
6510bf215546Sopenharmony_ci         assert(!radv_pipeline_has_gs_copy_shader(&pipeline->base));
6511bf215546Sopenharmony_ci         gs_size = vs_size;
6512bf215546Sopenharmony_ci      }
6513bf215546Sopenharmony_ci
6514bf215546Sopenharmony_ci      /* legacy GS only supports Wave64 */
6515bf215546Sopenharmony_ci      stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) |
6516bf215546Sopenharmony_ci                S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) |
6517bf215546Sopenharmony_ci                S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0);
6518bf215546Sopenharmony_ci   }
6519bf215546Sopenharmony_ci
6520bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
6521bf215546Sopenharmony_ci}
6522bf215546Sopenharmony_ci
6523bf215546Sopenharmony_cistatic void
6524bf215546Sopenharmony_ciradv_pipeline_emit_cliprect_rule(struct radeon_cmdbuf *ctx_cs,
6525bf215546Sopenharmony_ci                                 const struct radv_graphics_pipeline_info *info)
6526bf215546Sopenharmony_ci{
6527bf215546Sopenharmony_ci   uint32_t cliprect_rule = 0;
6528bf215546Sopenharmony_ci
6529bf215546Sopenharmony_ci   if (!info->dr.count) {
6530bf215546Sopenharmony_ci      cliprect_rule = 0xffff;
6531bf215546Sopenharmony_ci   } else {
6532bf215546Sopenharmony_ci      for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) {
6533bf215546Sopenharmony_ci         /* Interpret i as a bitmask, and then set the bit in
6534bf215546Sopenharmony_ci          * the mask if that combination of rectangles in which
6535bf215546Sopenharmony_ci          * the pixel is contained should pass the cliprect
6536bf215546Sopenharmony_ci          * test.
6537bf215546Sopenharmony_ci          */
6538bf215546Sopenharmony_ci         unsigned relevant_subset = i & ((1u << info->dr.count) - 1);
6539bf215546Sopenharmony_ci
6540bf215546Sopenharmony_ci         if (info->dr.mode == VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT && !relevant_subset)
6541bf215546Sopenharmony_ci            continue;
6542bf215546Sopenharmony_ci
6543bf215546Sopenharmony_ci         if (info->dr.mode == VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT && relevant_subset)
6544bf215546Sopenharmony_ci            continue;
6545bf215546Sopenharmony_ci
6546bf215546Sopenharmony_ci         cliprect_rule |= 1u << i;
6547bf215546Sopenharmony_ci      }
6548bf215546Sopenharmony_ci   }
6549bf215546Sopenharmony_ci
6550bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule);
6551bf215546Sopenharmony_ci}
6552bf215546Sopenharmony_ci
6553bf215546Sopenharmony_cistatic void
6554bf215546Sopenharmony_cigfx10_pipeline_emit_ge_cntl(struct radeon_cmdbuf *ctx_cs,
6555bf215546Sopenharmony_ci                            const struct radv_graphics_pipeline *pipeline)
6556bf215546Sopenharmony_ci{
6557bf215546Sopenharmony_ci   bool break_wave_at_eoi = false;
6558bf215546Sopenharmony_ci   unsigned primgroup_size;
6559bf215546Sopenharmony_ci   unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */
6560bf215546Sopenharmony_ci
6561bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
6562bf215546Sopenharmony_ci      primgroup_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
6563bf215546Sopenharmony_ci   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
6564bf215546Sopenharmony_ci      const struct gfx9_gs_info *gs_state =
6565bf215546Sopenharmony_ci         &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
6566bf215546Sopenharmony_ci      unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl;
6567bf215546Sopenharmony_ci      primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl);
6568bf215546Sopenharmony_ci   } else {
6569bf215546Sopenharmony_ci      primgroup_size = 128; /* recommended without a GS and tess */
6570bf215546Sopenharmony_ci   }
6571bf215546Sopenharmony_ci
6572bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
6573bf215546Sopenharmony_ci      if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
6574bf215546Sopenharmony_ci          radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
6575bf215546Sopenharmony_ci         break_wave_at_eoi = true;
6576bf215546Sopenharmony_ci   }
6577bf215546Sopenharmony_ci
6578bf215546Sopenharmony_ci   radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL,
6579bf215546Sopenharmony_ci                          S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) |
6580bf215546Sopenharmony_ci                             S_03096C_VERT_GRP_SIZE(vertgroup_size) |
6581bf215546Sopenharmony_ci                             S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ |
6582bf215546Sopenharmony_ci                             S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi));
6583bf215546Sopenharmony_ci}
6584bf215546Sopenharmony_ci
6585bf215546Sopenharmony_cistatic void
6586bf215546Sopenharmony_ciradv_pipeline_emit_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
6587bf215546Sopenharmony_ci                              const struct radv_graphics_pipeline *pipeline,
6588bf215546Sopenharmony_ci                              uint32_t vgt_gs_out_prim_type)
6589bf215546Sopenharmony_ci{
6590bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6591bf215546Sopenharmony_ci
6592bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX11) {
6593bf215546Sopenharmony_ci      radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
6594bf215546Sopenharmony_ci   } else {
6595bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
6596bf215546Sopenharmony_ci   }
6597bf215546Sopenharmony_ci}
6598bf215546Sopenharmony_ci
6599bf215546Sopenharmony_cistatic void
6600bf215546Sopenharmony_cigfx103_pipeline_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs,
6601bf215546Sopenharmony_ci                                           const struct radv_graphics_pipeline *pipeline,
6602bf215546Sopenharmony_ci                                           const struct radv_graphics_pipeline_info *info)
6603bf215546Sopenharmony_ci{
6604bf215546Sopenharmony_ci   const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
6605bf215546Sopenharmony_ci
6606bf215546Sopenharmony_ci   bool enable_vrs = radv_is_vrs_enabled(pipeline, info);
6607bf215546Sopenharmony_ci
6608bf215546Sopenharmony_ci   /* Enables the second channel of the primitive export instruction.
6609bf215546Sopenharmony_ci    * This channel contains: VRS rate x, y, viewport and layer.
6610bf215546Sopenharmony_ci    */
6611bf215546Sopenharmony_ci   bool enable_prim_payload =
6612bf215546Sopenharmony_ci      outinfo &&
6613bf215546Sopenharmony_ci      (outinfo->writes_viewport_index_per_primitive ||
6614bf215546Sopenharmony_ci       outinfo->writes_layer_per_primitive ||
6615bf215546Sopenharmony_ci       outinfo->writes_primitive_shading_rate_per_primitive);
6616bf215546Sopenharmony_ci
6617bf215546Sopenharmony_ci   radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,
6618bf215546Sopenharmony_ci                          S_028A98_EN_VRS_RATE(enable_vrs) |
6619bf215546Sopenharmony_ci                          S_028A98_EN_PRIM_PAYLOAD(enable_prim_payload));
6620bf215546Sopenharmony_ci}
6621bf215546Sopenharmony_ci
6622bf215546Sopenharmony_cistatic bool
6623bf215546Sopenharmony_cigfx103_pipeline_vrs_coarse_shading(const struct radv_graphics_pipeline *pipeline)
6624bf215546Sopenharmony_ci{
6625bf215546Sopenharmony_ci   struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
6626bf215546Sopenharmony_ci   struct radv_device *device = pipeline->base.device;
6627bf215546Sopenharmony_ci
6628bf215546Sopenharmony_ci   if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
6629bf215546Sopenharmony_ci      return false;
6630bf215546Sopenharmony_ci
6631bf215546Sopenharmony_ci   if (!ps->info.ps.allow_flat_shading)
6632bf215546Sopenharmony_ci      return false;
6633bf215546Sopenharmony_ci
6634bf215546Sopenharmony_ci   return true;
6635bf215546Sopenharmony_ci}
6636bf215546Sopenharmony_ci
6637bf215546Sopenharmony_cistatic void
6638bf215546Sopenharmony_cigfx103_pipeline_emit_vrs_state(struct radeon_cmdbuf *ctx_cs,
6639bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline *pipeline,
6640bf215546Sopenharmony_ci                               const struct radv_graphics_pipeline_info *info)
6641bf215546Sopenharmony_ci{
6642bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6643bf215546Sopenharmony_ci   uint32_t mode = V_028064_VRS_COMB_MODE_PASSTHRU;
6644bf215546Sopenharmony_ci   uint8_t rate_x = 0, rate_y = 0;
6645bf215546Sopenharmony_ci   bool enable_vrs = radv_is_vrs_enabled(pipeline, info);
6646bf215546Sopenharmony_ci
6647bf215546Sopenharmony_ci   if (!enable_vrs && gfx103_pipeline_vrs_coarse_shading(pipeline)) {
6648bf215546Sopenharmony_ci      /* When per-draw VRS is not enabled at all, try enabling VRS coarse shading 2x2 if the driver
6649bf215546Sopenharmony_ci       * determined that it's safe to enable.
6650bf215546Sopenharmony_ci       */
6651bf215546Sopenharmony_ci      mode = V_028064_VRS_COMB_MODE_OVERRIDE;
6652bf215546Sopenharmony_ci      rate_x = rate_y = 1;
6653bf215546Sopenharmony_ci   } else if (!radv_is_static_vrs_enabled(pipeline, info) && pipeline->force_vrs_per_vertex &&
6654bf215546Sopenharmony_ci              get_vs_output_info(pipeline)->writes_primitive_shading_rate) {
6655bf215546Sopenharmony_ci      /* Otherwise, if per-draw VRS is not enabled statically, try forcing per-vertex VRS if
6656bf215546Sopenharmony_ci       * requested by the user. Note that vkd3d-proton always has to declare VRS as dynamic because
6657bf215546Sopenharmony_ci       * in DX12 it's fully dynamic.
6658bf215546Sopenharmony_ci       */
6659bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL,
6660bf215546Sopenharmony_ci         S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
6661bf215546Sopenharmony_ci         S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
6662bf215546Sopenharmony_ci
6663bf215546Sopenharmony_ci      /* If the shader is using discard, turn off coarse shading because discard at 2x2 pixel
6664bf215546Sopenharmony_ci       * granularity degrades quality too much. MIN allows sample shading but not coarse shading.
6665bf215546Sopenharmony_ci       */
6666bf215546Sopenharmony_ci      struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
6667bf215546Sopenharmony_ci
6668bf215546Sopenharmony_ci      mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU;
6669bf215546Sopenharmony_ci   }
6670bf215546Sopenharmony_ci
6671bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX11) {
6672bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
6673bf215546Sopenharmony_ci                             S_0283D0_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
6674bf215546Sopenharmony_ci                                S_0283D0_VRS_RATE((rate_x << 2) | rate_y));
6675bf215546Sopenharmony_ci   } else {
6676bf215546Sopenharmony_ci      radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL,
6677bf215546Sopenharmony_ci                             S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
6678bf215546Sopenharmony_ci                                S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
6679bf215546Sopenharmony_ci                                S_028064_VRS_OVERRIDE_RATE_Y(rate_y));
6680bf215546Sopenharmony_ci   }
6681bf215546Sopenharmony_ci}
6682bf215546Sopenharmony_ci
6683bf215546Sopenharmony_cistatic void
6684bf215546Sopenharmony_ciradv_pipeline_emit_pm4(struct radv_graphics_pipeline *pipeline,
6685bf215546Sopenharmony_ci                       const struct radv_blend_state *blend,
6686bf215546Sopenharmony_ci                       const struct radv_depth_stencil_state *ds_state,
6687bf215546Sopenharmony_ci                       uint32_t vgt_gs_out_prim_type,
6688bf215546Sopenharmony_ci                       const struct radv_graphics_pipeline_info *info)
6689bf215546Sopenharmony_ci{
6690bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6691bf215546Sopenharmony_ci   struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs;
6692bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &pipeline->base.cs;
6693bf215546Sopenharmony_ci
6694bf215546Sopenharmony_ci   cs->max_dw = 64;
6695bf215546Sopenharmony_ci   ctx_cs->max_dw = 256;
6696bf215546Sopenharmony_ci   cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
6697bf215546Sopenharmony_ci   ctx_cs->buf = cs->buf + cs->max_dw;
6698bf215546Sopenharmony_ci
6699bf215546Sopenharmony_ci   radv_pipeline_emit_depth_stencil_state(ctx_cs, ds_state);
6700bf215546Sopenharmony_ci   radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend);
6701bf215546Sopenharmony_ci   radv_pipeline_emit_raster_state(ctx_cs, pipeline, info);
6702bf215546Sopenharmony_ci   radv_pipeline_emit_multisample_state(ctx_cs, pipeline);
6703bf215546Sopenharmony_ci   radv_pipeline_emit_vgt_gs_mode(ctx_cs, pipeline);
6704bf215546Sopenharmony_ci   radv_pipeline_emit_vertex_shader(ctx_cs, cs, pipeline);
6705bf215546Sopenharmony_ci   radv_pipeline_emit_mesh_shader(ctx_cs, cs, pipeline);
6706bf215546Sopenharmony_ci
6707bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
6708bf215546Sopenharmony_ci      radv_pipeline_emit_tess_shaders(ctx_cs, cs, pipeline);
6709bf215546Sopenharmony_ci      radv_pipeline_emit_tess_state(ctx_cs, pipeline, info);
6710bf215546Sopenharmony_ci   }
6711bf215546Sopenharmony_ci
6712bf215546Sopenharmony_ci   radv_pipeline_emit_geometry_shader(ctx_cs, cs, pipeline);
6713bf215546Sopenharmony_ci   radv_pipeline_emit_fragment_shader(ctx_cs, cs, pipeline);
6714bf215546Sopenharmony_ci   radv_pipeline_emit_ps_inputs(ctx_cs, pipeline);
6715bf215546Sopenharmony_ci   radv_pipeline_emit_vgt_vertex_reuse(ctx_cs, pipeline);
6716bf215546Sopenharmony_ci   radv_pipeline_emit_vgt_shader_config(ctx_cs, pipeline);
6717bf215546Sopenharmony_ci   radv_pipeline_emit_cliprect_rule(ctx_cs, info);
6718bf215546Sopenharmony_ci   radv_pipeline_emit_vgt_gs_out(ctx_cs, pipeline, vgt_gs_out_prim_type);
6719bf215546Sopenharmony_ci
6720bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10 && !radv_pipeline_has_ngg(pipeline))
6721bf215546Sopenharmony_ci      gfx10_pipeline_emit_ge_cntl(ctx_cs, pipeline);
6722bf215546Sopenharmony_ci
6723bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10_3) {
6724bf215546Sopenharmony_ci      gfx103_pipeline_emit_vgt_draw_payload_cntl(ctx_cs, pipeline, info);
6725bf215546Sopenharmony_ci      gfx103_pipeline_emit_vrs_state(ctx_cs, pipeline, info);
6726bf215546Sopenharmony_ci   }
6727bf215546Sopenharmony_ci
6728bf215546Sopenharmony_ci   pipeline->base.ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4);
6729bf215546Sopenharmony_ci
6730bf215546Sopenharmony_ci   assert(ctx_cs->cdw <= ctx_cs->max_dw);
6731bf215546Sopenharmony_ci   assert(cs->cdw <= cs->max_dw);
6732bf215546Sopenharmony_ci}
6733bf215546Sopenharmony_ci
6734bf215546Sopenharmony_cistatic void
6735bf215546Sopenharmony_ciradv_pipeline_init_vertex_input_state(struct radv_graphics_pipeline *pipeline,
6736bf215546Sopenharmony_ci                                      const struct radv_graphics_pipeline_info *info)
6737bf215546Sopenharmony_ci{
6738bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
6739bf215546Sopenharmony_ci   const struct radv_shader_info *vs_info = &radv_get_shader(&pipeline->base, MESA_SHADER_VERTEX)->info;
6740bf215546Sopenharmony_ci
6741bf215546Sopenharmony_ci   for (uint32_t i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
6742bf215546Sopenharmony_ci      pipeline->attrib_ends[i] = info->vi.attrib_ends[i];
6743bf215546Sopenharmony_ci      pipeline->attrib_index_offset[i] = info->vi.attrib_index_offset[i];
6744bf215546Sopenharmony_ci      pipeline->attrib_bindings[i] = info->vi.attrib_bindings[i];
6745bf215546Sopenharmony_ci   }
6746bf215546Sopenharmony_ci
6747bf215546Sopenharmony_ci   for (uint32_t i = 0; i < MAX_VBS; i++) {
6748bf215546Sopenharmony_ci      pipeline->binding_stride[i] = info->vi.binding_stride[i];
6749bf215546Sopenharmony_ci   }
6750bf215546Sopenharmony_ci
6751bf215546Sopenharmony_ci   pipeline->use_per_attribute_vb_descs = vs_info->vs.use_per_attribute_vb_descs;
6752bf215546Sopenharmony_ci   pipeline->last_vertex_attrib_bit = util_last_bit(vs_info->vs.vb_desc_usage_mask);
6753bf215546Sopenharmony_ci   if (pipeline->base.shaders[MESA_SHADER_VERTEX])
6754bf215546Sopenharmony_ci      pipeline->next_vertex_stage = MESA_SHADER_VERTEX;
6755bf215546Sopenharmony_ci   else if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL])
6756bf215546Sopenharmony_ci      pipeline->next_vertex_stage = MESA_SHADER_TESS_CTRL;
6757bf215546Sopenharmony_ci   else
6758bf215546Sopenharmony_ci      pipeline->next_vertex_stage = MESA_SHADER_GEOMETRY;
6759bf215546Sopenharmony_ci   if (pipeline->next_vertex_stage == MESA_SHADER_VERTEX) {
6760bf215546Sopenharmony_ci      const struct radv_shader *vs_shader = pipeline->base.shaders[MESA_SHADER_VERTEX];
6761bf215546Sopenharmony_ci      pipeline->can_use_simple_input = vs_shader->info.is_ngg == pdevice->use_ngg &&
6762bf215546Sopenharmony_ci                                       vs_shader->info.wave_size == pdevice->ge_wave_size;
6763bf215546Sopenharmony_ci   } else {
6764bf215546Sopenharmony_ci      pipeline->can_use_simple_input = false;
6765bf215546Sopenharmony_ci   }
6766bf215546Sopenharmony_ci   if (vs_info->vs.dynamic_inputs)
6767bf215546Sopenharmony_ci      pipeline->vb_desc_usage_mask = BITFIELD_MASK(pipeline->last_vertex_attrib_bit);
6768bf215546Sopenharmony_ci   else
6769bf215546Sopenharmony_ci      pipeline->vb_desc_usage_mask = vs_info->vs.vb_desc_usage_mask;
6770bf215546Sopenharmony_ci   pipeline->vb_desc_alloc_size = util_bitcount(pipeline->vb_desc_usage_mask) * 16;
6771bf215546Sopenharmony_ci}
6772bf215546Sopenharmony_ci
6773bf215546Sopenharmony_cistatic struct radv_shader *
6774bf215546Sopenharmony_ciradv_pipeline_get_streamout_shader(struct radv_graphics_pipeline *pipeline)
6775bf215546Sopenharmony_ci{
6776bf215546Sopenharmony_ci   int i;
6777bf215546Sopenharmony_ci
6778bf215546Sopenharmony_ci   for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
6779bf215546Sopenharmony_ci      struct radv_shader *shader = radv_get_shader(&pipeline->base, i);
6780bf215546Sopenharmony_ci
6781bf215546Sopenharmony_ci      if (shader && shader->info.so.num_outputs > 0)
6782bf215546Sopenharmony_ci         return shader;
6783bf215546Sopenharmony_ci   }
6784bf215546Sopenharmony_ci
6785bf215546Sopenharmony_ci   return NULL;
6786bf215546Sopenharmony_ci}
6787bf215546Sopenharmony_ci
6788bf215546Sopenharmony_cistatic bool
6789bf215546Sopenharmony_ciradv_shader_need_indirect_descriptor_sets(struct radv_pipeline *pipeline, gl_shader_stage stage)
6790bf215546Sopenharmony_ci{
6791bf215546Sopenharmony_ci   struct radv_userdata_info *loc =
6792bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, stage, AC_UD_INDIRECT_DESCRIPTOR_SETS);
6793bf215546Sopenharmony_ci   return loc->sgpr_idx != -1;
6794bf215546Sopenharmony_ci}
6795bf215546Sopenharmony_ci
6796bf215546Sopenharmony_cistatic void
6797bf215546Sopenharmony_ciradv_pipeline_init_shader_stages_state(struct radv_graphics_pipeline *pipeline)
6798bf215546Sopenharmony_ci{
6799bf215546Sopenharmony_ci   struct radv_device *device = pipeline->base.device;
6800bf215546Sopenharmony_ci
6801bf215546Sopenharmony_ci   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
6802bf215546Sopenharmony_ci      bool shader_exists = !!pipeline->base.shaders[i];
6803bf215546Sopenharmony_ci      if (shader_exists || i < MESA_SHADER_COMPUTE) {
6804bf215546Sopenharmony_ci         /* We need this info for some stages even when the shader doesn't exist. */
6805bf215546Sopenharmony_ci         pipeline->base.user_data_0[i] = radv_pipeline_stage_to_user_data_0(
6806bf215546Sopenharmony_ci            pipeline, i, device->physical_device->rad_info.gfx_level);
6807bf215546Sopenharmony_ci
6808bf215546Sopenharmony_ci         if (shader_exists)
6809bf215546Sopenharmony_ci            pipeline->base.need_indirect_descriptor_sets |=
6810bf215546Sopenharmony_ci               radv_shader_need_indirect_descriptor_sets(&pipeline->base, i);
6811bf215546Sopenharmony_ci      }
6812bf215546Sopenharmony_ci   }
6813bf215546Sopenharmony_ci
6814bf215546Sopenharmony_ci   gl_shader_stage first_stage =
6815bf215546Sopenharmony_ci      radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX;
6816bf215546Sopenharmony_ci
6817bf215546Sopenharmony_ci   struct radv_userdata_info *loc =
6818bf215546Sopenharmony_ci      radv_lookup_user_sgpr(&pipeline->base, first_stage, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
6819bf215546Sopenharmony_ci   if (loc->sgpr_idx != -1) {
6820bf215546Sopenharmony_ci      pipeline->vtx_base_sgpr = pipeline->base.user_data_0[first_stage];
6821bf215546Sopenharmony_ci      pipeline->vtx_base_sgpr += loc->sgpr_idx * 4;
6822bf215546Sopenharmony_ci      pipeline->vtx_emit_num = loc->num_sgprs;
6823bf215546Sopenharmony_ci      pipeline->uses_drawid =
6824bf215546Sopenharmony_ci         radv_get_shader(&pipeline->base, first_stage)->info.vs.needs_draw_id;
6825bf215546Sopenharmony_ci      pipeline->uses_baseinstance =
6826bf215546Sopenharmony_ci         radv_get_shader(&pipeline->base, first_stage)->info.vs.needs_base_instance;
6827bf215546Sopenharmony_ci
6828bf215546Sopenharmony_ci      assert(first_stage != MESA_SHADER_MESH || !pipeline->uses_baseinstance);
6829bf215546Sopenharmony_ci   }
6830bf215546Sopenharmony_ci}
6831bf215546Sopenharmony_ci
6832bf215546Sopenharmony_cistatic uint32_t
6833bf215546Sopenharmony_ciradv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline,
6834bf215546Sopenharmony_ci                              const struct radv_graphics_pipeline_info *info)
6835bf215546Sopenharmony_ci{
6836bf215546Sopenharmony_ci   uint32_t gs_out;
6837bf215546Sopenharmony_ci
6838bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
6839bf215546Sopenharmony_ci      gs_out =
6840bf215546Sopenharmony_ci         si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
6841bf215546Sopenharmony_ci   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
6842bf215546Sopenharmony_ci      if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
6843bf215546Sopenharmony_ci         gs_out = V_028A6C_POINTLIST;
6844bf215546Sopenharmony_ci      } else {
6845bf215546Sopenharmony_ci         gs_out = si_conv_tess_prim_to_gs_out(
6846bf215546Sopenharmony_ci            pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode);
6847bf215546Sopenharmony_ci      }
6848bf215546Sopenharmony_ci   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
6849bf215546Sopenharmony_ci      gs_out =
6850bf215546Sopenharmony_ci         si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.output_prim);
6851bf215546Sopenharmony_ci   } else {
6852bf215546Sopenharmony_ci      gs_out = si_conv_prim_to_gs_out(info->ia.primitive_topology);
6853bf215546Sopenharmony_ci   }
6854bf215546Sopenharmony_ci
6855bf215546Sopenharmony_ci   return gs_out;
6856bf215546Sopenharmony_ci}
6857bf215546Sopenharmony_ci
6858bf215546Sopenharmony_cistatic void
6859bf215546Sopenharmony_ciradv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline,
6860bf215546Sopenharmony_ci                         const struct radv_graphics_pipeline_create_info *extra,
6861bf215546Sopenharmony_ci                         struct radv_blend_state *blend_state,
6862bf215546Sopenharmony_ci                         struct radv_depth_stencil_state *ds_state,
6863bf215546Sopenharmony_ci                         const struct radv_graphics_pipeline_info *info,
6864bf215546Sopenharmony_ci                         uint32_t *vgt_gs_out_prim_type)
6865bf215546Sopenharmony_ci{
6866bf215546Sopenharmony_ci   if (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
6867bf215546Sopenharmony_ci       extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
6868bf215546Sopenharmony_ci       extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX8 ||
6869bf215546Sopenharmony_ci       extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11 ||
6870bf215546Sopenharmony_ci       extra->custom_blend_mode == V_028808_CB_RESOLVE) {
6871bf215546Sopenharmony_ci      /* According to the CB spec states, CB_SHADER_MASK should be set to enable writes to all four
6872bf215546Sopenharmony_ci       * channels of MRT0.
6873bf215546Sopenharmony_ci       */
6874bf215546Sopenharmony_ci      blend_state->cb_shader_mask = 0xf;
6875bf215546Sopenharmony_ci
6876bf215546Sopenharmony_ci      if (extra->custom_blend_mode == V_028808_CB_RESOLVE)
6877bf215546Sopenharmony_ci         pipeline->cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);
6878bf215546Sopenharmony_ci
6879bf215546Sopenharmony_ci      pipeline->cb_color_control &= C_028808_MODE;
6880bf215546Sopenharmony_ci      pipeline->cb_color_control |= S_028808_MODE(extra->custom_blend_mode);
6881bf215546Sopenharmony_ci   }
6882bf215546Sopenharmony_ci
6883bf215546Sopenharmony_ci   if (extra->use_rectlist) {
6884bf215546Sopenharmony_ci      struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
6885bf215546Sopenharmony_ci      dynamic->primitive_topology = V_008958_DI_PT_RECTLIST;
6886bf215546Sopenharmony_ci
6887bf215546Sopenharmony_ci      *vgt_gs_out_prim_type = V_028A6C_TRISTRIP;
6888bf215546Sopenharmony_ci      if (radv_pipeline_has_ngg(pipeline))
6889bf215546Sopenharmony_ci         *vgt_gs_out_prim_type = V_028A6C_RECTLIST;
6890bf215546Sopenharmony_ci
6891bf215546Sopenharmony_ci      pipeline->rast_prim = *vgt_gs_out_prim_type;
6892bf215546Sopenharmony_ci   }
6893bf215546Sopenharmony_ci
6894bf215546Sopenharmony_ci   if (radv_pipeline_has_ds_attachments(&info->ri)) {
6895bf215546Sopenharmony_ci      ds_state->db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
6896bf215546Sopenharmony_ci      ds_state->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
6897bf215546Sopenharmony_ci      ds_state->db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable);
6898bf215546Sopenharmony_ci      ds_state->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
6899bf215546Sopenharmony_ci      ds_state->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
6900bf215546Sopenharmony_ci   }
6901bf215546Sopenharmony_ci}
6902bf215546Sopenharmony_ci
6903bf215546Sopenharmony_civoid
6904bf215546Sopenharmony_ciradv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline,
6905bf215546Sopenharmony_ci                    enum radv_pipeline_type type)
6906bf215546Sopenharmony_ci{
6907bf215546Sopenharmony_ci   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
6908bf215546Sopenharmony_ci
6909bf215546Sopenharmony_ci   pipeline->device = device;
6910bf215546Sopenharmony_ci   pipeline->type = type;
6911bf215546Sopenharmony_ci}
6912bf215546Sopenharmony_ci
6913bf215546Sopenharmony_cistatic VkResult
6914bf215546Sopenharmony_ciradv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device,
6915bf215546Sopenharmony_ci                            struct radv_pipeline_cache *cache,
6916bf215546Sopenharmony_ci                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
6917bf215546Sopenharmony_ci                            const struct radv_graphics_pipeline_create_info *extra)
6918bf215546Sopenharmony_ci{
6919bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
6920bf215546Sopenharmony_ci   VkResult result;
6921bf215546Sopenharmony_ci
6922bf215546Sopenharmony_ci   pipeline->last_vgt_api_stage = MESA_SHADER_NONE;
6923bf215546Sopenharmony_ci
6924bf215546Sopenharmony_ci   /* Mark all states declared dynamic at pipeline creation. */
6925bf215546Sopenharmony_ci   if (pCreateInfo->pDynamicState) {
6926bf215546Sopenharmony_ci      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
6927bf215546Sopenharmony_ci      for (uint32_t s = 0; s < count; s++) {
6928bf215546Sopenharmony_ci         pipeline->dynamic_states |=
6929bf215546Sopenharmony_ci            radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
6930bf215546Sopenharmony_ci      }
6931bf215546Sopenharmony_ci   }
6932bf215546Sopenharmony_ci
6933bf215546Sopenharmony_ci   /* Mark all active stages at pipeline creation. */
6934bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
6935bf215546Sopenharmony_ci      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
6936bf215546Sopenharmony_ci
6937bf215546Sopenharmony_ci      pipeline->active_stages |= sinfo->stage;
6938bf215546Sopenharmony_ci   }
6939bf215546Sopenharmony_ci
6940bf215546Sopenharmony_ci   struct radv_graphics_pipeline_info info = radv_pipeline_init_graphics_info(pipeline, pCreateInfo);
6941bf215546Sopenharmony_ci
6942bf215546Sopenharmony_ci   struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, &info);
6943bf215546Sopenharmony_ci
6944bf215546Sopenharmony_ci   const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
6945bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
6946bf215546Sopenharmony_ci
6947bf215546Sopenharmony_ci   struct radv_pipeline_key key =
6948bf215546Sopenharmony_ci      radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &info, &blend);
6949bf215546Sopenharmony_ci
6950bf215546Sopenharmony_ci   result = radv_create_shaders(&pipeline->base, pipeline_layout, device, cache, &key, pCreateInfo->pStages,
6951bf215546Sopenharmony_ci                                pCreateInfo->stageCount, pCreateInfo->flags, NULL,
6952bf215546Sopenharmony_ci                                creation_feedback, NULL, NULL, &pipeline->last_vgt_api_stage);
6953bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
6954bf215546Sopenharmony_ci      return result;
6955bf215546Sopenharmony_ci
6956bf215546Sopenharmony_ci   pipeline->spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
6957bf215546Sopenharmony_ci
6958bf215546Sopenharmony_ci   uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &info);
6959bf215546Sopenharmony_ci
6960bf215546Sopenharmony_ci   radv_pipeline_init_multisample_state(pipeline, &blend, &info, vgt_gs_out_prim_type);
6961bf215546Sopenharmony_ci
6962bf215546Sopenharmony_ci   if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
6963bf215546Sopenharmony_ci      radv_pipeline_init_input_assembly_state(pipeline, &info);
6964bf215546Sopenharmony_ci   radv_pipeline_init_dynamic_state(pipeline, &info);
6965bf215546Sopenharmony_ci
6966bf215546Sopenharmony_ci   pipeline->negative_one_to_one = info.vp.negative_one_to_one;
6967bf215546Sopenharmony_ci
6968bf215546Sopenharmony_ci   radv_pipeline_init_raster_state(pipeline, &info);
6969bf215546Sopenharmony_ci
6970bf215546Sopenharmony_ci   struct radv_depth_stencil_state ds_state =
6971bf215546Sopenharmony_ci      radv_pipeline_init_depth_stencil_state(pipeline, &info);
6972bf215546Sopenharmony_ci
6973bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX10_3)
6974bf215546Sopenharmony_ci      gfx103_pipeline_init_vrs_state(pipeline, &info);
6975bf215546Sopenharmony_ci
6976bf215546Sopenharmony_ci   /* Ensure that some export memory is always allocated, for two reasons:
6977bf215546Sopenharmony_ci    *
6978bf215546Sopenharmony_ci    * 1) Correctness: The hardware ignores the EXEC mask if no export
6979bf215546Sopenharmony_ci    *    memory is allocated, so KILL and alpha test do not work correctly
6980bf215546Sopenharmony_ci    *    without this.
6981bf215546Sopenharmony_ci    * 2) Performance: Every shader needs at least a NULL export, even when
6982bf215546Sopenharmony_ci    *    it writes no color/depth output. The NULL export instruction
6983bf215546Sopenharmony_ci    *    stalls without this setting.
6984bf215546Sopenharmony_ci    *
6985bf215546Sopenharmony_ci    * Don't add this to CB_SHADER_MASK.
6986bf215546Sopenharmony_ci    *
6987bf215546Sopenharmony_ci    * GFX10 supports pixel shaders without exports by setting both the
6988bf215546Sopenharmony_ci    * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
6989bf215546Sopenharmony_ci    * instructions if any are present.
6990bf215546Sopenharmony_ci    */
6991bf215546Sopenharmony_ci   struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
6992bf215546Sopenharmony_ci   if ((device->physical_device->rad_info.gfx_level <= GFX9 || ps->info.ps.can_discard) &&
6993bf215546Sopenharmony_ci       !blend.spi_shader_col_format) {
6994bf215546Sopenharmony_ci      if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask)
6995bf215546Sopenharmony_ci         blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
6996bf215546Sopenharmony_ci   }
6997bf215546Sopenharmony_ci
6998bf215546Sopenharmony_ci   pipeline->col_format = blend.spi_shader_col_format;
6999bf215546Sopenharmony_ci   pipeline->cb_target_mask = blend.cb_target_mask;
7000bf215546Sopenharmony_ci
7001bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && !radv_pipeline_has_ngg(pipeline)) {
7002bf215546Sopenharmony_ci      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];
7003bf215546Sopenharmony_ci
7004bf215546Sopenharmony_ci      radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
7005bf215546Sopenharmony_ci   }
7006bf215546Sopenharmony_ci
7007bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
7008bf215546Sopenharmony_ci      pipeline->tess_patch_control_points = info.ts.patch_control_points;
7009bf215546Sopenharmony_ci   }
7010bf215546Sopenharmony_ci
7011bf215546Sopenharmony_ci   if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
7012bf215546Sopenharmony_ci      radv_pipeline_init_vertex_input_state(pipeline, &info);
7013bf215546Sopenharmony_ci
7014bf215546Sopenharmony_ci   radv_pipeline_init_binning_state(pipeline, &blend, &info);
7015bf215546Sopenharmony_ci   radv_pipeline_init_shader_stages_state(pipeline);
7016bf215546Sopenharmony_ci   radv_pipeline_init_scratch(device, &pipeline->base);
7017bf215546Sopenharmony_ci
7018bf215546Sopenharmony_ci   /* Find the last vertex shader stage that eventually uses streamout. */
7019bf215546Sopenharmony_ci   pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
7020bf215546Sopenharmony_ci
7021bf215546Sopenharmony_ci   pipeline->is_ngg = radv_pipeline_has_ngg(pipeline);
7022bf215546Sopenharmony_ci   pipeline->has_ngg_culling =
7023bf215546Sopenharmony_ci      pipeline->is_ngg &&
7024bf215546Sopenharmony_ci      pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling;
7025bf215546Sopenharmony_ci   pipeline->force_vrs_per_vertex =
7026bf215546Sopenharmony_ci      pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex;
7027bf215546Sopenharmony_ci   pipeline->uses_user_sample_locations = info.ms.sample_locs_enable;
7028bf215546Sopenharmony_ci   pipeline->rast_prim = vgt_gs_out_prim_type;
7029bf215546Sopenharmony_ci
7030bf215546Sopenharmony_ci   if (!(pipeline->dynamic_states & RADV_DYNAMIC_LINE_WIDTH)) {
7031bf215546Sopenharmony_ci      pipeline->line_width = info.rs.line_width;
7032bf215546Sopenharmony_ci   }
7033bf215546Sopenharmony_ci
7034bf215546Sopenharmony_ci   pipeline->base.push_constant_size = pipeline_layout->push_constant_size;
7035bf215546Sopenharmony_ci   pipeline->base.dynamic_offset_count = pipeline_layout->dynamic_offset_count;
7036bf215546Sopenharmony_ci
7037bf215546Sopenharmony_ci   if (extra) {
7038bf215546Sopenharmony_ci      radv_pipeline_init_extra(pipeline, extra, &blend, &ds_state, &info, &vgt_gs_out_prim_type);
7039bf215546Sopenharmony_ci   }
7040bf215546Sopenharmony_ci
7041bf215546Sopenharmony_ci   radv_pipeline_emit_pm4(pipeline, &blend, &ds_state, vgt_gs_out_prim_type, &info);
7042bf215546Sopenharmony_ci
7043bf215546Sopenharmony_ci   return result;
7044bf215546Sopenharmony_ci}
7045bf215546Sopenharmony_ci
7046bf215546Sopenharmony_cistatic VkResult
7047bf215546Sopenharmony_ciradv_graphics_pipeline_create_nonlegacy(VkDevice _device, VkPipelineCache _cache,
7048bf215546Sopenharmony_ci                                        const VkGraphicsPipelineCreateInfo *pCreateInfo,
7049bf215546Sopenharmony_ci                                        const struct radv_graphics_pipeline_create_info *extra,
7050bf215546Sopenharmony_ci                                        const VkAllocationCallbacks *pAllocator,
7051bf215546Sopenharmony_ci                                        VkPipeline *pPipeline)
7052bf215546Sopenharmony_ci{
7053bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
7054bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
7055bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline;
7056bf215546Sopenharmony_ci   VkResult result;
7057bf215546Sopenharmony_ci
7058bf215546Sopenharmony_ci   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
7059bf215546Sopenharmony_ci                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7060bf215546Sopenharmony_ci   if (pipeline == NULL)
7061bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
7062bf215546Sopenharmony_ci
7063bf215546Sopenharmony_ci   radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_GRAPHICS);
7064bf215546Sopenharmony_ci
7065bf215546Sopenharmony_ci   result = radv_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, extra);
7066bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
7067bf215546Sopenharmony_ci      radv_pipeline_destroy(device, &pipeline->base, pAllocator);
7068bf215546Sopenharmony_ci      return result;
7069bf215546Sopenharmony_ci   }
7070bf215546Sopenharmony_ci
7071bf215546Sopenharmony_ci   *pPipeline = radv_pipeline_to_handle(&pipeline->base);
7072bf215546Sopenharmony_ci
7073bf215546Sopenharmony_ci   return VK_SUCCESS;
7074bf215546Sopenharmony_ci}
7075bf215546Sopenharmony_ci
7076bf215546Sopenharmony_ci/* This is a wrapper for radv_graphics_pipeline_create_nonlegacy that does all legacy conversions
7077bf215546Sopenharmony_ci * for the VkGraphicsPipelineCreateInfo data. */
7078bf215546Sopenharmony_ciVkResult
7079bf215546Sopenharmony_ciradv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache,
7080bf215546Sopenharmony_ci                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
7081bf215546Sopenharmony_ci                              const struct radv_graphics_pipeline_create_info *extra,
7082bf215546Sopenharmony_ci                              const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
7083bf215546Sopenharmony_ci{
7084bf215546Sopenharmony_ci   VkGraphicsPipelineCreateInfo create_info = *pCreateInfo;
7085bf215546Sopenharmony_ci
7086bf215546Sopenharmony_ci   VkPipelineRenderingCreateInfo rendering_create_info;
7087bf215546Sopenharmony_ci   VkFormat color_formats[MAX_RTS];
7088bf215546Sopenharmony_ci   VkAttachmentSampleCountInfoAMD sample_info;
7089bf215546Sopenharmony_ci   VkSampleCountFlagBits samples[MAX_RTS];
7090bf215546Sopenharmony_ci   if (pCreateInfo->renderPass != VK_NULL_HANDLE) {
7091bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
7092bf215546Sopenharmony_ci      struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
7093bf215546Sopenharmony_ci
7094bf215546Sopenharmony_ci      rendering_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO;
7095bf215546Sopenharmony_ci      rendering_create_info.pNext = create_info.pNext;
7096bf215546Sopenharmony_ci      create_info.pNext = &rendering_create_info;
7097bf215546Sopenharmony_ci
7098bf215546Sopenharmony_ci      rendering_create_info.viewMask = subpass->view_mask;
7099bf215546Sopenharmony_ci
7100bf215546Sopenharmony_ci      VkFormat ds_format =
7101bf215546Sopenharmony_ci         subpass->depth_stencil_attachment
7102bf215546Sopenharmony_ci            ? pass->attachments[subpass->depth_stencil_attachment->attachment].format
7103bf215546Sopenharmony_ci            : VK_FORMAT_UNDEFINED;
7104bf215546Sopenharmony_ci
7105bf215546Sopenharmony_ci      rendering_create_info.depthAttachmentFormat =
7106bf215546Sopenharmony_ci         vk_format_has_depth(ds_format) ? ds_format : VK_FORMAT_UNDEFINED;
7107bf215546Sopenharmony_ci      rendering_create_info.stencilAttachmentFormat =
7108bf215546Sopenharmony_ci         vk_format_has_stencil(ds_format) ? ds_format : VK_FORMAT_UNDEFINED;
7109bf215546Sopenharmony_ci
7110bf215546Sopenharmony_ci      rendering_create_info.colorAttachmentCount = subpass->color_count;
7111bf215546Sopenharmony_ci      rendering_create_info.pColorAttachmentFormats = color_formats;
7112bf215546Sopenharmony_ci      for (unsigned i = 0; i < rendering_create_info.colorAttachmentCount; ++i) {
7113bf215546Sopenharmony_ci         if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
7114bf215546Sopenharmony_ci            color_formats[i] = pass->attachments[subpass->color_attachments[i].attachment].format;
7115bf215546Sopenharmony_ci         else
7116bf215546Sopenharmony_ci            color_formats[i] = VK_FORMAT_UNDEFINED;
7117bf215546Sopenharmony_ci      }
7118bf215546Sopenharmony_ci
7119bf215546Sopenharmony_ci      create_info.renderPass = VK_NULL_HANDLE;
7120bf215546Sopenharmony_ci
7121bf215546Sopenharmony_ci      sample_info.sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD;
7122bf215546Sopenharmony_ci      sample_info.pNext = create_info.pNext;
7123bf215546Sopenharmony_ci      create_info.pNext = &sample_info;
7124bf215546Sopenharmony_ci
7125bf215546Sopenharmony_ci      sample_info.colorAttachmentCount = rendering_create_info.colorAttachmentCount;
7126bf215546Sopenharmony_ci      sample_info.pColorAttachmentSamples = samples;
7127bf215546Sopenharmony_ci      for (unsigned i = 0; i < sample_info.colorAttachmentCount; ++i) {
7128bf215546Sopenharmony_ci         if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
7129bf215546Sopenharmony_ci            samples[i] = pass->attachments[subpass->color_attachments[i].attachment].samples;
7130bf215546Sopenharmony_ci         } else
7131bf215546Sopenharmony_ci            samples[i] = 1;
7132bf215546Sopenharmony_ci      }
7133bf215546Sopenharmony_ci      sample_info.depthStencilAttachmentSamples = subpass->depth_sample_count;
7134bf215546Sopenharmony_ci   }
7135bf215546Sopenharmony_ci
7136bf215546Sopenharmony_ci   return radv_graphics_pipeline_create_nonlegacy(_device, _cache, &create_info, extra, pAllocator,
7137bf215546Sopenharmony_ci                                                  pPipeline);
7138bf215546Sopenharmony_ci}
7139bf215546Sopenharmony_ci
7140bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
7141bf215546Sopenharmony_ciradv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
7142bf215546Sopenharmony_ci                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
7143bf215546Sopenharmony_ci                             const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
7144bf215546Sopenharmony_ci{
7145bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
7146bf215546Sopenharmony_ci   unsigned i = 0;
7147bf215546Sopenharmony_ci
7148bf215546Sopenharmony_ci   for (; i < count; i++) {
7149bf215546Sopenharmony_ci      VkResult r;
7150bf215546Sopenharmony_ci      r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator,
7151bf215546Sopenharmony_ci                                        &pPipelines[i]);
7152bf215546Sopenharmony_ci      if (r != VK_SUCCESS) {
7153bf215546Sopenharmony_ci         result = r;
7154bf215546Sopenharmony_ci         pPipelines[i] = VK_NULL_HANDLE;
7155bf215546Sopenharmony_ci
7156bf215546Sopenharmony_ci         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
7157bf215546Sopenharmony_ci            break;
7158bf215546Sopenharmony_ci      }
7159bf215546Sopenharmony_ci   }
7160bf215546Sopenharmony_ci
7161bf215546Sopenharmony_ci   for (; i < count; ++i)
7162bf215546Sopenharmony_ci      pPipelines[i] = VK_NULL_HANDLE;
7163bf215546Sopenharmony_ci
7164bf215546Sopenharmony_ci   return result;
7165bf215546Sopenharmony_ci}
7166bf215546Sopenharmony_ci
7167bf215546Sopenharmony_civoid
7168bf215546Sopenharmony_ciradv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
7169bf215546Sopenharmony_ci                         const struct radv_shader *shader)
7170bf215546Sopenharmony_ci{
7171bf215546Sopenharmony_ci   uint64_t va = radv_shader_get_va(shader);
7172bf215546Sopenharmony_ci
7173bf215546Sopenharmony_ci   radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
7174bf215546Sopenharmony_ci
7175bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
7176bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc1);
7177bf215546Sopenharmony_ci   radeon_emit(cs, shader->config.rsrc2);
7178bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10) {
7179bf215546Sopenharmony_ci      radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
7180bf215546Sopenharmony_ci   }
7181bf215546Sopenharmony_ci}
7182bf215546Sopenharmony_ci
7183bf215546Sopenharmony_civoid
7184bf215546Sopenharmony_ciradv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
7185bf215546Sopenharmony_ci                                 struct radeon_cmdbuf *cs, const struct radv_shader *shader)
7186bf215546Sopenharmony_ci{
7187bf215546Sopenharmony_ci   unsigned threads_per_threadgroup;
7188bf215546Sopenharmony_ci   unsigned threadgroups_per_cu = 1;
7189bf215546Sopenharmony_ci   unsigned waves_per_threadgroup;
7190bf215546Sopenharmony_ci   unsigned max_waves_per_sh = 0;
7191bf215546Sopenharmony_ci
7192bf215546Sopenharmony_ci   /* Calculate best compute resource limits. */
7193bf215546Sopenharmony_ci   threads_per_threadgroup =
7194bf215546Sopenharmony_ci      shader->info.cs.block_size[0] * shader->info.cs.block_size[1] * shader->info.cs.block_size[2];
7195bf215546Sopenharmony_ci   waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, shader->info.wave_size);
7196bf215546Sopenharmony_ci
7197bf215546Sopenharmony_ci   if (pdevice->rad_info.gfx_level >= GFX10 && waves_per_threadgroup == 1)
7198bf215546Sopenharmony_ci      threadgroups_per_cu = 2;
7199bf215546Sopenharmony_ci
7200bf215546Sopenharmony_ci   radeon_set_sh_reg(
7201bf215546Sopenharmony_ci      cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
7202bf215546Sopenharmony_ci      ac_get_compute_resource_limits(&pdevice->rad_info, waves_per_threadgroup,
7203bf215546Sopenharmony_ci                                     max_waves_per_sh, threadgroups_per_cu));
7204bf215546Sopenharmony_ci
7205bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
7206bf215546Sopenharmony_ci   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
7207bf215546Sopenharmony_ci   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
7208bf215546Sopenharmony_ci   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
7209bf215546Sopenharmony_ci}
7210bf215546Sopenharmony_ci
7211bf215546Sopenharmony_cistatic void
7212bf215546Sopenharmony_ciradv_compute_generate_pm4(struct radv_compute_pipeline *pipeline)
7213bf215546Sopenharmony_ci{
7214bf215546Sopenharmony_ci   struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
7215bf215546Sopenharmony_ci   struct radv_shader *shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
7216bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = &pipeline->base.cs;
7217bf215546Sopenharmony_ci
7218bf215546Sopenharmony_ci   cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 19 : 16;
7219bf215546Sopenharmony_ci   cs->buf = malloc(cs->max_dw * 4);
7220bf215546Sopenharmony_ci
7221bf215546Sopenharmony_ci   radv_pipeline_emit_hw_cs(pdevice, cs, shader);
7222bf215546Sopenharmony_ci   radv_pipeline_emit_compute_state(pdevice, cs, shader);
7223bf215546Sopenharmony_ci
7224bf215546Sopenharmony_ci   assert(pipeline->base.cs.cdw <= pipeline->base.cs.max_dw);
7225bf215546Sopenharmony_ci}
7226bf215546Sopenharmony_ci
7227bf215546Sopenharmony_cistatic struct radv_pipeline_key
7228bf215546Sopenharmony_ciradv_generate_compute_pipeline_key(struct radv_compute_pipeline *pipeline,
7229bf215546Sopenharmony_ci                                   const VkComputePipelineCreateInfo *pCreateInfo)
7230bf215546Sopenharmony_ci{
7231bf215546Sopenharmony_ci   const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
7232bf215546Sopenharmony_ci   struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags);
7233bf215546Sopenharmony_ci
7234bf215546Sopenharmony_ci   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
7235bf215546Sopenharmony_ci      vk_find_struct_const(stage->pNext,
7236bf215546Sopenharmony_ci                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
7237bf215546Sopenharmony_ci
7238bf215546Sopenharmony_ci   if (subgroup_size) {
7239bf215546Sopenharmony_ci      assert(subgroup_size->requiredSubgroupSize == 32 ||
7240bf215546Sopenharmony_ci             subgroup_size->requiredSubgroupSize == 64);
7241bf215546Sopenharmony_ci      key.cs.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
7242bf215546Sopenharmony_ci   } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) {
7243bf215546Sopenharmony_ci      key.cs.require_full_subgroups = true;
7244bf215546Sopenharmony_ci   }
7245bf215546Sopenharmony_ci
7246bf215546Sopenharmony_ci   return key;
7247bf215546Sopenharmony_ci}
7248bf215546Sopenharmony_ci
7249bf215546Sopenharmony_ciVkResult
7250bf215546Sopenharmony_ciradv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
7251bf215546Sopenharmony_ci                             const VkComputePipelineCreateInfo *pCreateInfo,
7252bf215546Sopenharmony_ci                             const VkAllocationCallbacks *pAllocator, const uint8_t *custom_hash,
7253bf215546Sopenharmony_ci                             struct radv_pipeline_shader_stack_size *rt_stack_sizes,
7254bf215546Sopenharmony_ci                             uint32_t rt_group_count, VkPipeline *pPipeline)
7255bf215546Sopenharmony_ci{
7256bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
7257bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
7258bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
7259bf215546Sopenharmony_ci   struct radv_compute_pipeline *pipeline;
7260bf215546Sopenharmony_ci   VkResult result;
7261bf215546Sopenharmony_ci
7262bf215546Sopenharmony_ci   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
7263bf215546Sopenharmony_ci                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7264bf215546Sopenharmony_ci   if (pipeline == NULL) {
7265bf215546Sopenharmony_ci      free(rt_stack_sizes);
7266bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
7267bf215546Sopenharmony_ci   }
7268bf215546Sopenharmony_ci
7269bf215546Sopenharmony_ci   radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_COMPUTE);
7270bf215546Sopenharmony_ci
7271bf215546Sopenharmony_ci   pipeline->rt_stack_sizes = rt_stack_sizes;
7272bf215546Sopenharmony_ci   pipeline->group_count = rt_group_count;
7273bf215546Sopenharmony_ci
7274bf215546Sopenharmony_ci   const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
7275bf215546Sopenharmony_ci      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
7276bf215546Sopenharmony_ci
7277bf215546Sopenharmony_ci   struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
7278bf215546Sopenharmony_ci
7279bf215546Sopenharmony_ci   UNUSED gl_shader_stage last_vgt_api_stage = MESA_SHADER_NONE;
7280bf215546Sopenharmony_ci   result = radv_create_shaders(&pipeline->base, pipeline_layout, device, cache, &key, &pCreateInfo->stage,
7281bf215546Sopenharmony_ci                                1, pCreateInfo->flags, custom_hash, creation_feedback,
7282bf215546Sopenharmony_ci                                &pipeline->rt_stack_sizes, &pipeline->group_count,
7283bf215546Sopenharmony_ci                                &last_vgt_api_stage);
7284bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
7285bf215546Sopenharmony_ci      radv_pipeline_destroy(device, &pipeline->base, pAllocator);
7286bf215546Sopenharmony_ci      return result;
7287bf215546Sopenharmony_ci   }
7288bf215546Sopenharmony_ci
7289bf215546Sopenharmony_ci   pipeline->base.user_data_0[MESA_SHADER_COMPUTE] = R_00B900_COMPUTE_USER_DATA_0;
7290bf215546Sopenharmony_ci   pipeline->base.need_indirect_descriptor_sets |=
7291bf215546Sopenharmony_ci      radv_shader_need_indirect_descriptor_sets(&pipeline->base, MESA_SHADER_COMPUTE);
7292bf215546Sopenharmony_ci   radv_pipeline_init_scratch(device, &pipeline->base);
7293bf215546Sopenharmony_ci
7294bf215546Sopenharmony_ci   pipeline->base.push_constant_size = pipeline_layout->push_constant_size;
7295bf215546Sopenharmony_ci   pipeline->base.dynamic_offset_count = pipeline_layout->dynamic_offset_count;
7296bf215546Sopenharmony_ci
7297bf215546Sopenharmony_ci   if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) {
7298bf215546Sopenharmony_ci      struct radv_shader *compute_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
7299bf215546Sopenharmony_ci      unsigned *cs_block_size = compute_shader->info.cs.block_size;
7300bf215546Sopenharmony_ci
7301bf215546Sopenharmony_ci      pipeline->cs_regalloc_hang_bug = cs_block_size[0] * cs_block_size[1] * cs_block_size[2] > 256;
7302bf215546Sopenharmony_ci   }
7303bf215546Sopenharmony_ci
7304bf215546Sopenharmony_ci   radv_compute_generate_pm4(pipeline);
7305bf215546Sopenharmony_ci
7306bf215546Sopenharmony_ci   *pPipeline = radv_pipeline_to_handle(&pipeline->base);
7307bf215546Sopenharmony_ci
7308bf215546Sopenharmony_ci   return VK_SUCCESS;
7309bf215546Sopenharmony_ci}
7310bf215546Sopenharmony_ci
7311bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
7312bf215546Sopenharmony_ciradv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
7313bf215546Sopenharmony_ci                            const VkComputePipelineCreateInfo *pCreateInfos,
7314bf215546Sopenharmony_ci                            const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
7315bf215546Sopenharmony_ci{
7316bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
7317bf215546Sopenharmony_ci
7318bf215546Sopenharmony_ci   unsigned i = 0;
7319bf215546Sopenharmony_ci   for (; i < count; i++) {
7320bf215546Sopenharmony_ci      VkResult r;
7321bf215546Sopenharmony_ci      r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, NULL,
7322bf215546Sopenharmony_ci                                       NULL, 0, &pPipelines[i]);
7323bf215546Sopenharmony_ci      if (r != VK_SUCCESS) {
7324bf215546Sopenharmony_ci         result = r;
7325bf215546Sopenharmony_ci         pPipelines[i] = VK_NULL_HANDLE;
7326bf215546Sopenharmony_ci
7327bf215546Sopenharmony_ci         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
7328bf215546Sopenharmony_ci            break;
7329bf215546Sopenharmony_ci      }
7330bf215546Sopenharmony_ci   }
7331bf215546Sopenharmony_ci
7332bf215546Sopenharmony_ci   for (; i < count; ++i)
7333bf215546Sopenharmony_ci      pPipelines[i] = VK_NULL_HANDLE;
7334bf215546Sopenharmony_ci
7335bf215546Sopenharmony_ci   return result;
7336bf215546Sopenharmony_ci}
7337bf215546Sopenharmony_ci
7338bf215546Sopenharmony_cistatic uint32_t
7339bf215546Sopenharmony_ciradv_get_executable_count(struct radv_pipeline *pipeline)
7340bf215546Sopenharmony_ci{
7341bf215546Sopenharmony_ci   uint32_t ret = 0;
7342bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
7343bf215546Sopenharmony_ci      if (!pipeline->shaders[i])
7344bf215546Sopenharmony_ci         continue;
7345bf215546Sopenharmony_ci
7346bf215546Sopenharmony_ci      if (i == MESA_SHADER_GEOMETRY &&
7347bf215546Sopenharmony_ci          !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
7348bf215546Sopenharmony_ci         ret += 2u;
7349bf215546Sopenharmony_ci      } else {
7350bf215546Sopenharmony_ci         ret += 1u;
7351bf215546Sopenharmony_ci      }
7352bf215546Sopenharmony_ci   }
7353bf215546Sopenharmony_ci   return ret;
7354bf215546Sopenharmony_ci}
7355bf215546Sopenharmony_ci
7356bf215546Sopenharmony_cistatic struct radv_shader *
7357bf215546Sopenharmony_ciradv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index,
7358bf215546Sopenharmony_ci                                      gl_shader_stage *stage)
7359bf215546Sopenharmony_ci{
7360bf215546Sopenharmony_ci   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
7361bf215546Sopenharmony_ci      if (!pipeline->shaders[i])
7362bf215546Sopenharmony_ci         continue;
7363bf215546Sopenharmony_ci      if (!index) {
7364bf215546Sopenharmony_ci         *stage = i;
7365bf215546Sopenharmony_ci         return pipeline->shaders[i];
7366bf215546Sopenharmony_ci      }
7367bf215546Sopenharmony_ci
7368bf215546Sopenharmony_ci      --index;
7369bf215546Sopenharmony_ci
7370bf215546Sopenharmony_ci      if (i == MESA_SHADER_GEOMETRY &&
7371bf215546Sopenharmony_ci          !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
7372bf215546Sopenharmony_ci         if (!index) {
7373bf215546Sopenharmony_ci            *stage = i;
7374bf215546Sopenharmony_ci            return pipeline->gs_copy_shader;
7375bf215546Sopenharmony_ci         }
7376bf215546Sopenharmony_ci         --index;
7377bf215546Sopenharmony_ci      }
7378bf215546Sopenharmony_ci   }
7379bf215546Sopenharmony_ci
7380bf215546Sopenharmony_ci   *stage = -1;
7381bf215546Sopenharmony_ci   return NULL;
7382bf215546Sopenharmony_ci}
7383bf215546Sopenharmony_ci
/* Basically strlcpy (which does not exist on linux) specialized for
 * descriptions.
 *
 * Copies src into desc, which must be a VK_MAX_DESCRIPTION_SIZE byte buffer,
 * and fills every byte after the copied text with zeros, so the result is
 * always NUL-terminated and has no stale tail.
 *
 * A source string that does not fit is a programming error and trips the
 * assert. When asserts are compiled out the string is truncated to
 * VK_MAX_DESCRIPTION_SIZE - 1 characters instead of overrunning desc.
 */
static void
desc_copy(char *desc, const char *src)
{
   size_t len = strlen(src);

   assert(len < VK_MAX_DESCRIPTION_SIZE);
   if (len >= VK_MAX_DESCRIPTION_SIZE)
      len = VK_MAX_DESCRIPTION_SIZE - 1;

   memcpy(desc, src, len);
   memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
}
7394bf215546Sopenharmony_ci
7395bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
7396bf215546Sopenharmony_ciradv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
7397bf215546Sopenharmony_ci                                        uint32_t *pExecutableCount,
7398bf215546Sopenharmony_ci                                        VkPipelineExecutablePropertiesKHR *pProperties)
7399bf215546Sopenharmony_ci{
7400bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
7401bf215546Sopenharmony_ci   const uint32_t total_count = radv_get_executable_count(pipeline);
7402bf215546Sopenharmony_ci
7403bf215546Sopenharmony_ci   if (!pProperties) {
7404bf215546Sopenharmony_ci      *pExecutableCount = total_count;
7405bf215546Sopenharmony_ci      return VK_SUCCESS;
7406bf215546Sopenharmony_ci   }
7407bf215546Sopenharmony_ci
7408bf215546Sopenharmony_ci   const uint32_t count = MIN2(total_count, *pExecutableCount);
7409bf215546Sopenharmony_ci   for (unsigned i = 0, executable_idx = 0; i < MESA_VULKAN_SHADER_STAGES && executable_idx < count; ++i) {
7410bf215546Sopenharmony_ci      if (!pipeline->shaders[i])
7411bf215546Sopenharmony_ci         continue;
7412bf215546Sopenharmony_ci      pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i);
7413bf215546Sopenharmony_ci      const char *name = NULL;
7414bf215546Sopenharmony_ci      const char *description = NULL;
7415bf215546Sopenharmony_ci      switch (i) {
7416bf215546Sopenharmony_ci      case MESA_SHADER_VERTEX:
7417bf215546Sopenharmony_ci         name = "Vertex Shader";
7418bf215546Sopenharmony_ci         description = "Vulkan Vertex Shader";
7419bf215546Sopenharmony_ci         break;
7420bf215546Sopenharmony_ci      case MESA_SHADER_TESS_CTRL:
7421bf215546Sopenharmony_ci         if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
7422bf215546Sopenharmony_ci            pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
7423bf215546Sopenharmony_ci            name = "Vertex + Tessellation Control Shaders";
7424bf215546Sopenharmony_ci            description = "Combined Vulkan Vertex and Tessellation Control Shaders";
7425bf215546Sopenharmony_ci         } else {
7426bf215546Sopenharmony_ci            name = "Tessellation Control Shader";
7427bf215546Sopenharmony_ci            description = "Vulkan Tessellation Control Shader";
7428bf215546Sopenharmony_ci         }
7429bf215546Sopenharmony_ci         break;
7430bf215546Sopenharmony_ci      case MESA_SHADER_TESS_EVAL:
7431bf215546Sopenharmony_ci         name = "Tessellation Evaluation Shader";
7432bf215546Sopenharmony_ci         description = "Vulkan Tessellation Evaluation Shader";
7433bf215546Sopenharmony_ci         break;
7434bf215546Sopenharmony_ci      case MESA_SHADER_GEOMETRY:
7435bf215546Sopenharmony_ci         if (pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
7436bf215546Sopenharmony_ci            pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
7437bf215546Sopenharmony_ci            name = "Tessellation Evaluation + Geometry Shaders";
7438bf215546Sopenharmony_ci            description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
7439bf215546Sopenharmony_ci         } else if (!pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_VERTEX]) {
7440bf215546Sopenharmony_ci            pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
7441bf215546Sopenharmony_ci            name = "Vertex + Geometry Shader";
7442bf215546Sopenharmony_ci            description = "Combined Vulkan Vertex and Geometry Shaders";
7443bf215546Sopenharmony_ci         } else {
7444bf215546Sopenharmony_ci            name = "Geometry Shader";
7445bf215546Sopenharmony_ci            description = "Vulkan Geometry Shader";
7446bf215546Sopenharmony_ci         }
7447bf215546Sopenharmony_ci         break;
7448bf215546Sopenharmony_ci      case MESA_SHADER_FRAGMENT:
7449bf215546Sopenharmony_ci         name = "Fragment Shader";
7450bf215546Sopenharmony_ci         description = "Vulkan Fragment Shader";
7451bf215546Sopenharmony_ci         break;
7452bf215546Sopenharmony_ci      case MESA_SHADER_COMPUTE:
7453bf215546Sopenharmony_ci         name = "Compute Shader";
7454bf215546Sopenharmony_ci         description = "Vulkan Compute Shader";
7455bf215546Sopenharmony_ci         break;
7456bf215546Sopenharmony_ci      case MESA_SHADER_MESH:
7457bf215546Sopenharmony_ci         name = "Mesh Shader";
7458bf215546Sopenharmony_ci         description = "Vulkan Mesh Shader";
7459bf215546Sopenharmony_ci         break;
7460bf215546Sopenharmony_ci      case MESA_SHADER_TASK:
7461bf215546Sopenharmony_ci         name = "Task Shader";
7462bf215546Sopenharmony_ci         description = "Vulkan Task Shader";
7463bf215546Sopenharmony_ci         break;
7464bf215546Sopenharmony_ci      }
7465bf215546Sopenharmony_ci
7466bf215546Sopenharmony_ci      pProperties[executable_idx].subgroupSize = pipeline->shaders[i]->info.wave_size;
7467bf215546Sopenharmony_ci      desc_copy(pProperties[executable_idx].name, name);
7468bf215546Sopenharmony_ci      desc_copy(pProperties[executable_idx].description, description);
7469bf215546Sopenharmony_ci
7470bf215546Sopenharmony_ci      ++executable_idx;
7471bf215546Sopenharmony_ci      if (i == MESA_SHADER_GEOMETRY &&
7472bf215546Sopenharmony_ci          !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) {
7473bf215546Sopenharmony_ci         assert(pipeline->gs_copy_shader);
7474bf215546Sopenharmony_ci         if (executable_idx >= count)
7475bf215546Sopenharmony_ci            break;
7476bf215546Sopenharmony_ci
7477bf215546Sopenharmony_ci         pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
7478bf215546Sopenharmony_ci         pProperties[executable_idx].subgroupSize = 64;
7479bf215546Sopenharmony_ci         desc_copy(pProperties[executable_idx].name, "GS Copy Shader");
7480bf215546Sopenharmony_ci         desc_copy(pProperties[executable_idx].description,
7481bf215546Sopenharmony_ci                   "Extra shader stage that loads the GS output ringbuffer into the rasterizer");
7482bf215546Sopenharmony_ci
7483bf215546Sopenharmony_ci         ++executable_idx;
7484bf215546Sopenharmony_ci      }
7485bf215546Sopenharmony_ci   }
7486bf215546Sopenharmony_ci
7487bf215546Sopenharmony_ci   VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
7488bf215546Sopenharmony_ci   *pExecutableCount = count;
7489bf215546Sopenharmony_ci   return result;
7490bf215546Sopenharmony_ci}
7491bf215546Sopenharmony_ci
7492bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
7493bf215546Sopenharmony_ciradv_GetPipelineExecutableStatisticsKHR(VkDevice _device,
7494bf215546Sopenharmony_ci                                        const VkPipelineExecutableInfoKHR *pExecutableInfo,
7495bf215546Sopenharmony_ci                                        uint32_t *pStatisticCount,
7496bf215546Sopenharmony_ci                                        VkPipelineExecutableStatisticKHR *pStatistics)
7497bf215546Sopenharmony_ci{
7498bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
7499bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
7500bf215546Sopenharmony_ci   gl_shader_stage stage;
7501bf215546Sopenharmony_ci   struct radv_shader *shader =
7502bf215546Sopenharmony_ci      radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
7503bf215546Sopenharmony_ci
7504bf215546Sopenharmony_ci   const struct radv_physical_device *pdevice = device->physical_device;
7505bf215546Sopenharmony_ci
7506bf215546Sopenharmony_ci   unsigned lds_increment = pdevice->rad_info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT
7507bf215546Sopenharmony_ci      ? 1024 : pdevice->rad_info.lds_encode_granularity;
7508bf215546Sopenharmony_ci   unsigned max_waves = radv_get_max_waves(device, shader, stage);
7509bf215546Sopenharmony_ci
7510bf215546Sopenharmony_ci   VkPipelineExecutableStatisticKHR *s = pStatistics;
7511bf215546Sopenharmony_ci   VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
7512bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
7513bf215546Sopenharmony_ci
7514bf215546Sopenharmony_ci   if (s < end) {
7515bf215546Sopenharmony_ci      desc_copy(s->name, "Driver pipeline hash");
7516bf215546Sopenharmony_ci      desc_copy(s->description, "Driver pipeline hash used by RGP");
7517bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7518bf215546Sopenharmony_ci      s->value.u64 = pipeline->pipeline_hash;
7519bf215546Sopenharmony_ci   }
7520bf215546Sopenharmony_ci   ++s;
7521bf215546Sopenharmony_ci
7522bf215546Sopenharmony_ci   if (s < end) {
7523bf215546Sopenharmony_ci      desc_copy(s->name, "SGPRs");
7524bf215546Sopenharmony_ci      desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
7525bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7526bf215546Sopenharmony_ci      s->value.u64 = shader->config.num_sgprs;
7527bf215546Sopenharmony_ci   }
7528bf215546Sopenharmony_ci   ++s;
7529bf215546Sopenharmony_ci
7530bf215546Sopenharmony_ci   if (s < end) {
7531bf215546Sopenharmony_ci      desc_copy(s->name, "VGPRs");
7532bf215546Sopenharmony_ci      desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
7533bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7534bf215546Sopenharmony_ci      s->value.u64 = shader->config.num_vgprs;
7535bf215546Sopenharmony_ci   }
7536bf215546Sopenharmony_ci   ++s;
7537bf215546Sopenharmony_ci
7538bf215546Sopenharmony_ci   if (s < end) {
7539bf215546Sopenharmony_ci      desc_copy(s->name, "Spilled SGPRs");
7540bf215546Sopenharmony_ci      desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
7541bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7542bf215546Sopenharmony_ci      s->value.u64 = shader->config.spilled_sgprs;
7543bf215546Sopenharmony_ci   }
7544bf215546Sopenharmony_ci   ++s;
7545bf215546Sopenharmony_ci
7546bf215546Sopenharmony_ci   if (s < end) {
7547bf215546Sopenharmony_ci      desc_copy(s->name, "Spilled VGPRs");
7548bf215546Sopenharmony_ci      desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
7549bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7550bf215546Sopenharmony_ci      s->value.u64 = shader->config.spilled_vgprs;
7551bf215546Sopenharmony_ci   }
7552bf215546Sopenharmony_ci   ++s;
7553bf215546Sopenharmony_ci
7554bf215546Sopenharmony_ci   if (s < end) {
7555bf215546Sopenharmony_ci      desc_copy(s->name, "Code size");
7556bf215546Sopenharmony_ci      desc_copy(s->description, "Code size in bytes");
7557bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7558bf215546Sopenharmony_ci      s->value.u64 = shader->exec_size;
7559bf215546Sopenharmony_ci   }
7560bf215546Sopenharmony_ci   ++s;
7561bf215546Sopenharmony_ci
7562bf215546Sopenharmony_ci   if (s < end) {
7563bf215546Sopenharmony_ci      desc_copy(s->name, "LDS size");
7564bf215546Sopenharmony_ci      desc_copy(s->description, "LDS size in bytes per workgroup");
7565bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7566bf215546Sopenharmony_ci      s->value.u64 = shader->config.lds_size * lds_increment;
7567bf215546Sopenharmony_ci   }
7568bf215546Sopenharmony_ci   ++s;
7569bf215546Sopenharmony_ci
7570bf215546Sopenharmony_ci   if (s < end) {
7571bf215546Sopenharmony_ci      desc_copy(s->name, "Scratch size");
7572bf215546Sopenharmony_ci      desc_copy(s->description, "Private memory in bytes per subgroup");
7573bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7574bf215546Sopenharmony_ci      s->value.u64 = shader->config.scratch_bytes_per_wave;
7575bf215546Sopenharmony_ci   }
7576bf215546Sopenharmony_ci   ++s;
7577bf215546Sopenharmony_ci
7578bf215546Sopenharmony_ci   if (s < end) {
7579bf215546Sopenharmony_ci      desc_copy(s->name, "Subgroups per SIMD");
7580bf215546Sopenharmony_ci      desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
7581bf215546Sopenharmony_ci      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7582bf215546Sopenharmony_ci      s->value.u64 = max_waves;
7583bf215546Sopenharmony_ci   }
7584bf215546Sopenharmony_ci   ++s;
7585bf215546Sopenharmony_ci
7586bf215546Sopenharmony_ci   if (shader->statistics) {
7587bf215546Sopenharmony_ci      for (unsigned i = 0; i < aco_num_statistics; i++) {
7588bf215546Sopenharmony_ci         const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
7589bf215546Sopenharmony_ci         if (s < end) {
7590bf215546Sopenharmony_ci            desc_copy(s->name, info->name);
7591bf215546Sopenharmony_ci            desc_copy(s->description, info->desc);
7592bf215546Sopenharmony_ci            s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
7593bf215546Sopenharmony_ci            s->value.u64 = shader->statistics[i];
7594bf215546Sopenharmony_ci         }
7595bf215546Sopenharmony_ci         ++s;
7596bf215546Sopenharmony_ci      }
7597bf215546Sopenharmony_ci   }
7598bf215546Sopenharmony_ci
7599bf215546Sopenharmony_ci   if (!pStatistics)
7600bf215546Sopenharmony_ci      *pStatisticCount = s - pStatistics;
7601bf215546Sopenharmony_ci   else if (s > end) {
7602bf215546Sopenharmony_ci      *pStatisticCount = end - pStatistics;
7603bf215546Sopenharmony_ci      result = VK_INCOMPLETE;
7604bf215546Sopenharmony_ci   } else {
7605bf215546Sopenharmony_ci      *pStatisticCount = s - pStatistics;
7606bf215546Sopenharmony_ci   }
7607bf215546Sopenharmony_ci
7608bf215546Sopenharmony_ci   return result;
7609bf215546Sopenharmony_ci}
7610bf215546Sopenharmony_ci
7611bf215546Sopenharmony_cistatic VkResult
7612bf215546Sopenharmony_ciradv_copy_representation(void *data, size_t *data_size, const char *src)
7613bf215546Sopenharmony_ci{
7614bf215546Sopenharmony_ci   size_t total_size = strlen(src) + 1;
7615bf215546Sopenharmony_ci
7616bf215546Sopenharmony_ci   if (!data) {
7617bf215546Sopenharmony_ci      *data_size = total_size;
7618bf215546Sopenharmony_ci      return VK_SUCCESS;
7619bf215546Sopenharmony_ci   }
7620bf215546Sopenharmony_ci
7621bf215546Sopenharmony_ci   size_t size = MIN2(total_size, *data_size);
7622bf215546Sopenharmony_ci
7623bf215546Sopenharmony_ci   memcpy(data, src, size);
7624bf215546Sopenharmony_ci   if (size)
7625bf215546Sopenharmony_ci      *((char *)data + size - 1) = 0;
7626bf215546Sopenharmony_ci   return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
7627bf215546Sopenharmony_ci}
7628bf215546Sopenharmony_ci
7629bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
7630bf215546Sopenharmony_ciradv_GetPipelineExecutableInternalRepresentationsKHR(
7631bf215546Sopenharmony_ci   VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
7632bf215546Sopenharmony_ci   uint32_t *pInternalRepresentationCount,
7633bf215546Sopenharmony_ci   VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
7634bf215546Sopenharmony_ci{
7635bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
7636bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
7637bf215546Sopenharmony_ci   gl_shader_stage stage;
7638bf215546Sopenharmony_ci   struct radv_shader *shader =
7639bf215546Sopenharmony_ci      radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
7640bf215546Sopenharmony_ci
7641bf215546Sopenharmony_ci   VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
7642bf215546Sopenharmony_ci   VkPipelineExecutableInternalRepresentationKHR *end =
7643bf215546Sopenharmony_ci      p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
7644bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
7645bf215546Sopenharmony_ci   /* optimized NIR */
7646bf215546Sopenharmony_ci   if (p < end) {
7647bf215546Sopenharmony_ci      p->isText = true;
7648bf215546Sopenharmony_ci      desc_copy(p->name, "NIR Shader(s)");
7649bf215546Sopenharmony_ci      desc_copy(p->description, "The optimized NIR shader(s)");
7650bf215546Sopenharmony_ci      if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
7651bf215546Sopenharmony_ci         result = VK_INCOMPLETE;
7652bf215546Sopenharmony_ci   }
7653bf215546Sopenharmony_ci   ++p;
7654bf215546Sopenharmony_ci
7655bf215546Sopenharmony_ci   /* backend IR */
7656bf215546Sopenharmony_ci   if (p < end) {
7657bf215546Sopenharmony_ci      p->isText = true;
7658bf215546Sopenharmony_ci      if (radv_use_llvm_for_stage(device, stage)) {
7659bf215546Sopenharmony_ci         desc_copy(p->name, "LLVM IR");
7660bf215546Sopenharmony_ci         desc_copy(p->description, "The LLVM IR after some optimizations");
7661bf215546Sopenharmony_ci      } else {
7662bf215546Sopenharmony_ci         desc_copy(p->name, "ACO IR");
7663bf215546Sopenharmony_ci         desc_copy(p->description, "The ACO IR after some optimizations");
7664bf215546Sopenharmony_ci      }
7665bf215546Sopenharmony_ci      if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
7666bf215546Sopenharmony_ci         result = VK_INCOMPLETE;
7667bf215546Sopenharmony_ci   }
7668bf215546Sopenharmony_ci   ++p;
7669bf215546Sopenharmony_ci
7670bf215546Sopenharmony_ci   /* Disassembler */
7671bf215546Sopenharmony_ci   if (p < end && shader->disasm_string) {
7672bf215546Sopenharmony_ci      p->isText = true;
7673bf215546Sopenharmony_ci      desc_copy(p->name, "Assembly");
7674bf215546Sopenharmony_ci      desc_copy(p->description, "Final Assembly");
7675bf215546Sopenharmony_ci      if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
7676bf215546Sopenharmony_ci         result = VK_INCOMPLETE;
7677bf215546Sopenharmony_ci   }
7678bf215546Sopenharmony_ci   ++p;
7679bf215546Sopenharmony_ci
7680bf215546Sopenharmony_ci   if (!pInternalRepresentations)
7681bf215546Sopenharmony_ci      *pInternalRepresentationCount = p - pInternalRepresentations;
7682bf215546Sopenharmony_ci   else if (p > end) {
7683bf215546Sopenharmony_ci      result = VK_INCOMPLETE;
7684bf215546Sopenharmony_ci      *pInternalRepresentationCount = end - pInternalRepresentations;
7685bf215546Sopenharmony_ci   } else {
7686bf215546Sopenharmony_ci      *pInternalRepresentationCount = p - pInternalRepresentations;
7687bf215546Sopenharmony_ci   }
7688bf215546Sopenharmony_ci
7689bf215546Sopenharmony_ci   return result;
7690bf215546Sopenharmony_ci}
7691