1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Google
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "radv_meta.h"
25bf215546Sopenharmony_ci#include "radv_private.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "nir_builder.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_cistatic void
30bf215546Sopenharmony_ciradv_get_sequence_size(const struct radv_indirect_command_layout *layout,
31bf215546Sopenharmony_ci                       const struct radv_graphics_pipeline *pipeline, uint32_t *cmd_size,
32bf215546Sopenharmony_ci                       uint32_t *upload_size)
33bf215546Sopenharmony_ci{
34bf215546Sopenharmony_ci   *cmd_size = 0;
35bf215546Sopenharmony_ci   *upload_size = 0;
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci   if (layout->bind_vbo_mask) {
38bf215546Sopenharmony_ci      *upload_size += 16 * util_bitcount(pipeline->vb_desc_usage_mask);
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci     /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */
41bf215546Sopenharmony_ci      *cmd_size += 3 * 4;
42bf215546Sopenharmony_ci   }
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci   if (layout->push_constant_mask) {
45bf215546Sopenharmony_ci      bool need_copy = false;
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci      for (unsigned i = 0; i < ARRAY_SIZE(pipeline->base.shaders); ++i) {
48bf215546Sopenharmony_ci         if (!pipeline->base.shaders[i])
49bf215546Sopenharmony_ci            continue;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci         struct radv_userdata_locations *locs = &pipeline->base.shaders[i]->info.user_sgprs_locs;
52bf215546Sopenharmony_ci         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
53bf215546Sopenharmony_ci            /* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
54bf215546Sopenharmony_ci            *cmd_size += 3 * 4;
55bf215546Sopenharmony_ci            need_copy = true;
56bf215546Sopenharmony_ci         }
57bf215546Sopenharmony_ci         if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
58bf215546Sopenharmony_ci            /* One PKT3_SET_SH_REG writing all inline push constants. */
59bf215546Sopenharmony_ci            *cmd_size += (2 + locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].num_sgprs) * 4;
60bf215546Sopenharmony_ci      }
61bf215546Sopenharmony_ci      if (need_copy)
62bf215546Sopenharmony_ci         *upload_size +=
63bf215546Sopenharmony_ci            align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16);
64bf215546Sopenharmony_ci   }
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   if (layout->binds_index_buffer) {
67bf215546Sopenharmony_ci      /* Index type write (normal reg write) + index buffer base write (64-bits, but special packet
68bf215546Sopenharmony_ci       * so only 1 word overhead) + index buffer size (again, special packet so only 1 word
69bf215546Sopenharmony_ci       * overhead)
70bf215546Sopenharmony_ci       */
71bf215546Sopenharmony_ci      *cmd_size += (3 + 3 + 2) * 4;
72bf215546Sopenharmony_ci   }
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   if (layout->indexed) {
75bf215546Sopenharmony_ci      /* userdata writes + instance count + indexed draw */
76bf215546Sopenharmony_ci      *cmd_size += (5 + 2 + 5) * 4;
77bf215546Sopenharmony_ci   } else {
78bf215546Sopenharmony_ci      /* userdata writes + instance count + non-indexed draw */
79bf215546Sopenharmony_ci      *cmd_size += (5 + 2 + 3) * 4;
80bf215546Sopenharmony_ci   }
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   if (layout->binds_state) {
83bf215546Sopenharmony_ci      /* One PKT3_SET_CONTEXT_REG (PA_SU_SC_MODE_CNTL) */
84bf215546Sopenharmony_ci      *cmd_size += 3 * 4;
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci      if (pipeline->base.device->physical_device->rad_info.has_gfx9_scissor_bug) {
87bf215546Sopenharmony_ci         /* 1 reg write of 4 regs + 1 reg write of 2 regs per scissor */
88bf215546Sopenharmony_ci         *cmd_size += (8 + 2 * MAX_SCISSORS) * 4;
89bf215546Sopenharmony_ci      }
90bf215546Sopenharmony_ci   }
91bf215546Sopenharmony_ci}
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_cistatic uint32_t
94bf215546Sopenharmony_ciradv_align_cmdbuf_size(uint32_t size)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   return align(MAX2(1, size), 256);
97bf215546Sopenharmony_ci}
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ciuint32_t
100bf215546Sopenharmony_ciradv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info)
101bf215546Sopenharmony_ci{
102bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_indirect_command_layout, layout, cmd_info->indirectCommandsLayout);
103bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_pipeline, pipeline, cmd_info->pipeline);
104bf215546Sopenharmony_ci   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   uint32_t cmd_size, upload_size;
107bf215546Sopenharmony_ci   radv_get_sequence_size(layout, graphics_pipeline, &cmd_size, &upload_size);
108bf215546Sopenharmony_ci   return radv_align_cmdbuf_size(cmd_size * cmd_info->sequencesCount);
109bf215546Sopenharmony_ci}
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_cienum radv_dgc_token_type {
112bf215546Sopenharmony_ci   RADV_DGC_INDEX_BUFFER,
113bf215546Sopenharmony_ci   RADV_DGC_DRAW,
114bf215546Sopenharmony_ci   RADV_DGC_INDEXED_DRAW,
115bf215546Sopenharmony_ci};
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_cistruct radv_dgc_token {
118bf215546Sopenharmony_ci   uint16_t type; /* enum radv_dgc_token_type, but making the size explicit */
119bf215546Sopenharmony_ci   uint16_t offset; /* offset in the input stream */
120bf215546Sopenharmony_ci   union {
121bf215546Sopenharmony_ci      struct {
122bf215546Sopenharmony_ci         uint16_t vtx_base_sgpr;
123bf215546Sopenharmony_ci      } draw;
124bf215546Sopenharmony_ci      struct {
125bf215546Sopenharmony_ci         uint16_t index_size;
126bf215546Sopenharmony_ci         uint16_t vtx_base_sgpr;
127bf215546Sopenharmony_ci         uint32_t max_index_count;
128bf215546Sopenharmony_ci      } indexed_draw;
129bf215546Sopenharmony_ci   };
130bf215546Sopenharmony_ci};
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_cistruct radv_dgc_params {
133bf215546Sopenharmony_ci   uint32_t cmd_buf_stride;
134bf215546Sopenharmony_ci   uint32_t cmd_buf_size;
135bf215546Sopenharmony_ci   uint32_t upload_stride;
136bf215546Sopenharmony_ci   uint32_t upload_addr;
137bf215546Sopenharmony_ci   uint32_t sequence_count;
138bf215546Sopenharmony_ci   uint32_t stream_stride;
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   /* draw info */
141bf215546Sopenharmony_ci   uint16_t draw_indexed;
142bf215546Sopenharmony_ci   uint16_t draw_params_offset;
143bf215546Sopenharmony_ci   uint16_t base_index_size;
144bf215546Sopenharmony_ci   uint16_t vtx_base_sgpr;
145bf215546Sopenharmony_ci   uint32_t max_index_count;
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci   /* bind index buffer info. Valid if base_index_size == 0 && draw_indexed */
148bf215546Sopenharmony_ci   uint16_t index_buffer_offset;
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci   /* Top bit is DGC_DYNAMIC_VERTEX_INPUT */
151bf215546Sopenharmony_ci   uint8_t vbo_cnt;
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci   uint8_t const_copy;
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   /* Which VBOs are set in this indirect layout. */
156bf215546Sopenharmony_ci   uint32_t vbo_bind_mask;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci   uint16_t vbo_reg;
159bf215546Sopenharmony_ci   uint16_t const_copy_size;
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   uint64_t push_constant_mask;
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   uint32_t ibo_type_32;
164bf215546Sopenharmony_ci   uint32_t ibo_type_8;
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   uint16_t push_constant_shader_cnt;
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   uint16_t emit_state;
169bf215546Sopenharmony_ci   uint32_t pa_su_sc_mode_cntl_base;
170bf215546Sopenharmony_ci   uint16_t state_offset;
171bf215546Sopenharmony_ci   uint16_t scissor_count;
172bf215546Sopenharmony_ci   uint16_t scissor_offset; /* in parameter buffer. */
173bf215546Sopenharmony_ci};
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_cienum {
176bf215546Sopenharmony_ci   DGC_USES_DRAWID = 1u << 14,
177bf215546Sopenharmony_ci   DGC_USES_BASEINSTANCE = 1u << 15,
178bf215546Sopenharmony_ci};
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_cienum {
181bf215546Sopenharmony_ci   DGC_DYNAMIC_STRIDE = 1u << 15,
182bf215546Sopenharmony_ci};
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_cienum {
185bf215546Sopenharmony_ci   DGC_DYNAMIC_VERTEX_INPUT = 1u << 7,
186bf215546Sopenharmony_ci};
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_cienum {
189bf215546Sopenharmony_ci   DGC_DESC_STREAM,
190bf215546Sopenharmony_ci   DGC_DESC_PREPARE,
191bf215546Sopenharmony_ci   DGC_DESC_PARAMS,
192bf215546Sopenharmony_ci   DGC_DESC_COUNT,
193bf215546Sopenharmony_ci   DGC_NUM_DESCS,
194bf215546Sopenharmony_ci};
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_cistruct dgc_cmdbuf {
197bf215546Sopenharmony_ci   nir_ssa_def *descriptor;
198bf215546Sopenharmony_ci   nir_variable *offset;
199bf215546Sopenharmony_ci};
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_cistatic void
202bf215546Sopenharmony_cidgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value)
203bf215546Sopenharmony_ci{
204bf215546Sopenharmony_ci   assert(value->bit_size >= 32);
205bf215546Sopenharmony_ci   nir_ssa_def *offset = nir_load_var(b, cs->offset);
206bf215546Sopenharmony_ci   nir_store_ssbo(b, value, cs->descriptor, offset,.access = ACCESS_NON_READABLE);
207bf215546Sopenharmony_ci   nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1);
208bf215546Sopenharmony_ci}
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci#define load_param32(b, field)                                                                     \
212bf215546Sopenharmony_ci   nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                         \
213bf215546Sopenharmony_ci                          .base = offsetof(struct radv_dgc_params, field), .range = 4)
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci#define load_param16(b, field)                                                                     \
216bf215546Sopenharmony_ci   nir_ubfe(                                                                                       \
217bf215546Sopenharmony_ci      (b),                                                                                         \
218bf215546Sopenharmony_ci      nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                      \
219bf215546Sopenharmony_ci                             .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4),  \
220bf215546Sopenharmony_ci      nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 2) * 8), nir_imm_int((b), 16))
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci#define load_param8(b, field)                                                                      \
223bf215546Sopenharmony_ci   nir_ubfe(                                                                                       \
224bf215546Sopenharmony_ci      (b),                                                                                         \
225bf215546Sopenharmony_ci      nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                      \
226bf215546Sopenharmony_ci                             .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4),  \
227bf215546Sopenharmony_ci      nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 3) * 8), nir_imm_int((b), 8))
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci#define load_param64(b, field)                                                                     \
230bf215546Sopenharmony_ci   nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0),                   \
231bf215546Sopenharmony_ci                          .base = offsetof(struct radv_dgc_params, field), .range = 8))
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_cistatic nir_ssa_def *
234bf215546Sopenharmony_cinir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len)
235bf215546Sopenharmony_ci{
236bf215546Sopenharmony_ci   len = nir_iand_imm(b, len, 0x3fff);
237bf215546Sopenharmony_ci   return nir_ior_imm(b, nir_ishl_imm(b, len, 16), PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op));
238bf215546Sopenharmony_ci}
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_cistatic void
241bf215546Sopenharmony_cidgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr,
242bf215546Sopenharmony_ci                         nir_ssa_def *first_vertex, nir_ssa_def *first_instance, nir_ssa_def *drawid)
243bf215546Sopenharmony_ci{
244bf215546Sopenharmony_ci   vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
245bf215546Sopenharmony_ci   nir_ssa_def *has_drawid =
246bf215546Sopenharmony_ci      nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
247bf215546Sopenharmony_ci   nir_ssa_def *has_baseinstance =
248bf215546Sopenharmony_ci      nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   nir_ssa_def *pkt_cnt = nir_imm_int(b, 1);
251bf215546Sopenharmony_ci   pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
252bf215546Sopenharmony_ci   pkt_cnt = nir_bcsel(b, has_baseinstance, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   nir_ssa_def *values[5] = {
255bf215546Sopenharmony_ci      nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), first_vertex,
256bf215546Sopenharmony_ci      nir_imm_int(b, PKT3_NOP_PAD),          nir_imm_int(b, PKT3_NOP_PAD),
257bf215546Sopenharmony_ci   };
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance),
260bf215546Sopenharmony_ci                         nir_bcsel(b, has_drawid, drawid, first_instance), values[4]);
261bf215546Sopenharmony_ci   values[4] = nir_bcsel(b, nir_iand(b, has_drawid, has_baseinstance), first_instance, values[4]);
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci   dgc_emit(b, cs, nir_vec(b, values, 5));
264bf215546Sopenharmony_ci}
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_cistatic void
267bf215546Sopenharmony_cidgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *instance_count)
268bf215546Sopenharmony_ci{
269bf215546Sopenharmony_ci   nir_ssa_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count};
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   dgc_emit(b, cs, nir_vec(b, values, 2));
272bf215546Sopenharmony_ci}
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_cistatic void
275bf215546Sopenharmony_cidgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset,
276bf215546Sopenharmony_ci                      nir_ssa_def *index_count, nir_ssa_def *max_index_count)
277bf215546Sopenharmony_ci{
278bf215546Sopenharmony_ci   nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)),
279bf215546Sopenharmony_ci                             max_index_count, index_offset, index_count,
280bf215546Sopenharmony_ci                             nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci   dgc_emit(b, cs, nir_vec(b, values, 5));
283bf215546Sopenharmony_ci}
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_cistatic void
286bf215546Sopenharmony_cidgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vertex_count)
287bf215546Sopenharmony_ci{
288bf215546Sopenharmony_ci   nir_ssa_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count,
289bf215546Sopenharmony_ci                             nir_imm_int(b, V_0287F0_DI_SRC_SEL_AUTO_INDEX)};
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci   dgc_emit(b, cs, nir_vec(b, values, 3));
292bf215546Sopenharmony_ci}
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_cistatic void
295bf215546Sopenharmony_cibuild_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count)
296bf215546Sopenharmony_ci{
297bf215546Sopenharmony_ci   nir_ssa_def *global_id = get_global_ids(b, 1);
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci   nir_ssa_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
300bf215546Sopenharmony_ci   nir_ssa_def *cmd_buf_size = load_param32(b, cmd_buf_size);
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   nir_push_if(b, nir_ieq_imm(b, global_id, 0));
303bf215546Sopenharmony_ci   {
304bf215546Sopenharmony_ci      nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci      nir_variable *offset =
307bf215546Sopenharmony_ci         nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset");
308bf215546Sopenharmony_ci      nir_store_var(b, offset, cmd_buf_tail_start, 0x1);
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci      nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
311bf215546Sopenharmony_ci      nir_push_loop(b);
312bf215546Sopenharmony_ci      {
313bf215546Sopenharmony_ci         nir_ssa_def *curr_offset = nir_load_var(b, offset);
314bf215546Sopenharmony_ci         const unsigned MAX_PACKET_WORDS = 0x3FFC;
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci         nir_push_if(b, nir_ieq(b, curr_offset, cmd_buf_size));
317bf215546Sopenharmony_ci         {
318bf215546Sopenharmony_ci            nir_jump(b, nir_jump_break);
319bf215546Sopenharmony_ci         }
320bf215546Sopenharmony_ci         nir_pop_if(b, NULL);
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci         nir_ssa_def *packet_size = nir_isub(b, cmd_buf_size, curr_offset);
323bf215546Sopenharmony_ci         packet_size = nir_umin(b, packet_size, nir_imm_int(b, MAX_PACKET_WORDS * 4));
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci         nir_ssa_def *len = nir_ushr_imm(b, packet_size, 2);
326bf215546Sopenharmony_ci         len = nir_iadd_imm(b, len, -2);
327bf215546Sopenharmony_ci         nir_ssa_def *packet = nir_pkt3(b, PKT3_NOP, len);
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci         nir_store_ssbo(b, packet, dst_buf, curr_offset, .access = ACCESS_NON_READABLE);
330bf215546Sopenharmony_ci         nir_store_var(b, offset, nir_iadd(b, curr_offset, packet_size), 0x1);
331bf215546Sopenharmony_ci      }
332bf215546Sopenharmony_ci      nir_pop_loop(b, NULL);
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci   nir_pop_if(b, NULL);
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_cistatic nir_shader *
338bf215546Sopenharmony_cibuild_dgc_prepare_shader(struct radv_device *dev)
339bf215546Sopenharmony_ci{
340bf215546Sopenharmony_ci   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
341bf215546Sopenharmony_ci   b.shader->info.workgroup_size[0] = 64;
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci   nir_ssa_def *global_id = get_global_ids(&b, 1);
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci   nir_ssa_def *sequence_id = global_id;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci   nir_ssa_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride);
348bf215546Sopenharmony_ci   nir_ssa_def *sequence_count = load_param32(&b, sequence_count);
349bf215546Sopenharmony_ci   nir_ssa_def *stream_stride = load_param32(&b, stream_stride);
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci   nir_variable *count_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "sequence_count");
352bf215546Sopenharmony_ci   nir_store_var(&b, count_var, sequence_count, 0x1);
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci   nir_push_if(&b, nir_ieq_imm(&b, sequence_count, UINT32_MAX));
355bf215546Sopenharmony_ci   {
356bf215546Sopenharmony_ci      nir_ssa_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT);
357bf215546Sopenharmony_ci      nir_ssa_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0), .align_mul = 4);
358bf215546Sopenharmony_ci      nir_store_var(&b, count_var, cnt, 0x1);
359bf215546Sopenharmony_ci   }
360bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   sequence_count = nir_load_var(&b, count_var);
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   nir_push_if(&b, nir_ult(&b, sequence_id, sequence_count));
365bf215546Sopenharmony_ci   {
366bf215546Sopenharmony_ci      struct dgc_cmdbuf cmd_buf = {
367bf215546Sopenharmony_ci         .descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE),
368bf215546Sopenharmony_ci         .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
369bf215546Sopenharmony_ci      };
370bf215546Sopenharmony_ci      nir_store_var(&b, cmd_buf.offset, nir_imul(&b, global_id, cmd_buf_stride), 1);
371bf215546Sopenharmony_ci      nir_ssa_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride);
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci      nir_ssa_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM);
374bf215546Sopenharmony_ci      nir_ssa_def *stream_base = nir_imul(&b, sequence_id, stream_stride);
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci      nir_variable *upload_offset =
377bf215546Sopenharmony_ci         nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset");
378bf215546Sopenharmony_ci      nir_store_var(&b, upload_offset,
379bf215546Sopenharmony_ci                    nir_iadd(&b, load_param32(&b, cmd_buf_size),
380bf215546Sopenharmony_ci                             nir_imul(&b, load_param32(&b, upload_stride), sequence_id)),
381bf215546Sopenharmony_ci                    0x1);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci      nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
384bf215546Sopenharmony_ci      nir_ssa_def *vbo_cnt = nir_iand_imm(&b, load_param8(&b, vbo_cnt), 0x7F);
385bf215546Sopenharmony_ci      nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0));
386bf215546Sopenharmony_ci      {
387bf215546Sopenharmony_ci         nir_variable *vbo_idx =
388bf215546Sopenharmony_ci            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
389bf215546Sopenharmony_ci         nir_store_var(&b, vbo_idx, nir_imm_int(&b, 0), 0x1);
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci         nir_push_loop(&b);
392bf215546Sopenharmony_ci         {
393bf215546Sopenharmony_ci            nir_push_if(&b, nir_uge(&b, nir_load_var(&b, vbo_idx), vbo_cnt));
394bf215546Sopenharmony_ci            {
395bf215546Sopenharmony_ci               nir_jump(&b, nir_jump_break);
396bf215546Sopenharmony_ci            }
397bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci            nir_ssa_def *vbo_offset = nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 16);
400bf215546Sopenharmony_ci            nir_variable *vbo_data =
401bf215546Sopenharmony_ci               nir_variable_create(b.shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data");
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci            nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
404bf215546Sopenharmony_ci            nir_store_var(&b, vbo_data,
405bf215546Sopenharmony_ci                          nir_load_ssbo(&b, 4, 32, param_buf, vbo_offset, .align_mul = 4), 0xf);
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci            nir_ssa_def *vbo_override =
408bf215546Sopenharmony_ci               nir_ine_imm(&b,
409bf215546Sopenharmony_ci                       nir_iand(&b, vbo_bind_mask,
410bf215546Sopenharmony_ci                                nir_ishl(&b, nir_imm_int(&b, 1), nir_load_var(&b, vbo_idx))),
411bf215546Sopenharmony_ci                       0);
412bf215546Sopenharmony_ci            nir_push_if(&b, vbo_override);
413bf215546Sopenharmony_ci            {
414bf215546Sopenharmony_ci               nir_ssa_def *vbo_offset_offset =
415bf215546Sopenharmony_ci                  nir_iadd(&b, nir_imul_imm(&b, vbo_cnt, 16),
416bf215546Sopenharmony_ci                           nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 8));
417bf215546Sopenharmony_ci               nir_ssa_def *vbo_over_data =
418bf215546Sopenharmony_ci                  nir_load_ssbo(&b, 2, 32, param_buf, vbo_offset_offset, .align_mul = 4);
419bf215546Sopenharmony_ci               nir_ssa_def *stream_offset = nir_iadd(
420bf215546Sopenharmony_ci                  &b, stream_base, nir_iand_imm(&b, nir_channel(&b, vbo_over_data, 0), 0x7FFF));
421bf215546Sopenharmony_ci               nir_ssa_def *stream_data =
422bf215546Sopenharmony_ci                  nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci               nir_ssa_def *va = nir_pack_64_2x32(&b, nir_channels(&b, stream_data, 0x3));
425bf215546Sopenharmony_ci               nir_ssa_def *size = nir_channel(&b, stream_data, 2);
426bf215546Sopenharmony_ci               nir_ssa_def *stride = nir_channel(&b, stream_data, 3);
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci               nir_ssa_def *vs_state_offset = nir_ubfe(&b, nir_channel(&b, vbo_over_data, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 15));
429bf215546Sopenharmony_ci               va = nir_iadd(&b, va, nir_u2u64(&b, vs_state_offset));
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci               nir_ssa_def *dyn_stride = nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
432bf215546Sopenharmony_ci               nir_ssa_def *old_stride =
433bf215546Sopenharmony_ci                  nir_ubfe(&b, nir_channel(&b, nir_load_var(&b, vbo_data), 1), nir_imm_int(&b, 16),
434bf215546Sopenharmony_ci                           nir_imm_int(&b, 14));
435bf215546Sopenharmony_ci               stride = nir_bcsel(&b, dyn_stride, stride, old_stride);
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci               nir_ssa_def *use_per_attribute_vb_descs =
438bf215546Sopenharmony_ci                  nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), 1u << 31);
439bf215546Sopenharmony_ci               nir_variable *num_records = nir_variable_create(b.shader, nir_var_shader_temp,
440bf215546Sopenharmony_ci                                                               glsl_uint_type(), "num_records");
441bf215546Sopenharmony_ci               nir_store_var(&b, num_records, size, 0x1);
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci               nir_push_if(&b, use_per_attribute_vb_descs);
444bf215546Sopenharmony_ci               {
445bf215546Sopenharmony_ci                  nir_ssa_def *attrib_end = nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1),
446bf215546Sopenharmony_ci                                                     nir_imm_int(&b, 16), nir_imm_int(&b, 16));
447bf215546Sopenharmony_ci                  nir_ssa_def *attrib_index_offset =
448bf215546Sopenharmony_ci                     nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1), nir_imm_int(&b, 0),
449bf215546Sopenharmony_ci                              nir_imm_int(&b, 16));
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci                  nir_push_if(&b, nir_ult(&b, nir_load_var(&b, num_records), attrib_end));
452bf215546Sopenharmony_ci                  {
453bf215546Sopenharmony_ci                     nir_store_var(&b, num_records, nir_imm_int(&b, 0), 0x1);
454bf215546Sopenharmony_ci                  }
455bf215546Sopenharmony_ci                  nir_push_else(&b, NULL);
456bf215546Sopenharmony_ci                  nir_push_if(&b, nir_ieq_imm(&b, stride, 0));
457bf215546Sopenharmony_ci                  {
458bf215546Sopenharmony_ci                     nir_store_var(&b, num_records, nir_imm_int(&b, 1), 0x1);
459bf215546Sopenharmony_ci                  }
460bf215546Sopenharmony_ci                  nir_push_else(&b, NULL);
461bf215546Sopenharmony_ci                  {
462bf215546Sopenharmony_ci                     nir_ssa_def *r = nir_iadd(
463bf215546Sopenharmony_ci                        &b,
464bf215546Sopenharmony_ci                        nir_iadd_imm(
465bf215546Sopenharmony_ci                           &b,
466bf215546Sopenharmony_ci                           nir_udiv(&b, nir_isub(&b, nir_load_var(&b, num_records), attrib_end),
467bf215546Sopenharmony_ci                                    stride),
468bf215546Sopenharmony_ci                           1),
469bf215546Sopenharmony_ci                        attrib_index_offset);
470bf215546Sopenharmony_ci                     nir_store_var(&b, num_records, r, 0x1);
471bf215546Sopenharmony_ci                  }
472bf215546Sopenharmony_ci                  nir_pop_if(&b, NULL);
473bf215546Sopenharmony_ci                  nir_pop_if(&b, NULL);
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci                  nir_ssa_def *convert_cond =
476bf215546Sopenharmony_ci                     nir_ine_imm(&b, nir_load_var(&b, num_records), 0);
477bf215546Sopenharmony_ci                  if (dev->physical_device->rad_info.gfx_level == GFX9)
478bf215546Sopenharmony_ci                     convert_cond = nir_imm_bool(&b, false);
479bf215546Sopenharmony_ci                  else if (dev->physical_device->rad_info.gfx_level != GFX8)
480bf215546Sopenharmony_ci                     convert_cond =
481bf215546Sopenharmony_ci                        nir_iand(&b, convert_cond, nir_ieq_imm(&b, stride, 0));
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_ci                  nir_ssa_def *new_records = nir_iadd(
484bf215546Sopenharmony_ci                     &b, nir_imul(&b, nir_iadd_imm(&b, nir_load_var(&b, num_records), -1), stride),
485bf215546Sopenharmony_ci                     attrib_end);
486bf215546Sopenharmony_ci                  new_records =
487bf215546Sopenharmony_ci                     nir_bcsel(&b, convert_cond, new_records, nir_load_var(&b, num_records));
488bf215546Sopenharmony_ci                  nir_store_var(&b, num_records, new_records, 0x1);
489bf215546Sopenharmony_ci               }
490bf215546Sopenharmony_ci               nir_push_else(&b, NULL);
491bf215546Sopenharmony_ci               {
492bf215546Sopenharmony_ci                  if (dev->physical_device->rad_info.gfx_level != GFX8) {
493bf215546Sopenharmony_ci                     nir_push_if(&b, nir_ine_imm(&b, stride, 0));
494bf215546Sopenharmony_ci                     {
495bf215546Sopenharmony_ci                        nir_ssa_def *r = nir_iadd(&b, nir_load_var(&b, num_records),
496bf215546Sopenharmony_ci                                                  nir_iadd_imm(&b, stride, -1));
497bf215546Sopenharmony_ci                        nir_store_var(&b, num_records, nir_udiv(&b, r, stride), 0x1);
498bf215546Sopenharmony_ci                     }
499bf215546Sopenharmony_ci                     nir_pop_if(&b, NULL);
500bf215546Sopenharmony_ci                  }
501bf215546Sopenharmony_ci               }
502bf215546Sopenharmony_ci               nir_pop_if(&b, NULL);
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci               nir_ssa_def *rsrc_word3 = nir_channel(&b, nir_load_var(&b, vbo_data), 3);
505bf215546Sopenharmony_ci               if (dev->physical_device->rad_info.gfx_level >= GFX10) {
506bf215546Sopenharmony_ci                  nir_ssa_def *oob_select = nir_bcsel(
507bf215546Sopenharmony_ci                     &b, nir_ieq_imm(&b, stride, 0), nir_imm_int(&b, V_008F0C_OOB_SELECT_RAW),
508bf215546Sopenharmony_ci                     nir_imm_int(&b, V_008F0C_OOB_SELECT_STRUCTURED));
509bf215546Sopenharmony_ci                  rsrc_word3 = nir_iand_imm(&b, rsrc_word3, C_008F0C_OOB_SELECT);
510bf215546Sopenharmony_ci                  rsrc_word3 = nir_ior(&b, rsrc_word3, nir_ishl_imm(&b, oob_select, 28));
511bf215546Sopenharmony_ci               }
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci               nir_ssa_def *va_hi = nir_iand_imm(&b, nir_unpack_64_2x32_split_y(&b, va), 0xFFFF);
514bf215546Sopenharmony_ci               stride = nir_iand_imm(&b, stride, 0x3FFF);
515bf215546Sopenharmony_ci               nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(&b, va),
516bf215546Sopenharmony_ci                                               nir_ior(&b, nir_ishl_imm(&b, stride, 16), va_hi),
517bf215546Sopenharmony_ci                                               nir_load_var(&b, num_records), rsrc_word3};
518bf215546Sopenharmony_ci               nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
519bf215546Sopenharmony_ci            }
520bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci            /* On GFX9, it seems bounds checking is disabled if both
523bf215546Sopenharmony_ci             * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
524bf215546Sopenharmony_ci             * GFX10.3 but it doesn't hurt.
525bf215546Sopenharmony_ci             */
526bf215546Sopenharmony_ci            nir_ssa_def *num_records = nir_channel(&b, nir_load_var(&b, vbo_data), 2);
527bf215546Sopenharmony_ci            nir_ssa_def *buf_va = nir_iand_imm(
528bf215546Sopenharmony_ci               &b, nir_pack_64_2x32(&b, nir_channels(&b, nir_load_var(&b, vbo_data), 0x3)),
529bf215546Sopenharmony_ci               (1ull << 48) - 1ull);
530bf215546Sopenharmony_ci            nir_push_if(&b,
531bf215546Sopenharmony_ci                        nir_ior(&b, nir_ieq_imm(&b, num_records, 0), nir_ieq_imm(&b, buf_va, 0)));
532bf215546Sopenharmony_ci            {
533bf215546Sopenharmony_ci               nir_ssa_def *use_dynamic_vertex_input =
534bf215546Sopenharmony_ci                  nir_test_mask(&b, load_param8(&b, vbo_cnt), DGC_DYNAMIC_VERTEX_INPUT);
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci               nir_push_if(&b, use_dynamic_vertex_input);
537bf215546Sopenharmony_ci               {
538bf215546Sopenharmony_ci                  nir_ssa_def *new_vbo_data[4] = {
539bf215546Sopenharmony_ci                     nir_imm_int(&b, 0), nir_imm_int(&b, S_008F04_STRIDE(16)), nir_imm_int(&b, 0),
540bf215546Sopenharmony_ci                     nir_channel(&b, nir_load_var(&b, vbo_data), 3)};
541bf215546Sopenharmony_ci                  nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
542bf215546Sopenharmony_ci               }
543bf215546Sopenharmony_ci               nir_push_else(&b, NULL);
544bf215546Sopenharmony_ci               {
545bf215546Sopenharmony_ci                  nir_ssa_def *new_vbo_data[4] = {nir_imm_int(&b, 0), nir_imm_int(&b, 0),
546bf215546Sopenharmony_ci                                                  nir_imm_int(&b, 0), nir_imm_int(&b, 0)};
547bf215546Sopenharmony_ci                  nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
548bf215546Sopenharmony_ci               }
549bf215546Sopenharmony_ci               nir_pop_if(&b, NULL);
550bf215546Sopenharmony_ci            }
551bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci            nir_ssa_def *upload_off = nir_iadd(&b, nir_load_var(&b, upload_offset), vbo_offset);
554bf215546Sopenharmony_ci            nir_store_ssbo(&b, nir_load_var(&b, vbo_data), cmd_buf.descriptor, upload_off, .access = ACCESS_NON_READABLE);
555bf215546Sopenharmony_ci            nir_store_var(&b, vbo_idx, nir_iadd_imm(&b, nir_load_var(&b, vbo_idx), 1), 0x1);
556bf215546Sopenharmony_ci         }
557bf215546Sopenharmony_ci         nir_pop_loop(&b, NULL);
558bf215546Sopenharmony_ci         nir_ssa_def *packet[3] = {
559bf215546Sopenharmony_ci            nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(&b, vbo_reg),
560bf215546Sopenharmony_ci            nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset))};
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci         dgc_emit(&b, &cmd_buf, nir_vec(&b, packet, 3));
563bf215546Sopenharmony_ci
564bf215546Sopenharmony_ci         nir_store_var(&b, upload_offset,
565bf215546Sopenharmony_ci                       nir_iadd(&b, nir_load_var(&b, upload_offset), nir_imul_imm(&b, vbo_cnt, 16)),
566bf215546Sopenharmony_ci                       0x1);
567bf215546Sopenharmony_ci      }
568bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci      nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask);
572bf215546Sopenharmony_ci      nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
573bf215546Sopenharmony_ci      {
574bf215546Sopenharmony_ci         nir_ssa_def *const_copy = nir_ine_imm(&b, load_param8(&b, const_copy), 0);
575bf215546Sopenharmony_ci         nir_ssa_def *const_copy_size = load_param16(&b, const_copy_size);
576bf215546Sopenharmony_ci         nir_ssa_def *const_copy_words = nir_ushr_imm(&b, const_copy_size, 2);
577bf215546Sopenharmony_ci         const_copy_words = nir_bcsel(&b, const_copy, const_copy_words, nir_imm_int(&b, 0));
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci         nir_variable *idx =
580bf215546Sopenharmony_ci            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx");
581bf215546Sopenharmony_ci         nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1);
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci         nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
584bf215546Sopenharmony_ci         nir_ssa_def *param_offset = nir_imul_imm(&b, vbo_cnt, 24);
585bf215546Sopenharmony_ci         nir_ssa_def *param_offset_offset = nir_iadd_imm(&b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
586bf215546Sopenharmony_ci         nir_ssa_def *param_const_offset = nir_iadd_imm(&b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12);
587bf215546Sopenharmony_ci         nir_push_loop(&b);
588bf215546Sopenharmony_ci         {
589bf215546Sopenharmony_ci            nir_ssa_def *cur_idx = nir_load_var(&b, idx);
590bf215546Sopenharmony_ci            nir_push_if(&b, nir_uge(&b, cur_idx, const_copy_words));
591bf215546Sopenharmony_ci            {
592bf215546Sopenharmony_ci               nir_jump(&b, nir_jump_break);
593bf215546Sopenharmony_ci            }
594bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci            nir_variable *data = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci            nir_ssa_def *update = nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx));
599bf215546Sopenharmony_ci            update = nir_bcsel(
600bf215546Sopenharmony_ci               &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)), update,
601bf215546Sopenharmony_ci               nir_imm_int64(&b, 0));
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci            nir_push_if(&b, nir_ine_imm(&b, update, 0));
604bf215546Sopenharmony_ci            {
605bf215546Sopenharmony_ci               nir_ssa_def *stream_offset = nir_load_ssbo(
606bf215546Sopenharmony_ci                  &b, 1, 32, param_buf,
607bf215546Sopenharmony_ci                  nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)), .align_mul = 4);
608bf215546Sopenharmony_ci               nir_ssa_def *new_data = nir_load_ssbo(&b, 1, 32, stream_buf, nir_iadd(&b, stream_base, stream_offset), .align_mul = 4);
609bf215546Sopenharmony_ci               nir_store_var(&b, data, new_data, 0x1);
610bf215546Sopenharmony_ci            }
611bf215546Sopenharmony_ci            nir_push_else(&b, NULL);
612bf215546Sopenharmony_ci            {
613bf215546Sopenharmony_ci               nir_store_var(
614bf215546Sopenharmony_ci                  &b, data,
615bf215546Sopenharmony_ci                  nir_load_ssbo(&b, 1, 32, param_buf,
616bf215546Sopenharmony_ci                                nir_iadd(&b, param_const_offset, nir_ishl_imm(&b, cur_idx, 2)),
617bf215546Sopenharmony_ci                                .align_mul = 4),
618bf215546Sopenharmony_ci                  0x1);
619bf215546Sopenharmony_ci            }
620bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci            nir_store_ssbo(
623bf215546Sopenharmony_ci               &b, nir_load_var(&b, data), cmd_buf.descriptor,
624bf215546Sopenharmony_ci               nir_iadd(&b, nir_load_var(&b, upload_offset), nir_ishl_imm(&b, cur_idx, 2)),
625bf215546Sopenharmony_ci               .access = ACCESS_NON_READABLE);
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci            nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
628bf215546Sopenharmony_ci         }
629bf215546Sopenharmony_ci         nir_pop_loop(&b, NULL);
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ci         nir_variable *shader_idx =
632bf215546Sopenharmony_ci            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
633bf215546Sopenharmony_ci         nir_store_var(&b, shader_idx, nir_imm_int(&b, 0), 0x1);
634bf215546Sopenharmony_ci         nir_ssa_def *shader_cnt = load_param16(&b, push_constant_shader_cnt);
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci         nir_push_loop(&b);
637bf215546Sopenharmony_ci         {
638bf215546Sopenharmony_ci            nir_ssa_def *cur_shader_idx = nir_load_var(&b, shader_idx);
639bf215546Sopenharmony_ci            nir_push_if(&b, nir_uge(&b, cur_shader_idx, shader_cnt));
640bf215546Sopenharmony_ci            {
641bf215546Sopenharmony_ci               nir_jump(&b, nir_jump_break);
642bf215546Sopenharmony_ci            }
643bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci            nir_ssa_def *reg_info = nir_load_ssbo(&b, 3, 32, param_buf, nir_iadd(&b, param_offset, nir_imul_imm(&b, cur_shader_idx, 12)), .align_mul = 4);
646bf215546Sopenharmony_ci            nir_ssa_def *upload_sgpr = nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 0), nir_imm_int(&b, 16));
647bf215546Sopenharmony_ci            nir_ssa_def *inline_sgpr = nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 16));
648bf215546Sopenharmony_ci            nir_ssa_def *inline_mask = nir_pack_64_2x32(&b, nir_channels(&b, reg_info, 0x6));
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci            nir_push_if(&b, nir_ine_imm(&b, upload_sgpr, 0));
651bf215546Sopenharmony_ci            {
652bf215546Sopenharmony_ci               nir_ssa_def *pkt[3] = {
653bf215546Sopenharmony_ci                  nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)),
654bf215546Sopenharmony_ci                  upload_sgpr,
655bf215546Sopenharmony_ci                  nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset))
656bf215546Sopenharmony_ci               };
657bf215546Sopenharmony_ci
658bf215546Sopenharmony_ci               dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 3));
659bf215546Sopenharmony_ci            }
660bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci            nir_push_if(&b, nir_ine_imm(&b, inline_sgpr, 0));
663bf215546Sopenharmony_ci            {
664bf215546Sopenharmony_ci               nir_ssa_def *inline_len = nir_bit_count(&b, inline_mask);
665bf215546Sopenharmony_ci               nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1);
666bf215546Sopenharmony_ci
667bf215546Sopenharmony_ci               nir_ssa_def *pkt[2] = {
668bf215546Sopenharmony_ci                  nir_pkt3(&b, PKT3_SET_SH_REG, inline_len),
669bf215546Sopenharmony_ci                  inline_sgpr
670bf215546Sopenharmony_ci               };
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci               dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 2));
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_ci               nir_push_loop(&b);
675bf215546Sopenharmony_ci               {
676bf215546Sopenharmony_ci                  nir_ssa_def *cur_idx = nir_load_var(&b, idx);
677bf215546Sopenharmony_ci                  nir_push_if(&b,
678bf215546Sopenharmony_ci                              nir_uge(&b, cur_idx, nir_imm_int(&b, 64 /* bits in inline_mask */)));
679bf215546Sopenharmony_ci                  {
680bf215546Sopenharmony_ci                     nir_jump(&b, nir_jump_break);
681bf215546Sopenharmony_ci                  }
682bf215546Sopenharmony_ci                  nir_pop_if(&b, NULL);
683bf215546Sopenharmony_ci
684bf215546Sopenharmony_ci                  nir_ssa_def *l = nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx);
685bf215546Sopenharmony_ci                  nir_push_if(&b, nir_ieq_imm(&b, nir_iand(&b, l, inline_mask), 0));
686bf215546Sopenharmony_ci                  {
687bf215546Sopenharmony_ci                     nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
688bf215546Sopenharmony_ci                     nir_jump(&b, nir_jump_continue);
689bf215546Sopenharmony_ci                  }
690bf215546Sopenharmony_ci                  nir_pop_if(&b, NULL);
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci                  nir_variable *data = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci                  nir_ssa_def *update = nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx));
695bf215546Sopenharmony_ci                  update = nir_bcsel(
696bf215546Sopenharmony_ci                     &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)),
697bf215546Sopenharmony_ci                     update, nir_imm_int64(&b, 0));
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci                  nir_push_if(&b, nir_ine_imm(&b, update, 0));
700bf215546Sopenharmony_ci                  {
701bf215546Sopenharmony_ci                     nir_ssa_def *stream_offset = nir_load_ssbo(
702bf215546Sopenharmony_ci                        &b, 1, 32, param_buf,
703bf215546Sopenharmony_ci                        nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)),
704bf215546Sopenharmony_ci                        .align_mul = 4);
705bf215546Sopenharmony_ci                     nir_ssa_def *new_data = nir_load_ssbo(&b, 1, 32, stream_buf, nir_iadd(&b, stream_base, stream_offset), .align_mul = 4);
706bf215546Sopenharmony_ci                     nir_store_var(&b, data, new_data, 0x1);
707bf215546Sopenharmony_ci                  }
708bf215546Sopenharmony_ci                  nir_push_else(&b, NULL);
709bf215546Sopenharmony_ci                  {
710bf215546Sopenharmony_ci                     nir_store_var(&b, data,
711bf215546Sopenharmony_ci                                   nir_load_ssbo(&b, 1, 32, param_buf,
712bf215546Sopenharmony_ci                                                 nir_iadd(&b, param_const_offset,
713bf215546Sopenharmony_ci                                                          nir_ishl_imm(&b, cur_idx, 2)),
714bf215546Sopenharmony_ci                                                 .align_mul = 4),
715bf215546Sopenharmony_ci                                   0x1);
716bf215546Sopenharmony_ci                  }
717bf215546Sopenharmony_ci                  nir_pop_if(&b, NULL);
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_ci                  dgc_emit(&b, &cmd_buf, nir_load_var(&b, data));
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci                  nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
722bf215546Sopenharmony_ci               }
723bf215546Sopenharmony_ci               nir_pop_loop(&b, NULL);
724bf215546Sopenharmony_ci            }
725bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
726bf215546Sopenharmony_ci            nir_store_var(&b, shader_idx, nir_iadd_imm(&b, cur_shader_idx, 1), 0x1);
727bf215546Sopenharmony_ci         }
728bf215546Sopenharmony_ci         nir_pop_loop(&b, NULL);
729bf215546Sopenharmony_ci      }
730bf215546Sopenharmony_ci      nir_pop_if(&b, 0);
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci      nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, emit_state), 1));
733bf215546Sopenharmony_ci      {
734bf215546Sopenharmony_ci         nir_ssa_def *stream_offset = nir_iadd(&b, load_param16(&b, state_offset), stream_base);
735bf215546Sopenharmony_ci         nir_ssa_def *state = nir_load_ssbo(&b, 1, 32, stream_buf, stream_offset, .align_mul = 4);
736bf215546Sopenharmony_ci         state = nir_iand_imm(&b, state, 1);
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci         nir_ssa_def *reg =
739bf215546Sopenharmony_ci            nir_ior(&b, load_param32(&b, pa_su_sc_mode_cntl_base), nir_ishl_imm(&b, state, 2));
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci         nir_ssa_def *cmd_values[3] = {
742bf215546Sopenharmony_ci            nir_imm_int(&b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)),
743bf215546Sopenharmony_ci            nir_imm_int(&b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg};
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_ci         dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 3));
746bf215546Sopenharmony_ci      }
747bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_ci      nir_ssa_def *scissor_count = load_param16(&b, scissor_count);
750bf215546Sopenharmony_ci      nir_push_if(&b, nir_ine_imm(&b, scissor_count, 0));
751bf215546Sopenharmony_ci      {
752bf215546Sopenharmony_ci         nir_ssa_def *scissor_offset = load_param16(&b, scissor_offset);
753bf215546Sopenharmony_ci         nir_variable *idx = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(),
754bf215546Sopenharmony_ci                                                 "scissor_copy_idx");
755bf215546Sopenharmony_ci         nir_store_var(&b, idx, nir_imm_int(&b, 0), 1);
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci         nir_push_loop(&b);
758bf215546Sopenharmony_ci         {
759bf215546Sopenharmony_ci            nir_ssa_def *cur_idx = nir_load_var(&b, idx);
760bf215546Sopenharmony_ci            nir_push_if(&b, nir_uge(&b, cur_idx, scissor_count));
761bf215546Sopenharmony_ci            {
762bf215546Sopenharmony_ci               nir_jump(&b, nir_jump_break);
763bf215546Sopenharmony_ci            }
764bf215546Sopenharmony_ci            nir_pop_if(&b, NULL);
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci            nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
767bf215546Sopenharmony_ci            nir_ssa_def *param_offset = nir_iadd(&b, scissor_offset, nir_imul_imm(&b, cur_idx, 4));
768bf215546Sopenharmony_ci            nir_ssa_def *value = nir_load_ssbo(&b, 1, 32, param_buf, param_offset, .align_mul = 4);
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci            dgc_emit(&b, &cmd_buf, value);
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci            nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 1);
773bf215546Sopenharmony_ci         }
774bf215546Sopenharmony_ci         nir_pop_loop(&b, NULL);
775bf215546Sopenharmony_ci      }
776bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci      nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0));
779bf215546Sopenharmony_ci      {
780bf215546Sopenharmony_ci         nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr);
781bf215546Sopenharmony_ci         nir_ssa_def *stream_offset =
782bf215546Sopenharmony_ci            nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base);
783bf215546Sopenharmony_ci
784bf215546Sopenharmony_ci         nir_ssa_def *draw_data0 =
785bf215546Sopenharmony_ci            nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
786bf215546Sopenharmony_ci         nir_ssa_def *vertex_count = nir_channel(&b, draw_data0, 0);
787bf215546Sopenharmony_ci         nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1);
788bf215546Sopenharmony_ci         nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 2);
789bf215546Sopenharmony_ci         nir_ssa_def *first_instance = nir_channel(&b, draw_data0, 3);
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci         nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, vertex_count, 0), nir_ine_imm(&b, instance_count, 0)));
792bf215546Sopenharmony_ci         {
793bf215546Sopenharmony_ci            dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance, sequence_id);
794bf215546Sopenharmony_ci            dgc_emit_instance_count(&b, &cmd_buf, instance_count);
795bf215546Sopenharmony_ci            dgc_emit_draw(&b, &cmd_buf, vertex_count);
796bf215546Sopenharmony_ci         }
797bf215546Sopenharmony_ci         nir_pop_if(&b, 0);
798bf215546Sopenharmony_ci      }
799bf215546Sopenharmony_ci      nir_push_else(&b, NULL);
800bf215546Sopenharmony_ci      {
801bf215546Sopenharmony_ci         nir_variable *index_size_var =
802bf215546Sopenharmony_ci            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "index_size");
803bf215546Sopenharmony_ci         nir_store_var(&b, index_size_var, load_param16(&b, base_index_size), 0x1);
804bf215546Sopenharmony_ci         nir_variable *max_index_count_var =
805bf215546Sopenharmony_ci            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "max_index_count");
806bf215546Sopenharmony_ci         nir_store_var(&b, max_index_count_var, load_param32(&b, max_index_count), 0x1);
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_ci         nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
809bf215546Sopenharmony_ci         nir_push_if(&b, bind_index_buffer);
810bf215546Sopenharmony_ci         {
811bf215546Sopenharmony_ci            nir_ssa_def *index_stream_offset =
812bf215546Sopenharmony_ci               nir_iadd(&b, load_param16(&b, index_buffer_offset), stream_base);
813bf215546Sopenharmony_ci            nir_ssa_def *data =
814bf215546Sopenharmony_ci               nir_load_ssbo(&b, 4, 32, stream_buf, index_stream_offset, .align_mul = 4);
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci            nir_ssa_def *vk_index_type = nir_channel(&b, data, 3);
817bf215546Sopenharmony_ci            nir_ssa_def *index_type = nir_bcsel(
818bf215546Sopenharmony_ci               &b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_32)),
819bf215546Sopenharmony_ci               nir_imm_int(&b, V_028A7C_VGT_INDEX_32), nir_imm_int(&b, V_028A7C_VGT_INDEX_16));
820bf215546Sopenharmony_ci            index_type = nir_bcsel(&b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_8)),
821bf215546Sopenharmony_ci                                   nir_imm_int(&b, V_028A7C_VGT_INDEX_8), index_type);
822bf215546Sopenharmony_ci
823bf215546Sopenharmony_ci            nir_ssa_def *index_size = nir_iand_imm(
824bf215546Sopenharmony_ci               &b, nir_ushr(&b, nir_imm_int(&b, 0x142), nir_imul_imm(&b, index_type, 4)), 0xf);
825bf215546Sopenharmony_ci            nir_store_var(&b, index_size_var, index_size, 0x1);
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci            nir_ssa_def *max_index_count = nir_udiv(&b, nir_channel(&b, data, 2), index_size);
828bf215546Sopenharmony_ci            nir_store_var(&b, max_index_count_var, max_index_count, 0x1);
829bf215546Sopenharmony_ci
830bf215546Sopenharmony_ci            nir_ssa_def *cmd_values[3 + 2 + 3];
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_ci            if (dev->physical_device->rad_info.gfx_level >= GFX9) {
833bf215546Sopenharmony_ci               unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
834bf215546Sopenharmony_ci               if (dev->physical_device->rad_info.gfx_level < GFX9 ||
835bf215546Sopenharmony_ci                   (dev->physical_device->rad_info.gfx_level == GFX9 &&
836bf215546Sopenharmony_ci                    dev->physical_device->rad_info.me_fw_version < 26))
837bf215546Sopenharmony_ci                  opcode = PKT3_SET_UCONFIG_REG;
838bf215546Sopenharmony_ci               cmd_values[0] = nir_imm_int(&b, PKT3(opcode, 1, 0));
839bf215546Sopenharmony_ci               cmd_values[1] = nir_imm_int(
840bf215546Sopenharmony_ci                  &b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
841bf215546Sopenharmony_ci               cmd_values[2] = index_type;
842bf215546Sopenharmony_ci            } else {
843bf215546Sopenharmony_ci               cmd_values[0] = nir_imm_int(&b, PKT3(PKT3_INDEX_TYPE, 0, 0));
844bf215546Sopenharmony_ci               cmd_values[1] = index_type;
845bf215546Sopenharmony_ci               cmd_values[2] = nir_imm_int(&b, PKT3_NOP_PAD);
846bf215546Sopenharmony_ci            }
847bf215546Sopenharmony_ci
848bf215546Sopenharmony_ci            nir_ssa_def *addr_upper = nir_channel(&b, data, 1);
849bf215546Sopenharmony_ci            addr_upper = nir_ishr_imm(&b, nir_ishl_imm(&b, addr_upper, 16), 16);
850bf215546Sopenharmony_ci
851bf215546Sopenharmony_ci            cmd_values[3] = nir_imm_int(&b, PKT3(PKT3_INDEX_BASE, 1, 0));
852bf215546Sopenharmony_ci            cmd_values[4] = nir_channel(&b, data, 0);
853bf215546Sopenharmony_ci            cmd_values[5] = addr_upper;
854bf215546Sopenharmony_ci            cmd_values[6] = nir_imm_int(&b, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
855bf215546Sopenharmony_ci            cmd_values[7] = max_index_count;
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci            dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 8));
858bf215546Sopenharmony_ci         }
859bf215546Sopenharmony_ci         nir_pop_if(&b, NULL);
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_ci         nir_ssa_def *index_size = nir_load_var(&b, index_size_var);
862bf215546Sopenharmony_ci         nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var);
863bf215546Sopenharmony_ci         nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr);
864bf215546Sopenharmony_ci         nir_ssa_def *stream_offset =
865bf215546Sopenharmony_ci            nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base);
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci         index_size =
868bf215546Sopenharmony_ci            nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size);
869bf215546Sopenharmony_ci         max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var),
870bf215546Sopenharmony_ci                                     max_index_count);
871bf215546Sopenharmony_ci         nir_ssa_def *draw_data0 =
872bf215546Sopenharmony_ci            nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
873bf215546Sopenharmony_ci         nir_ssa_def *draw_data1 = nir_load_ssbo(
874bf215546Sopenharmony_ci            &b, 1, 32, stream_buf, nir_iadd_imm(&b, stream_offset, 16), .align_mul = 4);
875bf215546Sopenharmony_ci         nir_ssa_def *index_count = nir_channel(&b, draw_data0, 0);
876bf215546Sopenharmony_ci         nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1);
877bf215546Sopenharmony_ci         nir_ssa_def *first_index = nir_channel(&b, draw_data0, 2);
878bf215546Sopenharmony_ci         nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 3);
879bf215546Sopenharmony_ci         nir_ssa_def *first_instance = nir_channel(&b, draw_data1, 0);
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_ci         nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, index_count, 0), nir_ine_imm(&b, instance_count, 0)));
882bf215546Sopenharmony_ci         {
883bf215546Sopenharmony_ci            dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance, sequence_id);
884bf215546Sopenharmony_ci            dgc_emit_instance_count(&b, &cmd_buf, instance_count);
885bf215546Sopenharmony_ci            dgc_emit_draw_indexed(&b, &cmd_buf, first_index, index_count,
886bf215546Sopenharmony_ci                                       max_index_count);
887bf215546Sopenharmony_ci         }
888bf215546Sopenharmony_ci         nir_pop_if(&b, 0);
889bf215546Sopenharmony_ci      }
890bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
891bf215546Sopenharmony_ci
892bf215546Sopenharmony_ci      /* Pad the cmdbuffer if we did not use the whole stride */
893bf215546Sopenharmony_ci      nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end));
894bf215546Sopenharmony_ci      {
895bf215546Sopenharmony_ci         nir_ssa_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset));
896bf215546Sopenharmony_ci         cnt = nir_ushr_imm(&b, cnt, 2);
897bf215546Sopenharmony_ci         cnt = nir_iadd_imm(&b, cnt, -2);
898bf215546Sopenharmony_ci         nir_ssa_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt);
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci         dgc_emit(&b, &cmd_buf, pkt);
901bf215546Sopenharmony_ci      }
902bf215546Sopenharmony_ci      nir_pop_if(&b, NULL);
903bf215546Sopenharmony_ci   }
904bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci   build_dgc_buffer_tail(&b, sequence_count);
907bf215546Sopenharmony_ci   return b.shader;
908bf215546Sopenharmony_ci}
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_civoid
911bf215546Sopenharmony_ciradv_device_finish_dgc_prepare_state(struct radv_device *device)
912bf215546Sopenharmony_ci{
913bf215546Sopenharmony_ci   radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.dgc_prepare.pipeline,
914bf215546Sopenharmony_ci                        &device->meta_state.alloc);
915bf215546Sopenharmony_ci   radv_DestroyPipelineLayout(radv_device_to_handle(device),
916bf215546Sopenharmony_ci                              device->meta_state.dgc_prepare.p_layout, &device->meta_state.alloc);
917bf215546Sopenharmony_ci   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
918bf215546Sopenharmony_ci                                                        device->meta_state.dgc_prepare.ds_layout,
919bf215546Sopenharmony_ci                                                        &device->meta_state.alloc);
920bf215546Sopenharmony_ci}
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ciVkResult
923bf215546Sopenharmony_ciradv_device_init_dgc_prepare_state(struct radv_device *device)
924bf215546Sopenharmony_ci{
925bf215546Sopenharmony_ci   VkResult result;
926bf215546Sopenharmony_ci   nir_shader *cs = build_dgc_prepare_shader(device);
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_ci   VkDescriptorSetLayoutCreateInfo ds_create_info = {
929bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
930bf215546Sopenharmony_ci      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
931bf215546Sopenharmony_ci      .bindingCount = DGC_NUM_DESCS,
932bf215546Sopenharmony_ci      .pBindings = (VkDescriptorSetLayoutBinding[]){
933bf215546Sopenharmony_ci         {.binding = DGC_DESC_STREAM,
934bf215546Sopenharmony_ci          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
935bf215546Sopenharmony_ci          .descriptorCount = 1,
936bf215546Sopenharmony_ci          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
937bf215546Sopenharmony_ci          .pImmutableSamplers = NULL},
938bf215546Sopenharmony_ci         {.binding = DGC_DESC_PREPARE,
939bf215546Sopenharmony_ci          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
940bf215546Sopenharmony_ci          .descriptorCount = 1,
941bf215546Sopenharmony_ci          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
942bf215546Sopenharmony_ci          .pImmutableSamplers = NULL},
943bf215546Sopenharmony_ci         {.binding = DGC_DESC_PARAMS,
944bf215546Sopenharmony_ci          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
945bf215546Sopenharmony_ci          .descriptorCount = 1,
946bf215546Sopenharmony_ci          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
947bf215546Sopenharmony_ci          .pImmutableSamplers = NULL},
948bf215546Sopenharmony_ci         {.binding = DGC_DESC_COUNT,
949bf215546Sopenharmony_ci          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
950bf215546Sopenharmony_ci          .descriptorCount = 1,
951bf215546Sopenharmony_ci          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
952bf215546Sopenharmony_ci          .pImmutableSamplers = NULL},
953bf215546Sopenharmony_ci      }};
954bf215546Sopenharmony_ci
955bf215546Sopenharmony_ci   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
956bf215546Sopenharmony_ci                                           &device->meta_state.alloc,
957bf215546Sopenharmony_ci                                           &device->meta_state.dgc_prepare.ds_layout);
958bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
959bf215546Sopenharmony_ci      goto cleanup;
960bf215546Sopenharmony_ci
961bf215546Sopenharmony_ci   const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
962bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
963bf215546Sopenharmony_ci      .setLayoutCount = 1,
964bf215546Sopenharmony_ci      .pSetLayouts = &device->meta_state.dgc_prepare.ds_layout,
965bf215546Sopenharmony_ci      .pushConstantRangeCount = 1,
966bf215546Sopenharmony_ci      .pPushConstantRanges =
967bf215546Sopenharmony_ci         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)},
968bf215546Sopenharmony_ci   };
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
971bf215546Sopenharmony_ci                                      &device->meta_state.alloc,
972bf215546Sopenharmony_ci                                      &device->meta_state.dgc_prepare.p_layout);
973bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
974bf215546Sopenharmony_ci      goto cleanup;
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci   VkPipelineShaderStageCreateInfo shader_stage = {
977bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
978bf215546Sopenharmony_ci      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
979bf215546Sopenharmony_ci      .module = vk_shader_module_handle_from_nir(cs),
980bf215546Sopenharmony_ci      .pName = "main",
981bf215546Sopenharmony_ci      .pSpecializationInfo = NULL,
982bf215546Sopenharmony_ci   };
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci   VkComputePipelineCreateInfo pipeline_info = {
985bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
986bf215546Sopenharmony_ci      .stage = shader_stage,
987bf215546Sopenharmony_ci      .flags = 0,
988bf215546Sopenharmony_ci      .layout = device->meta_state.dgc_prepare.p_layout,
989bf215546Sopenharmony_ci   };
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ci   result = radv_CreateComputePipelines(
992bf215546Sopenharmony_ci      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
993bf215546Sopenharmony_ci      &pipeline_info, &device->meta_state.alloc, &device->meta_state.dgc_prepare.pipeline);
994bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
995bf215546Sopenharmony_ci      goto cleanup;
996bf215546Sopenharmony_ci
997bf215546Sopenharmony_cicleanup:
998bf215546Sopenharmony_ci   ralloc_free(cs);
999bf215546Sopenharmony_ci   return result;
1000bf215546Sopenharmony_ci}
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ciVkResult
1003bf215546Sopenharmony_ciradv_CreateIndirectCommandsLayoutNV(VkDevice _device,
1004bf215546Sopenharmony_ci                                    const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo,
1005bf215546Sopenharmony_ci                                    const VkAllocationCallbacks *pAllocator,
1006bf215546Sopenharmony_ci                                    VkIndirectCommandsLayoutNV *pIndirectCommandsLayout)
1007bf215546Sopenharmony_ci{
1008bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
1009bf215546Sopenharmony_ci   struct radv_indirect_command_layout *layout;
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_ci   size_t size =
1012bf215546Sopenharmony_ci      sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV);
1013bf215546Sopenharmony_ci
1014bf215546Sopenharmony_ci   layout =
1015bf215546Sopenharmony_ci      vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout),
1016bf215546Sopenharmony_ci                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1017bf215546Sopenharmony_ci   if (!layout)
1018bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_ci   vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV);
1021bf215546Sopenharmony_ci
1022bf215546Sopenharmony_ci   layout->input_stride = pCreateInfo->pStreamStrides[0];
1023bf215546Sopenharmony_ci   layout->token_count = pCreateInfo->tokenCount;
1024bf215546Sopenharmony_ci   typed_memcpy(layout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount);
1025bf215546Sopenharmony_ci
1026bf215546Sopenharmony_ci   layout->ibo_type_32 = VK_INDEX_TYPE_UINT32;
1027bf215546Sopenharmony_ci   layout->ibo_type_8 = VK_INDEX_TYPE_UINT8_EXT;
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCreateInfo->tokenCount; ++i) {
1030bf215546Sopenharmony_ci      switch (pCreateInfo->pTokens[i].tokenType) {
1031bf215546Sopenharmony_ci      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
1032bf215546Sopenharmony_ci         layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
1033bf215546Sopenharmony_ci         break;
1034bf215546Sopenharmony_ci      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
1035bf215546Sopenharmony_ci         layout->indexed = true;
1036bf215546Sopenharmony_ci         layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
1037bf215546Sopenharmony_ci         break;
1038bf215546Sopenharmony_ci      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
1039bf215546Sopenharmony_ci         layout->binds_index_buffer = true;
1040bf215546Sopenharmony_ci         layout->index_buffer_offset = pCreateInfo->pTokens[i].offset;
1041bf215546Sopenharmony_ci         /* 16-bit is implied if we find no match. */
1042bf215546Sopenharmony_ci         for (unsigned j = 0; j < pCreateInfo->pTokens[i].indexTypeCount; j++) {
1043bf215546Sopenharmony_ci            if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT32)
1044bf215546Sopenharmony_ci               layout->ibo_type_32 = pCreateInfo->pTokens[i].pIndexTypeValues[j];
1045bf215546Sopenharmony_ci            else if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT8_EXT)
1046bf215546Sopenharmony_ci               layout->ibo_type_8 = pCreateInfo->pTokens[i].pIndexTypeValues[j];
1047bf215546Sopenharmony_ci         }
1048bf215546Sopenharmony_ci         break;
1049bf215546Sopenharmony_ci      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
1050bf215546Sopenharmony_ci         layout->bind_vbo_mask |= 1u << pCreateInfo->pTokens[i].vertexBindingUnit;
1051bf215546Sopenharmony_ci         layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] =
1052bf215546Sopenharmony_ci            pCreateInfo->pTokens[i].offset;
1053bf215546Sopenharmony_ci         if (pCreateInfo->pTokens[i].vertexDynamicStride)
1054bf215546Sopenharmony_ci            layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] |= DGC_DYNAMIC_STRIDE;
1055bf215546Sopenharmony_ci         break;
1056bf215546Sopenharmony_ci      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
1057bf215546Sopenharmony_ci         for (unsigned j = pCreateInfo->pTokens[i].pushconstantOffset / 4, k = 0;
1058bf215546Sopenharmony_ci              k < pCreateInfo->pTokens[i].pushconstantSize / 4; ++j, ++k) {
1059bf215546Sopenharmony_ci            layout->push_constant_mask |= 1ull << j;
1060bf215546Sopenharmony_ci            layout->push_constant_offsets[j] = pCreateInfo->pTokens[i].offset + k * 4;
1061bf215546Sopenharmony_ci         }
1062bf215546Sopenharmony_ci         break;
1063bf215546Sopenharmony_ci      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
1064bf215546Sopenharmony_ci         layout->binds_state = true;
1065bf215546Sopenharmony_ci         layout->state_offset = pCreateInfo->pTokens[i].offset;
1066bf215546Sopenharmony_ci         break;
1067bf215546Sopenharmony_ci      default:
1068bf215546Sopenharmony_ci         unreachable("Unhandled token type");
1069bf215546Sopenharmony_ci      }
1070bf215546Sopenharmony_ci   }
1071bf215546Sopenharmony_ci   if (!layout->indexed)
1072bf215546Sopenharmony_ci      layout->binds_index_buffer = false;
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci   *pIndirectCommandsLayout = radv_indirect_command_layout_to_handle(layout);
1075bf215546Sopenharmony_ci   return VK_SUCCESS;
1076bf215546Sopenharmony_ci}
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_civoid
1079bf215546Sopenharmony_ciradv_DestroyIndirectCommandsLayoutNV(VkDevice _device,
1080bf215546Sopenharmony_ci                                     VkIndirectCommandsLayoutNV indirectCommandsLayout,
1081bf215546Sopenharmony_ci                                     const VkAllocationCallbacks *pAllocator)
1082bf215546Sopenharmony_ci{
1083bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
1084bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_indirect_command_layout, layout, indirectCommandsLayout);
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci   if (!layout)
1087bf215546Sopenharmony_ci      return;
1088bf215546Sopenharmony_ci
1089bf215546Sopenharmony_ci   vk_object_base_finish(&layout->base);
1090bf215546Sopenharmony_ci   vk_free2(&device->vk.alloc, pAllocator, layout);
1091bf215546Sopenharmony_ci}
1092bf215546Sopenharmony_ci
1093bf215546Sopenharmony_civoid
1094bf215546Sopenharmony_ciradv_GetGeneratedCommandsMemoryRequirementsNV(
1095bf215546Sopenharmony_ci   VkDevice _device, const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo,
1096bf215546Sopenharmony_ci   VkMemoryRequirements2 *pMemoryRequirements)
1097bf215546Sopenharmony_ci{
1098bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
1099bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
1100bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline);
1101bf215546Sopenharmony_ci   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci   uint32_t cmd_stride, upload_stride;
1104bf215546Sopenharmony_ci   radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);
1105bf215546Sopenharmony_ci
1106bf215546Sopenharmony_ci   VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pInfo->maxSequencesCount);
1107bf215546Sopenharmony_ci   VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
1108bf215546Sopenharmony_ci
1109bf215546Sopenharmony_ci   pMemoryRequirements->memoryRequirements.memoryTypeBits =
1110bf215546Sopenharmony_ci      device->physical_device->memory_types_32bit;
1111bf215546Sopenharmony_ci   pMemoryRequirements->memoryRequirements.alignment = 256;
1112bf215546Sopenharmony_ci   pMemoryRequirements->memoryRequirements.size =
1113bf215546Sopenharmony_ci      align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
1114bf215546Sopenharmony_ci}
1115bf215546Sopenharmony_ci
1116bf215546Sopenharmony_civoid
1117bf215546Sopenharmony_ciradv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer,
1118bf215546Sopenharmony_ci                                      const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
1119bf215546Sopenharmony_ci{
1120bf215546Sopenharmony_ci   /* Can't do anything here as we depend on some dynamic state in some cases that we only know
1121bf215546Sopenharmony_ci    * at draw time. */
1122bf215546Sopenharmony_ci}
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci/* Always need to call this directly before draw due to dependence on bound state. */
1125bf215546Sopenharmony_civoid
1126bf215546Sopenharmony_ciradv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
1127bf215546Sopenharmony_ci                 const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
1128bf215546Sopenharmony_ci{
1129bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_indirect_command_layout, layout,
1130bf215546Sopenharmony_ci                  pGeneratedCommandsInfo->indirectCommandsLayout);
1131bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
1132bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
1133bf215546Sopenharmony_ci   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
1134bf215546Sopenharmony_ci   struct radv_meta_saved_state saved_state;
1135bf215546Sopenharmony_ci   struct radv_buffer token_buffer;
1136bf215546Sopenharmony_ci
1137bf215546Sopenharmony_ci   uint32_t cmd_stride, upload_stride;
1138bf215546Sopenharmony_ci   radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);
1139bf215546Sopenharmony_ci
1140bf215546Sopenharmony_ci   unsigned cmd_buf_size =
1141bf215546Sopenharmony_ci      radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount);
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_ci   unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(graphics_pipeline->vb_desc_usage_mask) * 24 : 0;
1144bf215546Sopenharmony_ci   unsigned const_size = graphics_pipeline->base.push_constant_size +
1145bf215546Sopenharmony_ci                         16 * graphics_pipeline->base.dynamic_offset_count +
1146bf215546Sopenharmony_ci                         sizeof(layout->push_constant_offsets) + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;
1147bf215546Sopenharmony_ci   if (!layout->push_constant_mask)
1148bf215546Sopenharmony_ci      const_size = 0;
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_ci   unsigned scissor_size = (8 + 2 * cmd_buffer->state.dynamic.scissor.count) * 4;
1151bf215546Sopenharmony_ci   if (!layout->binds_state || !cmd_buffer->state.dynamic.scissor.count ||
1152bf215546Sopenharmony_ci       !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
1153bf215546Sopenharmony_ci      scissor_size = 0;
1154bf215546Sopenharmony_ci
1155bf215546Sopenharmony_ci   unsigned upload_size = MAX2(vb_size + const_size + scissor_size, 16);
1156bf215546Sopenharmony_ci
1157bf215546Sopenharmony_ci   void *upload_data;
1158bf215546Sopenharmony_ci   unsigned upload_offset;
1159bf215546Sopenharmony_ci   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, upload_size, &upload_offset, &upload_data)) {
1160bf215546Sopenharmony_ci      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1161bf215546Sopenharmony_ci      return;
1162bf215546Sopenharmony_ci   }
1163bf215546Sopenharmony_ci
1164bf215546Sopenharmony_ci   void *upload_data_base = upload_data;
1165bf215546Sopenharmony_ci
1166bf215546Sopenharmony_ci   radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size,
1167bf215546Sopenharmony_ci                    upload_offset);
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci   uint64_t upload_addr = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset +
1170bf215546Sopenharmony_ci                          pGeneratedCommandsInfo->preprocessOffset;
1171bf215546Sopenharmony_ci
1172bf215546Sopenharmony_ci   uint16_t vtx_base_sgpr =
1173bf215546Sopenharmony_ci      (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
1174bf215546Sopenharmony_ci   if (cmd_buffer->state.graphics_pipeline->uses_drawid)
1175bf215546Sopenharmony_ci      vtx_base_sgpr |= DGC_USES_DRAWID;
1176bf215546Sopenharmony_ci   if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
1177bf215546Sopenharmony_ci      vtx_base_sgpr |= DGC_USES_BASEINSTANCE;
1178bf215546Sopenharmony_ci
1179bf215546Sopenharmony_ci   uint16_t vbo_sgpr =
1180bf215546Sopenharmony_ci      ((radv_lookup_user_sgpr(&graphics_pipeline->base, MESA_SHADER_VERTEX, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 +
1181bf215546Sopenharmony_ci        graphics_pipeline->base.user_data_0[MESA_SHADER_VERTEX]) -
1182bf215546Sopenharmony_ci       SI_SH_REG_OFFSET) >>
1183bf215546Sopenharmony_ci      2;
1184bf215546Sopenharmony_ci   struct radv_dgc_params params = {
1185bf215546Sopenharmony_ci      .cmd_buf_stride = cmd_stride,
1186bf215546Sopenharmony_ci      .cmd_buf_size = cmd_buf_size,
1187bf215546Sopenharmony_ci      .upload_addr = (uint32_t)upload_addr,
1188bf215546Sopenharmony_ci      .upload_stride = upload_stride,
1189bf215546Sopenharmony_ci      .sequence_count = pGeneratedCommandsInfo->sequencesCount,
1190bf215546Sopenharmony_ci      .stream_stride = layout->input_stride,
1191bf215546Sopenharmony_ci      .draw_indexed = layout->indexed,
1192bf215546Sopenharmony_ci      .draw_params_offset = layout->draw_params_offset,
1193bf215546Sopenharmony_ci      .base_index_size =
1194bf215546Sopenharmony_ci         layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type),
1195bf215546Sopenharmony_ci      .vtx_base_sgpr = vtx_base_sgpr,
1196bf215546Sopenharmony_ci      .max_index_count = cmd_buffer->state.max_index_count,
1197bf215546Sopenharmony_ci      .index_buffer_offset = layout->index_buffer_offset,
1198bf215546Sopenharmony_ci      .vbo_reg = vbo_sgpr,
1199bf215546Sopenharmony_ci      .ibo_type_32 = layout->ibo_type_32,
1200bf215546Sopenharmony_ci      .ibo_type_8 = layout->ibo_type_8,
1201bf215546Sopenharmony_ci      .emit_state = layout->binds_state,
1202bf215546Sopenharmony_ci      .pa_su_sc_mode_cntl_base = radv_get_pa_su_sc_mode_cntl(cmd_buffer) & C_028814_FACE,
1203bf215546Sopenharmony_ci      .state_offset = layout->state_offset,
1204bf215546Sopenharmony_ci   };
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci   if (layout->bind_vbo_mask) {
1207bf215546Sopenharmony_ci      radv_write_vertex_descriptors(cmd_buffer, graphics_pipeline, true, upload_data);
1208bf215546Sopenharmony_ci
1209bf215546Sopenharmony_ci      uint32_t *vbo_info = (uint32_t *)((char *)upload_data + graphics_pipeline->vb_desc_alloc_size);
1210bf215546Sopenharmony_ci
1211bf215546Sopenharmony_ci      struct radv_shader *vs_shader = radv_get_shader(&graphics_pipeline->base, MESA_SHADER_VERTEX);
1212bf215546Sopenharmony_ci      const struct radv_vs_input_state *vs_state =
1213bf215546Sopenharmony_ci         vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL;
1214bf215546Sopenharmony_ci      uint32_t mask = graphics_pipeline->vb_desc_usage_mask;
1215bf215546Sopenharmony_ci      unsigned idx = 0;
1216bf215546Sopenharmony_ci      while (mask) {
1217bf215546Sopenharmony_ci         unsigned i = u_bit_scan(&mask);
1218bf215546Sopenharmony_ci         unsigned binding =
1219bf215546Sopenharmony_ci            vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
1220bf215546Sopenharmony_ci                     : (graphics_pipeline->use_per_attribute_vb_descs ? graphics_pipeline->attrib_bindings[i] : i);
1221bf215546Sopenharmony_ci         uint32_t attrib_end =
1222bf215546Sopenharmony_ci            vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : graphics_pipeline->attrib_ends[i];
1223bf215546Sopenharmony_ci
1224bf215546Sopenharmony_ci         params.vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx;
1225bf215546Sopenharmony_ci         vbo_info[2 * idx] = ((graphics_pipeline->use_per_attribute_vb_descs ? 1u : 0u) << 31) |
1226bf215546Sopenharmony_ci                             (vs_state ? vs_state->offsets[i] << 16 : 0) |
1227bf215546Sopenharmony_ci                             layout->vbo_offsets[binding];
1228bf215546Sopenharmony_ci         vbo_info[2 * idx + 1] = graphics_pipeline->attrib_index_offset[i] | (attrib_end << 16);
1229bf215546Sopenharmony_ci         ++idx;
1230bf215546Sopenharmony_ci      }
1231bf215546Sopenharmony_ci      params.vbo_cnt = idx | (vs_state ? DGC_DYNAMIC_VERTEX_INPUT : 0);
1232bf215546Sopenharmony_ci      upload_data = (char *)upload_data + vb_size;
1233bf215546Sopenharmony_ci   }
1234bf215546Sopenharmony_ci
1235bf215546Sopenharmony_ci   if (layout->push_constant_mask) {
1236bf215546Sopenharmony_ci      uint32_t *desc = upload_data;
1237bf215546Sopenharmony_ci      upload_data = (char *)upload_data + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;
1238bf215546Sopenharmony_ci
1239bf215546Sopenharmony_ci      unsigned idx = 0;
1240bf215546Sopenharmony_ci      for (unsigned i = 0; i < ARRAY_SIZE(graphics_pipeline->base.shaders); ++i) {
1241bf215546Sopenharmony_ci         if (!graphics_pipeline->base.shaders[i])
1242bf215546Sopenharmony_ci            continue;
1243bf215546Sopenharmony_ci
1244bf215546Sopenharmony_ci         struct radv_userdata_locations *locs = &graphics_pipeline->base.shaders[i]->info.user_sgprs_locs;
1245bf215546Sopenharmony_ci         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
1246bf215546Sopenharmony_ci            params.const_copy = 1;
1247bf215546Sopenharmony_ci
1248bf215546Sopenharmony_ci         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
1249bf215546Sopenharmony_ci             locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
1250bf215546Sopenharmony_ci            unsigned upload_sgpr = 0;
1251bf215546Sopenharmony_ci            unsigned inline_sgpr = 0;
1252bf215546Sopenharmony_ci
1253bf215546Sopenharmony_ci            if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
1254bf215546Sopenharmony_ci               upload_sgpr =
1255bf215546Sopenharmony_ci                  (graphics_pipeline->base.user_data_0[i] + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
1256bf215546Sopenharmony_ci                   SI_SH_REG_OFFSET) >>
1257bf215546Sopenharmony_ci                  2;
1258bf215546Sopenharmony_ci            }
1259bf215546Sopenharmony_ci
1260bf215546Sopenharmony_ci            if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
1261bf215546Sopenharmony_ci               inline_sgpr = (graphics_pipeline->base.user_data_0[i] +
1262bf215546Sopenharmony_ci                              4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
1263bf215546Sopenharmony_ci                              SI_SH_REG_OFFSET) >>
1264bf215546Sopenharmony_ci                             2;
1265bf215546Sopenharmony_ci               desc[idx * 3 + 1] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask;
1266bf215546Sopenharmony_ci               desc[idx * 3 + 2] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask >> 32;
1267bf215546Sopenharmony_ci            }
1268bf215546Sopenharmony_ci            desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
1269bf215546Sopenharmony_ci            ++idx;
1270bf215546Sopenharmony_ci         }
1271bf215546Sopenharmony_ci      }
1272bf215546Sopenharmony_ci
1273bf215546Sopenharmony_ci      params.push_constant_shader_cnt = idx;
1274bf215546Sopenharmony_ci
1275bf215546Sopenharmony_ci      params.const_copy_size = graphics_pipeline->base.push_constant_size +
1276bf215546Sopenharmony_ci                               16 * graphics_pipeline->base.dynamic_offset_count;
1277bf215546Sopenharmony_ci      params.push_constant_mask = layout->push_constant_mask;
1278bf215546Sopenharmony_ci
1279bf215546Sopenharmony_ci      memcpy(upload_data, layout->push_constant_offsets, sizeof(layout->push_constant_offsets));
1280bf215546Sopenharmony_ci      upload_data = (char *)upload_data + sizeof(layout->push_constant_offsets);
1281bf215546Sopenharmony_ci
1282bf215546Sopenharmony_ci      memcpy(upload_data, cmd_buffer->push_constants, graphics_pipeline->base.push_constant_size);
1283bf215546Sopenharmony_ci      upload_data = (char *)upload_data + graphics_pipeline->base.push_constant_size;
1284bf215546Sopenharmony_ci
1285bf215546Sopenharmony_ci      struct radv_descriptor_state *descriptors_state =
1286bf215546Sopenharmony_ci         radv_get_descriptors_state(cmd_buffer, pGeneratedCommandsInfo->pipelineBindPoint);
1287bf215546Sopenharmony_ci      memcpy(upload_data, descriptors_state->dynamic_buffers, 16 * graphics_pipeline->base.dynamic_offset_count);
1288bf215546Sopenharmony_ci      upload_data = (char *)upload_data + 16 * graphics_pipeline->base.dynamic_offset_count;
1289bf215546Sopenharmony_ci   }
1290bf215546Sopenharmony_ci
1291bf215546Sopenharmony_ci   if (scissor_size) {
1292bf215546Sopenharmony_ci      params.scissor_offset = (char*)upload_data - (char*)upload_data_base;
1293bf215546Sopenharmony_ci      params.scissor_count = scissor_size / 4;
1294bf215546Sopenharmony_ci
1295bf215546Sopenharmony_ci      struct radeon_cmdbuf scissor_cs = {
1296bf215546Sopenharmony_ci         .buf = upload_data,
1297bf215546Sopenharmony_ci         .cdw = 0,
1298bf215546Sopenharmony_ci         .max_dw = scissor_size / 4
1299bf215546Sopenharmony_ci      };
1300bf215546Sopenharmony_ci
1301bf215546Sopenharmony_ci      radv_write_scissors(cmd_buffer, &scissor_cs);
1302bf215546Sopenharmony_ci      assert(scissor_cs.cdw * 4 == scissor_size);
1303bf215546Sopenharmony_ci      upload_data = (char *)upload_data + scissor_size;
1304bf215546Sopenharmony_ci   }
1305bf215546Sopenharmony_ci
1306bf215546Sopenharmony_ci   VkWriteDescriptorSet ds_writes[5];
1307bf215546Sopenharmony_ci   VkDescriptorBufferInfo buf_info[ARRAY_SIZE(ds_writes)];
1308bf215546Sopenharmony_ci   int ds_cnt = 0;
1309bf215546Sopenharmony_ci   buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer),
1310bf215546Sopenharmony_ci                                               .offset = 0,
1311bf215546Sopenharmony_ci                                               .range = upload_size};
1312bf215546Sopenharmony_ci   ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1313bf215546Sopenharmony_ci                                              .dstBinding = DGC_DESC_PARAMS,
1314bf215546Sopenharmony_ci                                              .dstArrayElement = 0,
1315bf215546Sopenharmony_ci                                              .descriptorCount = 1,
1316bf215546Sopenharmony_ci                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1317bf215546Sopenharmony_ci                                              .pBufferInfo = &buf_info[ds_cnt]};
1318bf215546Sopenharmony_ci   ++ds_cnt;
1319bf215546Sopenharmony_ci
1320bf215546Sopenharmony_ci   buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->preprocessBuffer,
1321bf215546Sopenharmony_ci                                               .offset = pGeneratedCommandsInfo->preprocessOffset,
1322bf215546Sopenharmony_ci                                               .range = pGeneratedCommandsInfo->preprocessSize};
1323bf215546Sopenharmony_ci   ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1324bf215546Sopenharmony_ci                                              .dstBinding = DGC_DESC_PREPARE,
1325bf215546Sopenharmony_ci                                              .dstArrayElement = 0,
1326bf215546Sopenharmony_ci                                              .descriptorCount = 1,
1327bf215546Sopenharmony_ci                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1328bf215546Sopenharmony_ci                                              .pBufferInfo = &buf_info[ds_cnt]};
1329bf215546Sopenharmony_ci   ++ds_cnt;
1330bf215546Sopenharmony_ci
1331bf215546Sopenharmony_ci   if (pGeneratedCommandsInfo->streamCount > 0) {
1332bf215546Sopenharmony_ci      buf_info[ds_cnt] =
1333bf215546Sopenharmony_ci         (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer,
1334bf215546Sopenharmony_ci                                  .offset = pGeneratedCommandsInfo->pStreams[0].offset,
1335bf215546Sopenharmony_ci                                  .range = VK_WHOLE_SIZE};
1336bf215546Sopenharmony_ci      ds_writes[ds_cnt] =
1337bf215546Sopenharmony_ci         (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1338bf215546Sopenharmony_ci                                .dstBinding = DGC_DESC_STREAM,
1339bf215546Sopenharmony_ci                                .dstArrayElement = 0,
1340bf215546Sopenharmony_ci                                .descriptorCount = 1,
1341bf215546Sopenharmony_ci                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1342bf215546Sopenharmony_ci                                .pBufferInfo = &buf_info[ds_cnt]};
1343bf215546Sopenharmony_ci      ++ds_cnt;
1344bf215546Sopenharmony_ci   }
1345bf215546Sopenharmony_ci
1346bf215546Sopenharmony_ci   if (pGeneratedCommandsInfo->sequencesCountBuffer != VK_NULL_HANDLE) {
1347bf215546Sopenharmony_ci      buf_info[ds_cnt] =
1348bf215546Sopenharmony_ci         (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer,
1349bf215546Sopenharmony_ci                                  .offset = pGeneratedCommandsInfo->sequencesCountOffset,
1350bf215546Sopenharmony_ci                                  .range = VK_WHOLE_SIZE};
1351bf215546Sopenharmony_ci      ds_writes[ds_cnt] =
1352bf215546Sopenharmony_ci         (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1353bf215546Sopenharmony_ci                                .dstBinding = DGC_DESC_COUNT,
1354bf215546Sopenharmony_ci                                .dstArrayElement = 0,
1355bf215546Sopenharmony_ci                                .descriptorCount = 1,
1356bf215546Sopenharmony_ci                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1357bf215546Sopenharmony_ci                                .pBufferInfo = &buf_info[ds_cnt]};
1358bf215546Sopenharmony_ci      ++ds_cnt;
1359bf215546Sopenharmony_ci      params.sequence_count = UINT32_MAX;
1360bf215546Sopenharmony_ci   }
1361bf215546Sopenharmony_ci
1362bf215546Sopenharmony_ci   radv_meta_save(
1363bf215546Sopenharmony_ci      &saved_state, cmd_buffer,
1364bf215546Sopenharmony_ci      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1365bf215546Sopenharmony_ci
1366bf215546Sopenharmony_ci   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1367bf215546Sopenharmony_ci                        cmd_buffer->device->meta_state.dgc_prepare.pipeline);
1368bf215546Sopenharmony_ci
1369bf215546Sopenharmony_ci   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1370bf215546Sopenharmony_ci                         cmd_buffer->device->meta_state.dgc_prepare.p_layout,
1371bf215546Sopenharmony_ci                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params);
1372bf215546Sopenharmony_ci
1373bf215546Sopenharmony_ci   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1374bf215546Sopenharmony_ci                                 cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt,
1375bf215546Sopenharmony_ci                                 ds_writes);
1376bf215546Sopenharmony_ci
1377bf215546Sopenharmony_ci   unsigned block_count = MAX2(1, round_up_u32(pGeneratedCommandsInfo->sequencesCount, 64));
1378bf215546Sopenharmony_ci   radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
1379bf215546Sopenharmony_ci
1380bf215546Sopenharmony_ci   radv_buffer_finish(&token_buffer);
1381bf215546Sopenharmony_ci   radv_meta_restore(&saved_state, cmd_buffer);
1382bf215546Sopenharmony_ci
1383bf215546Sopenharmony_ci   cmd_buffer->state.flush_bits |=
1384bf215546Sopenharmony_ci      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
1385bf215546Sopenharmony_ci}