1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Google 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "radv_meta.h" 25bf215546Sopenharmony_ci#include "radv_private.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "nir_builder.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_cistatic void 30bf215546Sopenharmony_ciradv_get_sequence_size(const struct radv_indirect_command_layout *layout, 31bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, uint32_t *cmd_size, 32bf215546Sopenharmony_ci uint32_t *upload_size) 33bf215546Sopenharmony_ci{ 34bf215546Sopenharmony_ci *cmd_size = 0; 35bf215546Sopenharmony_ci *upload_size = 0; 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci if (layout->bind_vbo_mask) { 38bf215546Sopenharmony_ci *upload_size += 16 * util_bitcount(pipeline->vb_desc_usage_mask); 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */ 41bf215546Sopenharmony_ci *cmd_size += 3 * 4; 42bf215546Sopenharmony_ci } 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci if (layout->push_constant_mask) { 45bf215546Sopenharmony_ci bool need_copy = false; 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(pipeline->base.shaders); ++i) { 48bf215546Sopenharmony_ci if (!pipeline->base.shaders[i]) 49bf215546Sopenharmony_ci continue; 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci struct radv_userdata_locations *locs = &pipeline->base.shaders[i]->info.user_sgprs_locs; 52bf215546Sopenharmony_ci if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) { 53bf215546Sopenharmony_ci /* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */ 54bf215546Sopenharmony_ci *cmd_size += 3 * 4; 55bf215546Sopenharmony_ci need_copy = true; 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) 58bf215546Sopenharmony_ci /* One PKT3_SET_SH_REG writing all inline push constants. */ 59bf215546Sopenharmony_ci *cmd_size += (2 + locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].num_sgprs) * 4; 60bf215546Sopenharmony_ci } 61bf215546Sopenharmony_ci if (need_copy) 62bf215546Sopenharmony_ci *upload_size += 63bf215546Sopenharmony_ci align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16); 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci if (layout->binds_index_buffer) { 67bf215546Sopenharmony_ci /* Index type write (normal reg write) + index buffer base write (64-bits, but special packet 68bf215546Sopenharmony_ci * so only 1 word overhead) + index buffer size (again, special packet so only 1 word 69bf215546Sopenharmony_ci * overhead) 70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_ci *cmd_size += (3 + 3 + 2) * 4; 72bf215546Sopenharmony_ci } 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci if (layout->indexed) { 75bf215546Sopenharmony_ci /* userdata writes + instance count + indexed draw */ 76bf215546Sopenharmony_ci *cmd_size += (5 + 2 + 5) * 4; 77bf215546Sopenharmony_ci } else { 78bf215546Sopenharmony_ci /* userdata writes + instance count + non-indexed draw */ 79bf215546Sopenharmony_ci *cmd_size += (5 + 2 + 3) * 4; 80bf215546Sopenharmony_ci } 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci if (layout->binds_state) { 83bf215546Sopenharmony_ci /* One PKT3_SET_CONTEXT_REG (PA_SU_SC_MODE_CNTL) */ 84bf215546Sopenharmony_ci *cmd_size += 3 * 4; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci if (pipeline->base.device->physical_device->rad_info.has_gfx9_scissor_bug) { 87bf215546Sopenharmony_ci /* 1 reg write of 4 regs + 1 reg write of 2 regs per scissor */ 88bf215546Sopenharmony_ci *cmd_size += (8 + 2 * MAX_SCISSORS) * 4; 89bf215546Sopenharmony_ci } 90bf215546Sopenharmony_ci } 91bf215546Sopenharmony_ci} 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_cistatic uint32_t 94bf215546Sopenharmony_ciradv_align_cmdbuf_size(uint32_t size) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci return align(MAX2(1, size), 256); 97bf215546Sopenharmony_ci} 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ciuint32_t 100bf215546Sopenharmony_ciradv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info) 101bf215546Sopenharmony_ci{ 102bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_indirect_command_layout, layout, cmd_info->indirectCommandsLayout); 103bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_pipeline, pipeline, cmd_info->pipeline); 104bf215546Sopenharmony_ci struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci uint32_t cmd_size, upload_size; 107bf215546Sopenharmony_ci radv_get_sequence_size(layout, graphics_pipeline, &cmd_size, &upload_size); 108bf215546Sopenharmony_ci return radv_align_cmdbuf_size(cmd_size * cmd_info->sequencesCount); 109bf215546Sopenharmony_ci} 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_cienum radv_dgc_token_type { 112bf215546Sopenharmony_ci RADV_DGC_INDEX_BUFFER, 113bf215546Sopenharmony_ci RADV_DGC_DRAW, 114bf215546Sopenharmony_ci RADV_DGC_INDEXED_DRAW, 115bf215546Sopenharmony_ci}; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_cistruct radv_dgc_token { 118bf215546Sopenharmony_ci uint16_t type; /* enum radv_dgc_token_type, but making the size explicit */ 119bf215546Sopenharmony_ci uint16_t offset; /* offset in the input stream */ 120bf215546Sopenharmony_ci union { 121bf215546Sopenharmony_ci struct { 122bf215546Sopenharmony_ci uint16_t vtx_base_sgpr; 123bf215546Sopenharmony_ci } draw; 124bf215546Sopenharmony_ci struct { 125bf215546Sopenharmony_ci uint16_t index_size; 126bf215546Sopenharmony_ci uint16_t vtx_base_sgpr; 127bf215546Sopenharmony_ci uint32_t max_index_count; 128bf215546Sopenharmony_ci } indexed_draw; 129bf215546Sopenharmony_ci }; 130bf215546Sopenharmony_ci}; 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_cistruct radv_dgc_params { 133bf215546Sopenharmony_ci uint32_t cmd_buf_stride; 134bf215546Sopenharmony_ci uint32_t cmd_buf_size; 135bf215546Sopenharmony_ci uint32_t upload_stride; 136bf215546Sopenharmony_ci uint32_t upload_addr; 137bf215546Sopenharmony_ci uint32_t sequence_count; 138bf215546Sopenharmony_ci uint32_t stream_stride; 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci /* draw info */ 141bf215546Sopenharmony_ci uint16_t draw_indexed; 142bf215546Sopenharmony_ci uint16_t draw_params_offset; 143bf215546Sopenharmony_ci uint16_t base_index_size; 144bf215546Sopenharmony_ci uint16_t vtx_base_sgpr; 145bf215546Sopenharmony_ci uint32_t max_index_count; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci /* bind index buffer info. Valid if base_index_size == 0 && draw_indexed */ 148bf215546Sopenharmony_ci uint16_t index_buffer_offset; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci /* Top bit is DGC_DYNAMIC_VERTEX_INPUT */ 151bf215546Sopenharmony_ci uint8_t vbo_cnt; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci uint8_t const_copy; 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci /* Which VBOs are set in this indirect layout. */ 156bf215546Sopenharmony_ci uint32_t vbo_bind_mask; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci uint16_t vbo_reg; 159bf215546Sopenharmony_ci uint16_t const_copy_size; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci uint64_t push_constant_mask; 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci uint32_t ibo_type_32; 164bf215546Sopenharmony_ci uint32_t ibo_type_8; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci uint16_t push_constant_shader_cnt; 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci uint16_t emit_state; 169bf215546Sopenharmony_ci uint32_t pa_su_sc_mode_cntl_base; 170bf215546Sopenharmony_ci uint16_t state_offset; 171bf215546Sopenharmony_ci uint16_t scissor_count; 172bf215546Sopenharmony_ci uint16_t scissor_offset; /* in parameter buffer. */ 173bf215546Sopenharmony_ci}; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cienum { 176bf215546Sopenharmony_ci DGC_USES_DRAWID = 1u << 14, 177bf215546Sopenharmony_ci DGC_USES_BASEINSTANCE = 1u << 15, 178bf215546Sopenharmony_ci}; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_cienum { 181bf215546Sopenharmony_ci DGC_DYNAMIC_STRIDE = 1u << 15, 182bf215546Sopenharmony_ci}; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_cienum { 185bf215546Sopenharmony_ci DGC_DYNAMIC_VERTEX_INPUT = 1u << 7, 186bf215546Sopenharmony_ci}; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_cienum { 189bf215546Sopenharmony_ci DGC_DESC_STREAM, 190bf215546Sopenharmony_ci DGC_DESC_PREPARE, 191bf215546Sopenharmony_ci DGC_DESC_PARAMS, 192bf215546Sopenharmony_ci DGC_DESC_COUNT, 193bf215546Sopenharmony_ci DGC_NUM_DESCS, 194bf215546Sopenharmony_ci}; 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_cistruct dgc_cmdbuf { 197bf215546Sopenharmony_ci nir_ssa_def *descriptor; 198bf215546Sopenharmony_ci nir_variable *offset; 199bf215546Sopenharmony_ci}; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_cistatic void 202bf215546Sopenharmony_cidgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci assert(value->bit_size >= 32); 205bf215546Sopenharmony_ci nir_ssa_def *offset = nir_load_var(b, cs->offset); 206bf215546Sopenharmony_ci nir_store_ssbo(b, value, cs->descriptor, offset,.access = ACCESS_NON_READABLE); 207bf215546Sopenharmony_ci nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1); 208bf215546Sopenharmony_ci} 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci#define load_param32(b, field) \ 212bf215546Sopenharmony_ci nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ 213bf215546Sopenharmony_ci .base = offsetof(struct radv_dgc_params, field), .range = 4) 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci#define load_param16(b, field) \ 216bf215546Sopenharmony_ci nir_ubfe( \ 217bf215546Sopenharmony_ci (b), \ 218bf215546Sopenharmony_ci nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ 219bf215546Sopenharmony_ci .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \ 220bf215546Sopenharmony_ci nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 2) * 8), nir_imm_int((b), 16)) 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci#define load_param8(b, field) \ 223bf215546Sopenharmony_ci nir_ubfe( \ 224bf215546Sopenharmony_ci (b), \ 225bf215546Sopenharmony_ci nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0), \ 226bf215546Sopenharmony_ci .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4), \ 227bf215546Sopenharmony_ci nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 3) * 8), nir_imm_int((b), 8)) 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci#define load_param64(b, field) \ 230bf215546Sopenharmony_ci nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \ 231bf215546Sopenharmony_ci .base = offsetof(struct radv_dgc_params, field), .range = 8)) 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_cistatic nir_ssa_def * 234bf215546Sopenharmony_cinir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len) 235bf215546Sopenharmony_ci{ 236bf215546Sopenharmony_ci len = nir_iand_imm(b, len, 0x3fff); 237bf215546Sopenharmony_ci return nir_ior_imm(b, nir_ishl_imm(b, len, 16), PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op)); 238bf215546Sopenharmony_ci} 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_cistatic void 241bf215546Sopenharmony_cidgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr, 242bf215546Sopenharmony_ci nir_ssa_def *first_vertex, nir_ssa_def *first_instance, nir_ssa_def *drawid) 243bf215546Sopenharmony_ci{ 244bf215546Sopenharmony_ci vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr); 245bf215546Sopenharmony_ci nir_ssa_def *has_drawid = 246bf215546Sopenharmony_ci nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID); 247bf215546Sopenharmony_ci nir_ssa_def *has_baseinstance = 248bf215546Sopenharmony_ci nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci nir_ssa_def *pkt_cnt = nir_imm_int(b, 1); 251bf215546Sopenharmony_ci pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt); 252bf215546Sopenharmony_ci pkt_cnt = nir_bcsel(b, has_baseinstance, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci nir_ssa_def *values[5] = { 255bf215546Sopenharmony_ci nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), first_vertex, 256bf215546Sopenharmony_ci nir_imm_int(b, PKT3_NOP_PAD), nir_imm_int(b, PKT3_NOP_PAD), 257bf215546Sopenharmony_ci }; 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance), 260bf215546Sopenharmony_ci nir_bcsel(b, has_drawid, drawid, first_instance), values[4]); 261bf215546Sopenharmony_ci values[4] = nir_bcsel(b, nir_iand(b, has_drawid, has_baseinstance), first_instance, values[4]); 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci dgc_emit(b, cs, nir_vec(b, values, 5)); 264bf215546Sopenharmony_ci} 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_cistatic void 267bf215546Sopenharmony_cidgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *instance_count) 268bf215546Sopenharmony_ci{ 269bf215546Sopenharmony_ci nir_ssa_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count}; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci dgc_emit(b, cs, nir_vec(b, values, 2)); 272bf215546Sopenharmony_ci} 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_cistatic void 275bf215546Sopenharmony_cidgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset, 276bf215546Sopenharmony_ci nir_ssa_def *index_count, nir_ssa_def *max_index_count) 277bf215546Sopenharmony_ci{ 278bf215546Sopenharmony_ci nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), 279bf215546Sopenharmony_ci max_index_count, index_offset, index_count, 280bf215546Sopenharmony_ci nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)}; 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci dgc_emit(b, cs, nir_vec(b, values, 5)); 283bf215546Sopenharmony_ci} 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_cistatic void 286bf215546Sopenharmony_cidgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vertex_count) 287bf215546Sopenharmony_ci{ 288bf215546Sopenharmony_ci nir_ssa_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count, 289bf215546Sopenharmony_ci nir_imm_int(b, V_0287F0_DI_SRC_SEL_AUTO_INDEX)}; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci dgc_emit(b, cs, nir_vec(b, values, 3)); 292bf215546Sopenharmony_ci} 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_cistatic void 295bf215546Sopenharmony_cibuild_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count) 296bf215546Sopenharmony_ci{ 297bf215546Sopenharmony_ci nir_ssa_def *global_id = get_global_ids(b, 1); 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci nir_ssa_def *cmd_buf_stride = load_param32(b, cmd_buf_stride); 300bf215546Sopenharmony_ci nir_ssa_def *cmd_buf_size = load_param32(b, cmd_buf_size); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, global_id, 0)); 303bf215546Sopenharmony_ci { 304bf215546Sopenharmony_ci nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count); 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci nir_variable *offset = 307bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset"); 308bf215546Sopenharmony_ci nir_store_var(b, offset, cmd_buf_tail_start, 0x1); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE); 311bf215546Sopenharmony_ci nir_push_loop(b); 312bf215546Sopenharmony_ci { 313bf215546Sopenharmony_ci nir_ssa_def *curr_offset = nir_load_var(b, offset); 314bf215546Sopenharmony_ci const unsigned MAX_PACKET_WORDS = 0x3FFC; 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci nir_push_if(b, nir_ieq(b, curr_offset, cmd_buf_size)); 317bf215546Sopenharmony_ci { 318bf215546Sopenharmony_ci nir_jump(b, nir_jump_break); 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci nir_pop_if(b, NULL); 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci nir_ssa_def *packet_size = nir_isub(b, cmd_buf_size, curr_offset); 323bf215546Sopenharmony_ci packet_size = nir_umin(b, packet_size, nir_imm_int(b, MAX_PACKET_WORDS * 4)); 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci nir_ssa_def *len = nir_ushr_imm(b, packet_size, 2); 326bf215546Sopenharmony_ci len = nir_iadd_imm(b, len, -2); 327bf215546Sopenharmony_ci nir_ssa_def *packet = nir_pkt3(b, PKT3_NOP, len); 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci nir_store_ssbo(b, packet, dst_buf, curr_offset, .access = ACCESS_NON_READABLE); 330bf215546Sopenharmony_ci nir_store_var(b, offset, nir_iadd(b, curr_offset, packet_size), 0x1); 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci nir_pop_loop(b, NULL); 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci nir_pop_if(b, NULL); 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cistatic nir_shader * 338bf215546Sopenharmony_cibuild_dgc_prepare_shader(struct radv_device *dev) 339bf215546Sopenharmony_ci{ 340bf215546Sopenharmony_ci nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare"); 341bf215546Sopenharmony_ci b.shader->info.workgroup_size[0] = 64; 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci nir_ssa_def *global_id = get_global_ids(&b, 1); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci nir_ssa_def *sequence_id = global_id; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci nir_ssa_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride); 348bf215546Sopenharmony_ci nir_ssa_def *sequence_count = load_param32(&b, sequence_count); 349bf215546Sopenharmony_ci nir_ssa_def *stream_stride = load_param32(&b, stream_stride); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci nir_variable *count_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "sequence_count"); 352bf215546Sopenharmony_ci nir_store_var(&b, count_var, sequence_count, 0x1); 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq_imm(&b, sequence_count, UINT32_MAX)); 355bf215546Sopenharmony_ci { 356bf215546Sopenharmony_ci nir_ssa_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT); 357bf215546Sopenharmony_ci nir_ssa_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0), .align_mul = 4); 358bf215546Sopenharmony_ci nir_store_var(&b, count_var, cnt, 0x1); 359bf215546Sopenharmony_ci } 360bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci sequence_count = nir_load_var(&b, count_var); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci nir_push_if(&b, nir_ult(&b, sequence_id, sequence_count)); 365bf215546Sopenharmony_ci { 366bf215546Sopenharmony_ci struct dgc_cmdbuf cmd_buf = { 367bf215546Sopenharmony_ci .descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE), 368bf215546Sopenharmony_ci .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"), 369bf215546Sopenharmony_ci }; 370bf215546Sopenharmony_ci nir_store_var(&b, cmd_buf.offset, nir_imul(&b, global_id, cmd_buf_stride), 1); 371bf215546Sopenharmony_ci nir_ssa_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci nir_ssa_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM); 374bf215546Sopenharmony_ci nir_ssa_def *stream_base = nir_imul(&b, sequence_id, stream_stride); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci nir_variable *upload_offset = 377bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset"); 378bf215546Sopenharmony_ci nir_store_var(&b, upload_offset, 379bf215546Sopenharmony_ci nir_iadd(&b, load_param32(&b, cmd_buf_size), 380bf215546Sopenharmony_ci nir_imul(&b, load_param32(&b, upload_stride), sequence_id)), 381bf215546Sopenharmony_ci 0x1); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask); 384bf215546Sopenharmony_ci nir_ssa_def *vbo_cnt = nir_iand_imm(&b, load_param8(&b, vbo_cnt), 0x7F); 385bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0)); 386bf215546Sopenharmony_ci { 387bf215546Sopenharmony_ci nir_variable *vbo_idx = 388bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx"); 389bf215546Sopenharmony_ci nir_store_var(&b, vbo_idx, nir_imm_int(&b, 0), 0x1); 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_ci nir_push_loop(&b); 392bf215546Sopenharmony_ci { 393bf215546Sopenharmony_ci nir_push_if(&b, nir_uge(&b, nir_load_var(&b, vbo_idx), vbo_cnt)); 394bf215546Sopenharmony_ci { 395bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 396bf215546Sopenharmony_ci } 397bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci nir_ssa_def *vbo_offset = nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 16); 400bf215546Sopenharmony_ci nir_variable *vbo_data = 401bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data"); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS); 404bf215546Sopenharmony_ci nir_store_var(&b, vbo_data, 405bf215546Sopenharmony_ci nir_load_ssbo(&b, 4, 32, param_buf, vbo_offset, .align_mul = 4), 0xf); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci nir_ssa_def *vbo_override = 408bf215546Sopenharmony_ci nir_ine_imm(&b, 409bf215546Sopenharmony_ci nir_iand(&b, vbo_bind_mask, 410bf215546Sopenharmony_ci nir_ishl(&b, nir_imm_int(&b, 1), nir_load_var(&b, vbo_idx))), 411bf215546Sopenharmony_ci 0); 412bf215546Sopenharmony_ci nir_push_if(&b, vbo_override); 413bf215546Sopenharmony_ci { 414bf215546Sopenharmony_ci nir_ssa_def *vbo_offset_offset = 415bf215546Sopenharmony_ci nir_iadd(&b, nir_imul_imm(&b, vbo_cnt, 16), 416bf215546Sopenharmony_ci nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 8)); 417bf215546Sopenharmony_ci nir_ssa_def *vbo_over_data = 418bf215546Sopenharmony_ci nir_load_ssbo(&b, 2, 32, param_buf, vbo_offset_offset, .align_mul = 4); 419bf215546Sopenharmony_ci nir_ssa_def *stream_offset = nir_iadd( 420bf215546Sopenharmony_ci &b, stream_base, nir_iand_imm(&b, nir_channel(&b, vbo_over_data, 0), 0x7FFF)); 421bf215546Sopenharmony_ci nir_ssa_def *stream_data = 422bf215546Sopenharmony_ci nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4); 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci nir_ssa_def *va = nir_pack_64_2x32(&b, nir_channels(&b, stream_data, 0x3)); 425bf215546Sopenharmony_ci nir_ssa_def *size = nir_channel(&b, stream_data, 2); 426bf215546Sopenharmony_ci nir_ssa_def *stride = nir_channel(&b, stream_data, 3); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci nir_ssa_def *vs_state_offset = nir_ubfe(&b, nir_channel(&b, vbo_over_data, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 15)); 429bf215546Sopenharmony_ci va = nir_iadd(&b, va, nir_u2u64(&b, vs_state_offset)); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci nir_ssa_def *dyn_stride = nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE); 432bf215546Sopenharmony_ci nir_ssa_def *old_stride = 433bf215546Sopenharmony_ci nir_ubfe(&b, nir_channel(&b, nir_load_var(&b, vbo_data), 1), nir_imm_int(&b, 16), 434bf215546Sopenharmony_ci nir_imm_int(&b, 14)); 435bf215546Sopenharmony_ci stride = nir_bcsel(&b, dyn_stride, stride, old_stride); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci nir_ssa_def *use_per_attribute_vb_descs = 438bf215546Sopenharmony_ci nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), 1u << 31); 439bf215546Sopenharmony_ci nir_variable *num_records = nir_variable_create(b.shader, nir_var_shader_temp, 440bf215546Sopenharmony_ci glsl_uint_type(), "num_records"); 441bf215546Sopenharmony_ci nir_store_var(&b, num_records, size, 0x1); 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci nir_push_if(&b, use_per_attribute_vb_descs); 444bf215546Sopenharmony_ci { 445bf215546Sopenharmony_ci nir_ssa_def *attrib_end = nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1), 446bf215546Sopenharmony_ci nir_imm_int(&b, 16), nir_imm_int(&b, 16)); 447bf215546Sopenharmony_ci nir_ssa_def *attrib_index_offset = 448bf215546Sopenharmony_ci nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1), nir_imm_int(&b, 0), 449bf215546Sopenharmony_ci nir_imm_int(&b, 16)); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci nir_push_if(&b, nir_ult(&b, nir_load_var(&b, num_records), attrib_end)); 452bf215546Sopenharmony_ci { 453bf215546Sopenharmony_ci nir_store_var(&b, num_records, nir_imm_int(&b, 0), 0x1); 454bf215546Sopenharmony_ci } 455bf215546Sopenharmony_ci nir_push_else(&b, NULL); 456bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq_imm(&b, stride, 0)); 457bf215546Sopenharmony_ci { 458bf215546Sopenharmony_ci nir_store_var(&b, num_records, nir_imm_int(&b, 1), 0x1); 459bf215546Sopenharmony_ci } 460bf215546Sopenharmony_ci nir_push_else(&b, NULL); 461bf215546Sopenharmony_ci { 462bf215546Sopenharmony_ci nir_ssa_def *r = nir_iadd( 463bf215546Sopenharmony_ci &b, 464bf215546Sopenharmony_ci nir_iadd_imm( 465bf215546Sopenharmony_ci &b, 466bf215546Sopenharmony_ci nir_udiv(&b, nir_isub(&b, nir_load_var(&b, num_records), attrib_end), 467bf215546Sopenharmony_ci stride), 468bf215546Sopenharmony_ci 1), 469bf215546Sopenharmony_ci attrib_index_offset); 470bf215546Sopenharmony_ci nir_store_var(&b, num_records, r, 0x1); 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 473bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci nir_ssa_def *convert_cond = 476bf215546Sopenharmony_ci nir_ine_imm(&b, nir_load_var(&b, num_records), 0); 477bf215546Sopenharmony_ci if (dev->physical_device->rad_info.gfx_level == GFX9) 478bf215546Sopenharmony_ci convert_cond = nir_imm_bool(&b, false); 479bf215546Sopenharmony_ci else if (dev->physical_device->rad_info.gfx_level != GFX8) 480bf215546Sopenharmony_ci convert_cond = 481bf215546Sopenharmony_ci nir_iand(&b, convert_cond, nir_ieq_imm(&b, stride, 0)); 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci nir_ssa_def *new_records = nir_iadd( 484bf215546Sopenharmony_ci &b, nir_imul(&b, nir_iadd_imm(&b, nir_load_var(&b, num_records), -1), stride), 485bf215546Sopenharmony_ci attrib_end); 486bf215546Sopenharmony_ci new_records = 487bf215546Sopenharmony_ci nir_bcsel(&b, convert_cond, new_records, nir_load_var(&b, num_records)); 488bf215546Sopenharmony_ci nir_store_var(&b, num_records, new_records, 0x1); 489bf215546Sopenharmony_ci } 490bf215546Sopenharmony_ci nir_push_else(&b, NULL); 491bf215546Sopenharmony_ci { 492bf215546Sopenharmony_ci if (dev->physical_device->rad_info.gfx_level != GFX8) { 493bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, stride, 0)); 494bf215546Sopenharmony_ci { 495bf215546Sopenharmony_ci nir_ssa_def *r = nir_iadd(&b, nir_load_var(&b, num_records), 496bf215546Sopenharmony_ci nir_iadd_imm(&b, stride, -1)); 497bf215546Sopenharmony_ci nir_store_var(&b, num_records, nir_udiv(&b, r, stride), 0x1); 498bf215546Sopenharmony_ci } 499bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 500bf215546Sopenharmony_ci } 501bf215546Sopenharmony_ci } 502bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci nir_ssa_def *rsrc_word3 = nir_channel(&b, nir_load_var(&b, vbo_data), 3); 505bf215546Sopenharmony_ci if (dev->physical_device->rad_info.gfx_level >= GFX10) { 506bf215546Sopenharmony_ci nir_ssa_def *oob_select = nir_bcsel( 507bf215546Sopenharmony_ci &b, nir_ieq_imm(&b, stride, 0), nir_imm_int(&b, V_008F0C_OOB_SELECT_RAW), 508bf215546Sopenharmony_ci nir_imm_int(&b, V_008F0C_OOB_SELECT_STRUCTURED)); 509bf215546Sopenharmony_ci rsrc_word3 = nir_iand_imm(&b, rsrc_word3, C_008F0C_OOB_SELECT); 510bf215546Sopenharmony_ci rsrc_word3 = nir_ior(&b, rsrc_word3, nir_ishl_imm(&b, oob_select, 28)); 511bf215546Sopenharmony_ci } 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci nir_ssa_def *va_hi = nir_iand_imm(&b, nir_unpack_64_2x32_split_y(&b, va), 0xFFFF); 514bf215546Sopenharmony_ci stride = nir_iand_imm(&b, stride, 0x3FFF); 515bf215546Sopenharmony_ci nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(&b, va), 516bf215546Sopenharmony_ci nir_ior(&b, nir_ishl_imm(&b, stride, 16), va_hi), 517bf215546Sopenharmony_ci nir_load_var(&b, num_records), rsrc_word3}; 518bf215546Sopenharmony_ci nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf); 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci /* On GFX9, it seems bounds checking is disabled if both 523bf215546Sopenharmony_ci * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and 524bf215546Sopenharmony_ci * GFX10.3 but it doesn't hurt. 525bf215546Sopenharmony_ci */ 526bf215546Sopenharmony_ci nir_ssa_def *num_records = nir_channel(&b, nir_load_var(&b, vbo_data), 2); 527bf215546Sopenharmony_ci nir_ssa_def *buf_va = nir_iand_imm( 528bf215546Sopenharmony_ci &b, nir_pack_64_2x32(&b, nir_channels(&b, nir_load_var(&b, vbo_data), 0x3)), 529bf215546Sopenharmony_ci (1ull << 48) - 1ull); 530bf215546Sopenharmony_ci nir_push_if(&b, 531bf215546Sopenharmony_ci nir_ior(&b, nir_ieq_imm(&b, num_records, 0), nir_ieq_imm(&b, buf_va, 0))); 532bf215546Sopenharmony_ci { 533bf215546Sopenharmony_ci nir_ssa_def *use_dynamic_vertex_input = 534bf215546Sopenharmony_ci nir_test_mask(&b, load_param8(&b, vbo_cnt), DGC_DYNAMIC_VERTEX_INPUT); 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci nir_push_if(&b, use_dynamic_vertex_input); 537bf215546Sopenharmony_ci { 538bf215546Sopenharmony_ci nir_ssa_def *new_vbo_data[4] = { 539bf215546Sopenharmony_ci nir_imm_int(&b, 0), nir_imm_int(&b, S_008F04_STRIDE(16)), nir_imm_int(&b, 0), 540bf215546Sopenharmony_ci nir_channel(&b, nir_load_var(&b, vbo_data), 3)}; 541bf215546Sopenharmony_ci nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf); 542bf215546Sopenharmony_ci } 543bf215546Sopenharmony_ci nir_push_else(&b, NULL); 544bf215546Sopenharmony_ci { 545bf215546Sopenharmony_ci nir_ssa_def *new_vbo_data[4] = {nir_imm_int(&b, 0), nir_imm_int(&b, 0), 546bf215546Sopenharmony_ci nir_imm_int(&b, 0), nir_imm_int(&b, 0)}; 547bf215546Sopenharmony_ci nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf); 548bf215546Sopenharmony_ci } 549bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci nir_ssa_def *upload_off = nir_iadd(&b, nir_load_var(&b, upload_offset), vbo_offset); 554bf215546Sopenharmony_ci nir_store_ssbo(&b, nir_load_var(&b, vbo_data), cmd_buf.descriptor, upload_off, .access = ACCESS_NON_READABLE); 555bf215546Sopenharmony_ci nir_store_var(&b, vbo_idx, nir_iadd_imm(&b, nir_load_var(&b, vbo_idx), 1), 0x1); 556bf215546Sopenharmony_ci } 557bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 558bf215546Sopenharmony_ci nir_ssa_def *packet[3] = { 559bf215546Sopenharmony_ci nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(&b, vbo_reg), 560bf215546Sopenharmony_ci nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset))}; 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, nir_vec(&b, packet, 3)); 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci nir_store_var(&b, upload_offset, 565bf215546Sopenharmony_ci nir_iadd(&b, nir_load_var(&b, upload_offset), nir_imul_imm(&b, vbo_cnt, 16)), 566bf215546Sopenharmony_ci 0x1); 567bf215546Sopenharmony_ci } 568bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask); 572bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0)); 573bf215546Sopenharmony_ci { 574bf215546Sopenharmony_ci nir_ssa_def *const_copy = nir_ine_imm(&b, load_param8(&b, const_copy), 0); 575bf215546Sopenharmony_ci nir_ssa_def *const_copy_size = load_param16(&b, const_copy_size); 576bf215546Sopenharmony_ci nir_ssa_def *const_copy_words = nir_ushr_imm(&b, const_copy_size, 2); 577bf215546Sopenharmony_ci const_copy_words = nir_bcsel(&b, const_copy, const_copy_words, nir_imm_int(&b, 0)); 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci nir_variable *idx = 580bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx"); 581bf215546Sopenharmony_ci nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1); 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS); 584bf215546Sopenharmony_ci nir_ssa_def *param_offset = nir_imul_imm(&b, vbo_cnt, 24); 585bf215546Sopenharmony_ci nir_ssa_def *param_offset_offset = nir_iadd_imm(&b, param_offset, MESA_VULKAN_SHADER_STAGES * 12); 586bf215546Sopenharmony_ci nir_ssa_def *param_const_offset = nir_iadd_imm(&b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12); 587bf215546Sopenharmony_ci nir_push_loop(&b); 588bf215546Sopenharmony_ci { 589bf215546Sopenharmony_ci nir_ssa_def *cur_idx = nir_load_var(&b, idx); 590bf215546Sopenharmony_ci nir_push_if(&b, nir_uge(&b, cur_idx, const_copy_words)); 591bf215546Sopenharmony_ci { 592bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 593bf215546Sopenharmony_ci } 594bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci nir_variable *data = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data"); 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci nir_ssa_def *update = nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx)); 599bf215546Sopenharmony_ci update = nir_bcsel( 600bf215546Sopenharmony_ci &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)), update, 601bf215546Sopenharmony_ci nir_imm_int64(&b, 0)); 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, update, 0)); 604bf215546Sopenharmony_ci { 605bf215546Sopenharmony_ci nir_ssa_def *stream_offset = nir_load_ssbo( 606bf215546Sopenharmony_ci &b, 1, 32, param_buf, 607bf215546Sopenharmony_ci nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)), .align_mul = 4); 608bf215546Sopenharmony_ci nir_ssa_def *new_data = nir_load_ssbo(&b, 1, 32, stream_buf, nir_iadd(&b, stream_base, stream_offset), .align_mul = 4); 609bf215546Sopenharmony_ci nir_store_var(&b, data, new_data, 0x1); 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci nir_push_else(&b, NULL); 612bf215546Sopenharmony_ci { 613bf215546Sopenharmony_ci nir_store_var( 614bf215546Sopenharmony_ci &b, data, 615bf215546Sopenharmony_ci nir_load_ssbo(&b, 1, 32, param_buf, 616bf215546Sopenharmony_ci nir_iadd(&b, param_const_offset, nir_ishl_imm(&b, cur_idx, 2)), 617bf215546Sopenharmony_ci .align_mul = 4), 618bf215546Sopenharmony_ci 0x1); 619bf215546Sopenharmony_ci } 620bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci nir_store_ssbo( 623bf215546Sopenharmony_ci &b, nir_load_var(&b, data), cmd_buf.descriptor, 624bf215546Sopenharmony_ci nir_iadd(&b, nir_load_var(&b, upload_offset), nir_ishl_imm(&b, cur_idx, 2)), 625bf215546Sopenharmony_ci .access = ACCESS_NON_READABLE); 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1); 628bf215546Sopenharmony_ci } 629bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci nir_variable *shader_idx = 632bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx"); 633bf215546Sopenharmony_ci nir_store_var(&b, shader_idx, nir_imm_int(&b, 0), 0x1); 634bf215546Sopenharmony_ci nir_ssa_def *shader_cnt = load_param16(&b, push_constant_shader_cnt); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci nir_push_loop(&b); 637bf215546Sopenharmony_ci { 638bf215546Sopenharmony_ci nir_ssa_def *cur_shader_idx = nir_load_var(&b, shader_idx); 639bf215546Sopenharmony_ci nir_push_if(&b, nir_uge(&b, cur_shader_idx, shader_cnt)); 640bf215546Sopenharmony_ci { 641bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 642bf215546Sopenharmony_ci } 643bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci nir_ssa_def *reg_info = nir_load_ssbo(&b, 3, 32, param_buf, nir_iadd(&b, param_offset, nir_imul_imm(&b, cur_shader_idx, 12)), .align_mul = 4); 646bf215546Sopenharmony_ci nir_ssa_def *upload_sgpr = nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 0), nir_imm_int(&b, 16)); 647bf215546Sopenharmony_ci nir_ssa_def *inline_sgpr = nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 16)); 648bf215546Sopenharmony_ci nir_ssa_def *inline_mask = nir_pack_64_2x32(&b, nir_channels(&b, reg_info, 0x6)); 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, upload_sgpr, 0)); 651bf215546Sopenharmony_ci { 652bf215546Sopenharmony_ci nir_ssa_def *pkt[3] = { 653bf215546Sopenharmony_ci nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)), 654bf215546Sopenharmony_ci upload_sgpr, 655bf215546Sopenharmony_ci nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset)) 656bf215546Sopenharmony_ci }; 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 3)); 659bf215546Sopenharmony_ci } 660bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, inline_sgpr, 0)); 663bf215546Sopenharmony_ci { 664bf215546Sopenharmony_ci nir_ssa_def *inline_len = nir_bit_count(&b, inline_mask); 665bf215546Sopenharmony_ci nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1); 666bf215546Sopenharmony_ci 667bf215546Sopenharmony_ci nir_ssa_def *pkt[2] = { 668bf215546Sopenharmony_ci nir_pkt3(&b, PKT3_SET_SH_REG, inline_len), 669bf215546Sopenharmony_ci inline_sgpr 670bf215546Sopenharmony_ci }; 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 2)); 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci nir_push_loop(&b); 675bf215546Sopenharmony_ci { 676bf215546Sopenharmony_ci nir_ssa_def *cur_idx = nir_load_var(&b, idx); 677bf215546Sopenharmony_ci nir_push_if(&b, 678bf215546Sopenharmony_ci nir_uge(&b, cur_idx, nir_imm_int(&b, 64 /* bits in inline_mask */))); 679bf215546Sopenharmony_ci { 680bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 681bf215546Sopenharmony_ci } 682bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci nir_ssa_def *l = nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx); 685bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq_imm(&b, nir_iand(&b, l, inline_mask), 0)); 686bf215546Sopenharmony_ci { 687bf215546Sopenharmony_ci nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1); 688bf215546Sopenharmony_ci nir_jump(&b, nir_jump_continue); 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci nir_variable *data = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data"); 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci nir_ssa_def *update = nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx)); 695bf215546Sopenharmony_ci update = nir_bcsel( 696bf215546Sopenharmony_ci &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)), 697bf215546Sopenharmony_ci update, nir_imm_int64(&b, 0)); 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, update, 0)); 700bf215546Sopenharmony_ci { 701bf215546Sopenharmony_ci nir_ssa_def *stream_offset = nir_load_ssbo( 702bf215546Sopenharmony_ci &b, 1, 32, param_buf, 703bf215546Sopenharmony_ci nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)), 704bf215546Sopenharmony_ci .align_mul = 4); 705bf215546Sopenharmony_ci nir_ssa_def *new_data = nir_load_ssbo(&b, 1, 32, stream_buf, nir_iadd(&b, stream_base, stream_offset), .align_mul = 4); 706bf215546Sopenharmony_ci nir_store_var(&b, data, new_data, 0x1); 707bf215546Sopenharmony_ci } 708bf215546Sopenharmony_ci nir_push_else(&b, NULL); 709bf215546Sopenharmony_ci { 710bf215546Sopenharmony_ci nir_store_var(&b, data, 711bf215546Sopenharmony_ci nir_load_ssbo(&b, 1, 32, param_buf, 712bf215546Sopenharmony_ci nir_iadd(&b, param_const_offset, 713bf215546Sopenharmony_ci nir_ishl_imm(&b, cur_idx, 2)), 714bf215546Sopenharmony_ci .align_mul = 4), 715bf215546Sopenharmony_ci 0x1); 716bf215546Sopenharmony_ci } 717bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, nir_load_var(&b, data)); 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1); 722bf215546Sopenharmony_ci } 723bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 724bf215546Sopenharmony_ci } 725bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 726bf215546Sopenharmony_ci nir_store_var(&b, shader_idx, nir_iadd_imm(&b, cur_shader_idx, 1), 0x1); 727bf215546Sopenharmony_ci } 728bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci nir_pop_if(&b, 0); 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, emit_state), 1)); 733bf215546Sopenharmony_ci { 734bf215546Sopenharmony_ci nir_ssa_def *stream_offset = nir_iadd(&b, load_param16(&b, state_offset), stream_base); 735bf215546Sopenharmony_ci nir_ssa_def *state = nir_load_ssbo(&b, 1, 32, stream_buf, stream_offset, .align_mul = 4); 736bf215546Sopenharmony_ci state = nir_iand_imm(&b, state, 1); 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci nir_ssa_def *reg = 739bf215546Sopenharmony_ci nir_ior(&b, load_param32(&b, pa_su_sc_mode_cntl_base), nir_ishl_imm(&b, state, 2)); 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci nir_ssa_def *cmd_values[3] = { 742bf215546Sopenharmony_ci nir_imm_int(&b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)), 743bf215546Sopenharmony_ci nir_imm_int(&b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg}; 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 3)); 746bf215546Sopenharmony_ci } 747bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci nir_ssa_def *scissor_count = load_param16(&b, scissor_count); 750bf215546Sopenharmony_ci nir_push_if(&b, nir_ine_imm(&b, scissor_count, 0)); 751bf215546Sopenharmony_ci { 752bf215546Sopenharmony_ci nir_ssa_def *scissor_offset = load_param16(&b, scissor_offset); 753bf215546Sopenharmony_ci nir_variable *idx = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), 754bf215546Sopenharmony_ci "scissor_copy_idx"); 755bf215546Sopenharmony_ci nir_store_var(&b, idx, nir_imm_int(&b, 0), 1); 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci nir_push_loop(&b); 758bf215546Sopenharmony_ci { 759bf215546Sopenharmony_ci nir_ssa_def *cur_idx = nir_load_var(&b, idx); 760bf215546Sopenharmony_ci nir_push_if(&b, nir_uge(&b, cur_idx, scissor_count)); 761bf215546Sopenharmony_ci { 762bf215546Sopenharmony_ci nir_jump(&b, nir_jump_break); 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS); 767bf215546Sopenharmony_ci nir_ssa_def *param_offset = nir_iadd(&b, scissor_offset, nir_imul_imm(&b, cur_idx, 4)); 768bf215546Sopenharmony_ci nir_ssa_def *value = nir_load_ssbo(&b, 1, 32, param_buf, param_offset, .align_mul = 4); 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, value); 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 1); 773bf215546Sopenharmony_ci } 774bf215546Sopenharmony_ci nir_pop_loop(&b, NULL); 775bf215546Sopenharmony_ci } 776bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0)); 779bf215546Sopenharmony_ci { 780bf215546Sopenharmony_ci nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr); 781bf215546Sopenharmony_ci nir_ssa_def *stream_offset = 782bf215546Sopenharmony_ci nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base); 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci nir_ssa_def *draw_data0 = 785bf215546Sopenharmony_ci nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4); 786bf215546Sopenharmony_ci nir_ssa_def *vertex_count = nir_channel(&b, draw_data0, 0); 787bf215546Sopenharmony_ci nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1); 788bf215546Sopenharmony_ci nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 2); 789bf215546Sopenharmony_ci nir_ssa_def *first_instance = nir_channel(&b, draw_data0, 3); 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, vertex_count, 0), nir_ine_imm(&b, instance_count, 0))); 792bf215546Sopenharmony_ci { 793bf215546Sopenharmony_ci dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance, sequence_id); 794bf215546Sopenharmony_ci dgc_emit_instance_count(&b, &cmd_buf, instance_count); 795bf215546Sopenharmony_ci dgc_emit_draw(&b, &cmd_buf, vertex_count); 796bf215546Sopenharmony_ci } 797bf215546Sopenharmony_ci nir_pop_if(&b, 0); 798bf215546Sopenharmony_ci } 799bf215546Sopenharmony_ci nir_push_else(&b, NULL); 800bf215546Sopenharmony_ci { 801bf215546Sopenharmony_ci nir_variable *index_size_var = 802bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "index_size"); 803bf215546Sopenharmony_ci nir_store_var(&b, index_size_var, load_param16(&b, base_index_size), 0x1); 804bf215546Sopenharmony_ci nir_variable *max_index_count_var = 805bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "max_index_count"); 806bf215546Sopenharmony_ci nir_store_var(&b, max_index_count_var, load_param32(&b, max_index_count), 0x1); 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0); 809bf215546Sopenharmony_ci nir_push_if(&b, bind_index_buffer); 810bf215546Sopenharmony_ci { 811bf215546Sopenharmony_ci nir_ssa_def *index_stream_offset = 812bf215546Sopenharmony_ci nir_iadd(&b, load_param16(&b, index_buffer_offset), stream_base); 813bf215546Sopenharmony_ci nir_ssa_def *data = 814bf215546Sopenharmony_ci nir_load_ssbo(&b, 4, 32, stream_buf, index_stream_offset, .align_mul = 4); 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci nir_ssa_def *vk_index_type = nir_channel(&b, data, 3); 817bf215546Sopenharmony_ci nir_ssa_def *index_type = nir_bcsel( 818bf215546Sopenharmony_ci &b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_32)), 819bf215546Sopenharmony_ci nir_imm_int(&b, V_028A7C_VGT_INDEX_32), nir_imm_int(&b, V_028A7C_VGT_INDEX_16)); 820bf215546Sopenharmony_ci index_type = nir_bcsel(&b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_8)), 821bf215546Sopenharmony_ci nir_imm_int(&b, V_028A7C_VGT_INDEX_8), index_type); 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci nir_ssa_def *index_size = nir_iand_imm( 824bf215546Sopenharmony_ci &b, nir_ushr(&b, nir_imm_int(&b, 0x142), nir_imul_imm(&b, index_type, 4)), 0xf); 825bf215546Sopenharmony_ci nir_store_var(&b, index_size_var, index_size, 0x1); 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci nir_ssa_def *max_index_count = nir_udiv(&b, nir_channel(&b, data, 2), index_size); 828bf215546Sopenharmony_ci nir_store_var(&b, max_index_count_var, max_index_count, 0x1); 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci nir_ssa_def *cmd_values[3 + 2 + 3]; 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci if (dev->physical_device->rad_info.gfx_level >= GFX9) { 833bf215546Sopenharmony_ci unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; 834bf215546Sopenharmony_ci if (dev->physical_device->rad_info.gfx_level < GFX9 || 835bf215546Sopenharmony_ci (dev->physical_device->rad_info.gfx_level == GFX9 && 836bf215546Sopenharmony_ci dev->physical_device->rad_info.me_fw_version < 26)) 837bf215546Sopenharmony_ci opcode = PKT3_SET_UCONFIG_REG; 838bf215546Sopenharmony_ci cmd_values[0] = nir_imm_int(&b, PKT3(opcode, 1, 0)); 839bf215546Sopenharmony_ci cmd_values[1] = nir_imm_int( 840bf215546Sopenharmony_ci &b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28)); 841bf215546Sopenharmony_ci cmd_values[2] = index_type; 842bf215546Sopenharmony_ci } else { 843bf215546Sopenharmony_ci cmd_values[0] = nir_imm_int(&b, PKT3(PKT3_INDEX_TYPE, 0, 0)); 844bf215546Sopenharmony_ci cmd_values[1] = index_type; 845bf215546Sopenharmony_ci cmd_values[2] = nir_imm_int(&b, PKT3_NOP_PAD); 846bf215546Sopenharmony_ci } 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci nir_ssa_def *addr_upper = nir_channel(&b, data, 1); 849bf215546Sopenharmony_ci addr_upper = nir_ishr_imm(&b, nir_ishl_imm(&b, addr_upper, 16), 16); 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_ci cmd_values[3] = nir_imm_int(&b, PKT3(PKT3_INDEX_BASE, 1, 0)); 852bf215546Sopenharmony_ci cmd_values[4] = nir_channel(&b, data, 0); 853bf215546Sopenharmony_ci cmd_values[5] = addr_upper; 854bf215546Sopenharmony_ci cmd_values[6] = nir_imm_int(&b, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); 855bf215546Sopenharmony_ci cmd_values[7] = max_index_count; 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 8)); 858bf215546Sopenharmony_ci } 859bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_ci nir_ssa_def *index_size = nir_load_var(&b, index_size_var); 862bf215546Sopenharmony_ci nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var); 863bf215546Sopenharmony_ci nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr); 864bf215546Sopenharmony_ci nir_ssa_def *stream_offset = 865bf215546Sopenharmony_ci nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base); 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci index_size = 868bf215546Sopenharmony_ci nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size); 869bf215546Sopenharmony_ci max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var), 870bf215546Sopenharmony_ci max_index_count); 871bf215546Sopenharmony_ci nir_ssa_def *draw_data0 = 872bf215546Sopenharmony_ci nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4); 873bf215546Sopenharmony_ci nir_ssa_def *draw_data1 = nir_load_ssbo( 874bf215546Sopenharmony_ci &b, 1, 32, stream_buf, nir_iadd_imm(&b, stream_offset, 16), .align_mul = 4); 875bf215546Sopenharmony_ci nir_ssa_def *index_count = nir_channel(&b, draw_data0, 0); 876bf215546Sopenharmony_ci nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1); 877bf215546Sopenharmony_ci nir_ssa_def *first_index = nir_channel(&b, draw_data0, 2); 878bf215546Sopenharmony_ci nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 3); 879bf215546Sopenharmony_ci nir_ssa_def *first_instance = nir_channel(&b, draw_data1, 0); 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, index_count, 0), nir_ine_imm(&b, instance_count, 0))); 882bf215546Sopenharmony_ci { 883bf215546Sopenharmony_ci dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance, sequence_id); 884bf215546Sopenharmony_ci dgc_emit_instance_count(&b, &cmd_buf, instance_count); 885bf215546Sopenharmony_ci dgc_emit_draw_indexed(&b, &cmd_buf, first_index, index_count, 886bf215546Sopenharmony_ci max_index_count); 887bf215546Sopenharmony_ci } 888bf215546Sopenharmony_ci nir_pop_if(&b, 0); 889bf215546Sopenharmony_ci } 890bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 891bf215546Sopenharmony_ci 892bf215546Sopenharmony_ci /* Pad the cmdbuffer if we did not use the whole stride */ 893bf215546Sopenharmony_ci nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end)); 894bf215546Sopenharmony_ci { 895bf215546Sopenharmony_ci nir_ssa_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset)); 896bf215546Sopenharmony_ci cnt = nir_ushr_imm(&b, cnt, 2); 897bf215546Sopenharmony_ci cnt = nir_iadd_imm(&b, cnt, -2); 898bf215546Sopenharmony_ci nir_ssa_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt); 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci dgc_emit(&b, &cmd_buf, pkt); 901bf215546Sopenharmony_ci } 902bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 903bf215546Sopenharmony_ci } 904bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci build_dgc_buffer_tail(&b, sequence_count); 907bf215546Sopenharmony_ci return b.shader; 908bf215546Sopenharmony_ci} 909bf215546Sopenharmony_ci 910bf215546Sopenharmony_civoid 911bf215546Sopenharmony_ciradv_device_finish_dgc_prepare_state(struct radv_device *device) 912bf215546Sopenharmony_ci{ 913bf215546Sopenharmony_ci radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.dgc_prepare.pipeline, 914bf215546Sopenharmony_ci &device->meta_state.alloc); 915bf215546Sopenharmony_ci radv_DestroyPipelineLayout(radv_device_to_handle(device), 916bf215546Sopenharmony_ci device->meta_state.dgc_prepare.p_layout, &device->meta_state.alloc); 917bf215546Sopenharmony_ci device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), 918bf215546Sopenharmony_ci device->meta_state.dgc_prepare.ds_layout, 919bf215546Sopenharmony_ci &device->meta_state.alloc); 920bf215546Sopenharmony_ci} 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ciVkResult 923bf215546Sopenharmony_ciradv_device_init_dgc_prepare_state(struct radv_device *device) 924bf215546Sopenharmony_ci{ 925bf215546Sopenharmony_ci VkResult result; 926bf215546Sopenharmony_ci nir_shader *cs = build_dgc_prepare_shader(device); 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci VkDescriptorSetLayoutCreateInfo ds_create_info = { 929bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 930bf215546Sopenharmony_ci .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 931bf215546Sopenharmony_ci .bindingCount = DGC_NUM_DESCS, 932bf215546Sopenharmony_ci .pBindings = (VkDescriptorSetLayoutBinding[]){ 933bf215546Sopenharmony_ci {.binding = DGC_DESC_STREAM, 934bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 935bf215546Sopenharmony_ci .descriptorCount = 1, 936bf215546Sopenharmony_ci .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 937bf215546Sopenharmony_ci .pImmutableSamplers = NULL}, 938bf215546Sopenharmony_ci {.binding = DGC_DESC_PREPARE, 939bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 940bf215546Sopenharmony_ci .descriptorCount = 1, 941bf215546Sopenharmony_ci .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 942bf215546Sopenharmony_ci .pImmutableSamplers = NULL}, 943bf215546Sopenharmony_ci {.binding = DGC_DESC_PARAMS, 944bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 945bf215546Sopenharmony_ci .descriptorCount = 1, 946bf215546Sopenharmony_ci .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 947bf215546Sopenharmony_ci .pImmutableSamplers = NULL}, 948bf215546Sopenharmony_ci {.binding = DGC_DESC_COUNT, 949bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 950bf215546Sopenharmony_ci .descriptorCount = 1, 951bf215546Sopenharmony_ci .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 952bf215546Sopenharmony_ci .pImmutableSamplers = NULL}, 953bf215546Sopenharmony_ci }}; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 956bf215546Sopenharmony_ci &device->meta_state.alloc, 957bf215546Sopenharmony_ci &device->meta_state.dgc_prepare.ds_layout); 958bf215546Sopenharmony_ci if (result != VK_SUCCESS) 959bf215546Sopenharmony_ci goto cleanup; 960bf215546Sopenharmony_ci 961bf215546Sopenharmony_ci const VkPipelineLayoutCreateInfo leaf_pl_create_info = { 962bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 963bf215546Sopenharmony_ci .setLayoutCount = 1, 964bf215546Sopenharmony_ci .pSetLayouts = &device->meta_state.dgc_prepare.ds_layout, 965bf215546Sopenharmony_ci .pushConstantRangeCount = 1, 966bf215546Sopenharmony_ci .pPushConstantRanges = 967bf215546Sopenharmony_ci &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)}, 968bf215546Sopenharmony_ci }; 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info, 971bf215546Sopenharmony_ci &device->meta_state.alloc, 972bf215546Sopenharmony_ci &device->meta_state.dgc_prepare.p_layout); 973bf215546Sopenharmony_ci if (result != VK_SUCCESS) 974bf215546Sopenharmony_ci goto cleanup; 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci VkPipelineShaderStageCreateInfo shader_stage = { 977bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 978bf215546Sopenharmony_ci .stage = VK_SHADER_STAGE_COMPUTE_BIT, 979bf215546Sopenharmony_ci .module = vk_shader_module_handle_from_nir(cs), 980bf215546Sopenharmony_ci .pName = "main", 981bf215546Sopenharmony_ci .pSpecializationInfo = NULL, 982bf215546Sopenharmony_ci }; 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci VkComputePipelineCreateInfo pipeline_info = { 985bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 986bf215546Sopenharmony_ci .stage = shader_stage, 987bf215546Sopenharmony_ci .flags = 0, 988bf215546Sopenharmony_ci .layout = device->meta_state.dgc_prepare.p_layout, 989bf215546Sopenharmony_ci }; 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci result = radv_CreateComputePipelines( 992bf215546Sopenharmony_ci radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 993bf215546Sopenharmony_ci &pipeline_info, &device->meta_state.alloc, &device->meta_state.dgc_prepare.pipeline); 994bf215546Sopenharmony_ci if (result != VK_SUCCESS) 995bf215546Sopenharmony_ci goto cleanup; 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_cicleanup: 998bf215546Sopenharmony_ci ralloc_free(cs); 999bf215546Sopenharmony_ci return result; 1000bf215546Sopenharmony_ci} 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ciVkResult 1003bf215546Sopenharmony_ciradv_CreateIndirectCommandsLayoutNV(VkDevice _device, 1004bf215546Sopenharmony_ci const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo, 1005bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 1006bf215546Sopenharmony_ci VkIndirectCommandsLayoutNV *pIndirectCommandsLayout) 1007bf215546Sopenharmony_ci{ 1008bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 1009bf215546Sopenharmony_ci struct radv_indirect_command_layout *layout; 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_ci size_t size = 1012bf215546Sopenharmony_ci sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV); 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci layout = 1015bf215546Sopenharmony_ci vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout), 1016bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1017bf215546Sopenharmony_ci if (!layout) 1018bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_ci vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV); 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci layout->input_stride = pCreateInfo->pStreamStrides[0]; 1023bf215546Sopenharmony_ci layout->token_count = pCreateInfo->tokenCount; 1024bf215546Sopenharmony_ci typed_memcpy(layout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount); 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_ci layout->ibo_type_32 = VK_INDEX_TYPE_UINT32; 1027bf215546Sopenharmony_ci layout->ibo_type_8 = VK_INDEX_TYPE_UINT8_EXT; 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->tokenCount; ++i) { 1030bf215546Sopenharmony_ci switch (pCreateInfo->pTokens[i].tokenType) { 1031bf215546Sopenharmony_ci case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV: 1032bf215546Sopenharmony_ci layout->draw_params_offset = pCreateInfo->pTokens[i].offset; 1033bf215546Sopenharmony_ci break; 1034bf215546Sopenharmony_ci case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV: 1035bf215546Sopenharmony_ci layout->indexed = true; 1036bf215546Sopenharmony_ci layout->draw_params_offset = pCreateInfo->pTokens[i].offset; 1037bf215546Sopenharmony_ci break; 1038bf215546Sopenharmony_ci case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: 1039bf215546Sopenharmony_ci layout->binds_index_buffer = true; 1040bf215546Sopenharmony_ci layout->index_buffer_offset = pCreateInfo->pTokens[i].offset; 1041bf215546Sopenharmony_ci /* 16-bit is implied if we find no match. */ 1042bf215546Sopenharmony_ci for (unsigned j = 0; j < pCreateInfo->pTokens[i].indexTypeCount; j++) { 1043bf215546Sopenharmony_ci if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT32) 1044bf215546Sopenharmony_ci layout->ibo_type_32 = pCreateInfo->pTokens[i].pIndexTypeValues[j]; 1045bf215546Sopenharmony_ci else if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT8_EXT) 1046bf215546Sopenharmony_ci layout->ibo_type_8 = pCreateInfo->pTokens[i].pIndexTypeValues[j]; 1047bf215546Sopenharmony_ci } 1048bf215546Sopenharmony_ci break; 1049bf215546Sopenharmony_ci case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: 1050bf215546Sopenharmony_ci layout->bind_vbo_mask |= 1u << pCreateInfo->pTokens[i].vertexBindingUnit; 1051bf215546Sopenharmony_ci layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] = 1052bf215546Sopenharmony_ci pCreateInfo->pTokens[i].offset; 1053bf215546Sopenharmony_ci if (pCreateInfo->pTokens[i].vertexDynamicStride) 1054bf215546Sopenharmony_ci layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] |= DGC_DYNAMIC_STRIDE; 1055bf215546Sopenharmony_ci break; 1056bf215546Sopenharmony_ci case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: 1057bf215546Sopenharmony_ci for (unsigned j = pCreateInfo->pTokens[i].pushconstantOffset / 4, k = 0; 1058bf215546Sopenharmony_ci k < pCreateInfo->pTokens[i].pushconstantSize / 4; ++j, ++k) { 1059bf215546Sopenharmony_ci layout->push_constant_mask |= 1ull << j; 1060bf215546Sopenharmony_ci layout->push_constant_offsets[j] = pCreateInfo->pTokens[i].offset + k * 4; 1061bf215546Sopenharmony_ci } 1062bf215546Sopenharmony_ci break; 1063bf215546Sopenharmony_ci case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV: 1064bf215546Sopenharmony_ci layout->binds_state = true; 1065bf215546Sopenharmony_ci layout->state_offset = pCreateInfo->pTokens[i].offset; 1066bf215546Sopenharmony_ci break; 1067bf215546Sopenharmony_ci default: 1068bf215546Sopenharmony_ci unreachable("Unhandled token type"); 1069bf215546Sopenharmony_ci } 1070bf215546Sopenharmony_ci } 1071bf215546Sopenharmony_ci if (!layout->indexed) 1072bf215546Sopenharmony_ci layout->binds_index_buffer = false; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci *pIndirectCommandsLayout = radv_indirect_command_layout_to_handle(layout); 1075bf215546Sopenharmony_ci return VK_SUCCESS; 1076bf215546Sopenharmony_ci} 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_civoid 1079bf215546Sopenharmony_ciradv_DestroyIndirectCommandsLayoutNV(VkDevice _device, 1080bf215546Sopenharmony_ci VkIndirectCommandsLayoutNV indirectCommandsLayout, 1081bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 1082bf215546Sopenharmony_ci{ 1083bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 1084bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_indirect_command_layout, layout, indirectCommandsLayout); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci if (!layout) 1087bf215546Sopenharmony_ci return; 1088bf215546Sopenharmony_ci 1089bf215546Sopenharmony_ci vk_object_base_finish(&layout->base); 1090bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, layout); 1091bf215546Sopenharmony_ci} 1092bf215546Sopenharmony_ci 1093bf215546Sopenharmony_civoid 1094bf215546Sopenharmony_ciradv_GetGeneratedCommandsMemoryRequirementsNV( 1095bf215546Sopenharmony_ci VkDevice _device, const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo, 1096bf215546Sopenharmony_ci VkMemoryRequirements2 *pMemoryRequirements) 1097bf215546Sopenharmony_ci{ 1098bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 1099bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout); 1100bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline); 1101bf215546Sopenharmony_ci struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci uint32_t cmd_stride, upload_stride; 1104bf215546Sopenharmony_ci radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride); 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pInfo->maxSequencesCount); 1107bf215546Sopenharmony_ci VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci pMemoryRequirements->memoryRequirements.memoryTypeBits = 1110bf215546Sopenharmony_ci device->physical_device->memory_types_32bit; 1111bf215546Sopenharmony_ci pMemoryRequirements->memoryRequirements.alignment = 256; 1112bf215546Sopenharmony_ci pMemoryRequirements->memoryRequirements.size = 1113bf215546Sopenharmony_ci align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment); 1114bf215546Sopenharmony_ci} 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_civoid 1117bf215546Sopenharmony_ciradv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer, 1118bf215546Sopenharmony_ci const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo) 1119bf215546Sopenharmony_ci{ 1120bf215546Sopenharmony_ci /* Can't do anything here as we depend on some dynamic state in some cases that we only know 1121bf215546Sopenharmony_ci * at draw time. */ 1122bf215546Sopenharmony_ci} 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci/* Always need to call this directly before draw due to dependence on bound state. */ 1125bf215546Sopenharmony_civoid 1126bf215546Sopenharmony_ciradv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, 1127bf215546Sopenharmony_ci const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo) 1128bf215546Sopenharmony_ci{ 1129bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_indirect_command_layout, layout, 1130bf215546Sopenharmony_ci pGeneratedCommandsInfo->indirectCommandsLayout); 1131bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline); 1132bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer); 1133bf215546Sopenharmony_ci struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); 1134bf215546Sopenharmony_ci struct radv_meta_saved_state saved_state; 1135bf215546Sopenharmony_ci struct radv_buffer token_buffer; 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci uint32_t cmd_stride, upload_stride; 1138bf215546Sopenharmony_ci radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride); 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci unsigned cmd_buf_size = 1141bf215546Sopenharmony_ci radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount); 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(graphics_pipeline->vb_desc_usage_mask) * 24 : 0; 1144bf215546Sopenharmony_ci unsigned const_size = graphics_pipeline->base.push_constant_size + 1145bf215546Sopenharmony_ci 16 * graphics_pipeline->base.dynamic_offset_count + 1146bf215546Sopenharmony_ci sizeof(layout->push_constant_offsets) + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12; 1147bf215546Sopenharmony_ci if (!layout->push_constant_mask) 1148bf215546Sopenharmony_ci const_size = 0; 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_ci unsigned scissor_size = (8 + 2 * cmd_buffer->state.dynamic.scissor.count) * 4; 1151bf215546Sopenharmony_ci if (!layout->binds_state || !cmd_buffer->state.dynamic.scissor.count || 1152bf215546Sopenharmony_ci !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug) 1153bf215546Sopenharmony_ci scissor_size = 0; 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci unsigned upload_size = MAX2(vb_size + const_size + scissor_size, 16); 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci void *upload_data; 1158bf215546Sopenharmony_ci unsigned upload_offset; 1159bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_alloc(cmd_buffer, upload_size, &upload_offset, &upload_data)) { 1160bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 1161bf215546Sopenharmony_ci return; 1162bf215546Sopenharmony_ci } 1163bf215546Sopenharmony_ci 1164bf215546Sopenharmony_ci void *upload_data_base = upload_data; 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size, 1167bf215546Sopenharmony_ci upload_offset); 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci uint64_t upload_addr = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + 1170bf215546Sopenharmony_ci pGeneratedCommandsInfo->preprocessOffset; 1171bf215546Sopenharmony_ci 1172bf215546Sopenharmony_ci uint16_t vtx_base_sgpr = 1173bf215546Sopenharmony_ci (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2; 1174bf215546Sopenharmony_ci if (cmd_buffer->state.graphics_pipeline->uses_drawid) 1175bf215546Sopenharmony_ci vtx_base_sgpr |= DGC_USES_DRAWID; 1176bf215546Sopenharmony_ci if (cmd_buffer->state.graphics_pipeline->uses_baseinstance) 1177bf215546Sopenharmony_ci vtx_base_sgpr |= DGC_USES_BASEINSTANCE; 1178bf215546Sopenharmony_ci 1179bf215546Sopenharmony_ci uint16_t vbo_sgpr = 1180bf215546Sopenharmony_ci ((radv_lookup_user_sgpr(&graphics_pipeline->base, MESA_SHADER_VERTEX, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 + 1181bf215546Sopenharmony_ci graphics_pipeline->base.user_data_0[MESA_SHADER_VERTEX]) - 1182bf215546Sopenharmony_ci SI_SH_REG_OFFSET) >> 1183bf215546Sopenharmony_ci 2; 1184bf215546Sopenharmony_ci struct radv_dgc_params params = { 1185bf215546Sopenharmony_ci .cmd_buf_stride = cmd_stride, 1186bf215546Sopenharmony_ci .cmd_buf_size = cmd_buf_size, 1187bf215546Sopenharmony_ci .upload_addr = (uint32_t)upload_addr, 1188bf215546Sopenharmony_ci .upload_stride = upload_stride, 1189bf215546Sopenharmony_ci .sequence_count = pGeneratedCommandsInfo->sequencesCount, 1190bf215546Sopenharmony_ci .stream_stride = layout->input_stride, 1191bf215546Sopenharmony_ci .draw_indexed = layout->indexed, 1192bf215546Sopenharmony_ci .draw_params_offset = layout->draw_params_offset, 1193bf215546Sopenharmony_ci .base_index_size = 1194bf215546Sopenharmony_ci layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type), 1195bf215546Sopenharmony_ci .vtx_base_sgpr = vtx_base_sgpr, 1196bf215546Sopenharmony_ci .max_index_count = cmd_buffer->state.max_index_count, 1197bf215546Sopenharmony_ci .index_buffer_offset = layout->index_buffer_offset, 1198bf215546Sopenharmony_ci .vbo_reg = vbo_sgpr, 1199bf215546Sopenharmony_ci .ibo_type_32 = layout->ibo_type_32, 1200bf215546Sopenharmony_ci .ibo_type_8 = layout->ibo_type_8, 1201bf215546Sopenharmony_ci .emit_state = layout->binds_state, 1202bf215546Sopenharmony_ci .pa_su_sc_mode_cntl_base = radv_get_pa_su_sc_mode_cntl(cmd_buffer) & C_028814_FACE, 1203bf215546Sopenharmony_ci .state_offset = layout->state_offset, 1204bf215546Sopenharmony_ci }; 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci if (layout->bind_vbo_mask) { 1207bf215546Sopenharmony_ci radv_write_vertex_descriptors(cmd_buffer, graphics_pipeline, true, upload_data); 1208bf215546Sopenharmony_ci 1209bf215546Sopenharmony_ci uint32_t *vbo_info = (uint32_t *)((char *)upload_data + graphics_pipeline->vb_desc_alloc_size); 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_ci struct radv_shader *vs_shader = radv_get_shader(&graphics_pipeline->base, MESA_SHADER_VERTEX); 1212bf215546Sopenharmony_ci const struct radv_vs_input_state *vs_state = 1213bf215546Sopenharmony_ci vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL; 1214bf215546Sopenharmony_ci uint32_t mask = graphics_pipeline->vb_desc_usage_mask; 1215bf215546Sopenharmony_ci unsigned idx = 0; 1216bf215546Sopenharmony_ci while (mask) { 1217bf215546Sopenharmony_ci unsigned i = u_bit_scan(&mask); 1218bf215546Sopenharmony_ci unsigned binding = 1219bf215546Sopenharmony_ci vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i] 1220bf215546Sopenharmony_ci : (graphics_pipeline->use_per_attribute_vb_descs ? graphics_pipeline->attrib_bindings[i] : i); 1221bf215546Sopenharmony_ci uint32_t attrib_end = 1222bf215546Sopenharmony_ci vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : graphics_pipeline->attrib_ends[i]; 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci params.vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx; 1225bf215546Sopenharmony_ci vbo_info[2 * idx] = ((graphics_pipeline->use_per_attribute_vb_descs ? 1u : 0u) << 31) | 1226bf215546Sopenharmony_ci (vs_state ? vs_state->offsets[i] << 16 : 0) | 1227bf215546Sopenharmony_ci layout->vbo_offsets[binding]; 1228bf215546Sopenharmony_ci vbo_info[2 * idx + 1] = graphics_pipeline->attrib_index_offset[i] | (attrib_end << 16); 1229bf215546Sopenharmony_ci ++idx; 1230bf215546Sopenharmony_ci } 1231bf215546Sopenharmony_ci params.vbo_cnt = idx | (vs_state ? DGC_DYNAMIC_VERTEX_INPUT : 0); 1232bf215546Sopenharmony_ci upload_data = (char *)upload_data + vb_size; 1233bf215546Sopenharmony_ci } 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_ci if (layout->push_constant_mask) { 1236bf215546Sopenharmony_ci uint32_t *desc = upload_data; 1237bf215546Sopenharmony_ci upload_data = (char *)upload_data + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12; 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_ci unsigned idx = 0; 1240bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(graphics_pipeline->base.shaders); ++i) { 1241bf215546Sopenharmony_ci if (!graphics_pipeline->base.shaders[i]) 1242bf215546Sopenharmony_ci continue; 1243bf215546Sopenharmony_ci 1244bf215546Sopenharmony_ci struct radv_userdata_locations *locs = &graphics_pipeline->base.shaders[i]->info.user_sgprs_locs; 1245bf215546Sopenharmony_ci if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) 1246bf215546Sopenharmony_ci params.const_copy = 1; 1247bf215546Sopenharmony_ci 1248bf215546Sopenharmony_ci if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 || 1249bf215546Sopenharmony_ci locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) { 1250bf215546Sopenharmony_ci unsigned upload_sgpr = 0; 1251bf215546Sopenharmony_ci unsigned inline_sgpr = 0; 1252bf215546Sopenharmony_ci 1253bf215546Sopenharmony_ci if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) { 1254bf215546Sopenharmony_ci upload_sgpr = 1255bf215546Sopenharmony_ci (graphics_pipeline->base.user_data_0[i] + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx - 1256bf215546Sopenharmony_ci SI_SH_REG_OFFSET) >> 1257bf215546Sopenharmony_ci 2; 1258bf215546Sopenharmony_ci } 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_ci if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) { 1261bf215546Sopenharmony_ci inline_sgpr = (graphics_pipeline->base.user_data_0[i] + 1262bf215546Sopenharmony_ci 4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx - 1263bf215546Sopenharmony_ci SI_SH_REG_OFFSET) >> 1264bf215546Sopenharmony_ci 2; 1265bf215546Sopenharmony_ci desc[idx * 3 + 1] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask; 1266bf215546Sopenharmony_ci desc[idx * 3 + 2] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask >> 32; 1267bf215546Sopenharmony_ci } 1268bf215546Sopenharmony_ci desc[idx * 3] = upload_sgpr | (inline_sgpr << 16); 1269bf215546Sopenharmony_ci ++idx; 1270bf215546Sopenharmony_ci } 1271bf215546Sopenharmony_ci } 1272bf215546Sopenharmony_ci 1273bf215546Sopenharmony_ci params.push_constant_shader_cnt = idx; 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci params.const_copy_size = graphics_pipeline->base.push_constant_size + 1276bf215546Sopenharmony_ci 16 * graphics_pipeline->base.dynamic_offset_count; 1277bf215546Sopenharmony_ci params.push_constant_mask = layout->push_constant_mask; 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ci memcpy(upload_data, layout->push_constant_offsets, sizeof(layout->push_constant_offsets)); 1280bf215546Sopenharmony_ci upload_data = (char *)upload_data + sizeof(layout->push_constant_offsets); 1281bf215546Sopenharmony_ci 1282bf215546Sopenharmony_ci memcpy(upload_data, cmd_buffer->push_constants, graphics_pipeline->base.push_constant_size); 1283bf215546Sopenharmony_ci upload_data = (char *)upload_data + graphics_pipeline->base.push_constant_size; 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 1286bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, pGeneratedCommandsInfo->pipelineBindPoint); 1287bf215546Sopenharmony_ci memcpy(upload_data, descriptors_state->dynamic_buffers, 16 * graphics_pipeline->base.dynamic_offset_count); 1288bf215546Sopenharmony_ci upload_data = (char *)upload_data + 16 * graphics_pipeline->base.dynamic_offset_count; 1289bf215546Sopenharmony_ci } 1290bf215546Sopenharmony_ci 1291bf215546Sopenharmony_ci if (scissor_size) { 1292bf215546Sopenharmony_ci params.scissor_offset = (char*)upload_data - (char*)upload_data_base; 1293bf215546Sopenharmony_ci params.scissor_count = scissor_size / 4; 1294bf215546Sopenharmony_ci 1295bf215546Sopenharmony_ci struct radeon_cmdbuf scissor_cs = { 1296bf215546Sopenharmony_ci .buf = upload_data, 1297bf215546Sopenharmony_ci .cdw = 0, 1298bf215546Sopenharmony_ci .max_dw = scissor_size / 4 1299bf215546Sopenharmony_ci }; 1300bf215546Sopenharmony_ci 1301bf215546Sopenharmony_ci radv_write_scissors(cmd_buffer, &scissor_cs); 1302bf215546Sopenharmony_ci assert(scissor_cs.cdw * 4 == scissor_size); 1303bf215546Sopenharmony_ci upload_data = (char *)upload_data + scissor_size; 1304bf215546Sopenharmony_ci } 1305bf215546Sopenharmony_ci 1306bf215546Sopenharmony_ci VkWriteDescriptorSet ds_writes[5]; 1307bf215546Sopenharmony_ci VkDescriptorBufferInfo buf_info[ARRAY_SIZE(ds_writes)]; 1308bf215546Sopenharmony_ci int ds_cnt = 0; 1309bf215546Sopenharmony_ci buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer), 1310bf215546Sopenharmony_ci .offset = 0, 1311bf215546Sopenharmony_ci .range = upload_size}; 1312bf215546Sopenharmony_ci ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1313bf215546Sopenharmony_ci .dstBinding = DGC_DESC_PARAMS, 1314bf215546Sopenharmony_ci .dstArrayElement = 0, 1315bf215546Sopenharmony_ci .descriptorCount = 1, 1316bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1317bf215546Sopenharmony_ci .pBufferInfo = &buf_info[ds_cnt]}; 1318bf215546Sopenharmony_ci ++ds_cnt; 1319bf215546Sopenharmony_ci 1320bf215546Sopenharmony_ci buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->preprocessBuffer, 1321bf215546Sopenharmony_ci .offset = pGeneratedCommandsInfo->preprocessOffset, 1322bf215546Sopenharmony_ci .range = pGeneratedCommandsInfo->preprocessSize}; 1323bf215546Sopenharmony_ci ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1324bf215546Sopenharmony_ci .dstBinding = DGC_DESC_PREPARE, 1325bf215546Sopenharmony_ci .dstArrayElement = 0, 1326bf215546Sopenharmony_ci .descriptorCount = 1, 1327bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1328bf215546Sopenharmony_ci .pBufferInfo = &buf_info[ds_cnt]}; 1329bf215546Sopenharmony_ci ++ds_cnt; 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_ci if (pGeneratedCommandsInfo->streamCount > 0) { 1332bf215546Sopenharmony_ci buf_info[ds_cnt] = 1333bf215546Sopenharmony_ci (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer, 1334bf215546Sopenharmony_ci .offset = pGeneratedCommandsInfo->pStreams[0].offset, 1335bf215546Sopenharmony_ci .range = VK_WHOLE_SIZE}; 1336bf215546Sopenharmony_ci ds_writes[ds_cnt] = 1337bf215546Sopenharmony_ci (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1338bf215546Sopenharmony_ci .dstBinding = DGC_DESC_STREAM, 1339bf215546Sopenharmony_ci .dstArrayElement = 0, 1340bf215546Sopenharmony_ci .descriptorCount = 1, 1341bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1342bf215546Sopenharmony_ci .pBufferInfo = &buf_info[ds_cnt]}; 1343bf215546Sopenharmony_ci ++ds_cnt; 1344bf215546Sopenharmony_ci } 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_ci if (pGeneratedCommandsInfo->sequencesCountBuffer != VK_NULL_HANDLE) { 1347bf215546Sopenharmony_ci buf_info[ds_cnt] = 1348bf215546Sopenharmony_ci (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer, 1349bf215546Sopenharmony_ci .offset = pGeneratedCommandsInfo->sequencesCountOffset, 1350bf215546Sopenharmony_ci .range = VK_WHOLE_SIZE}; 1351bf215546Sopenharmony_ci ds_writes[ds_cnt] = 1352bf215546Sopenharmony_ci (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 1353bf215546Sopenharmony_ci .dstBinding = DGC_DESC_COUNT, 1354bf215546Sopenharmony_ci .dstArrayElement = 0, 1355bf215546Sopenharmony_ci .descriptorCount = 1, 1356bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1357bf215546Sopenharmony_ci .pBufferInfo = &buf_info[ds_cnt]}; 1358bf215546Sopenharmony_ci ++ds_cnt; 1359bf215546Sopenharmony_ci params.sequence_count = UINT32_MAX; 1360bf215546Sopenharmony_ci } 1361bf215546Sopenharmony_ci 1362bf215546Sopenharmony_ci radv_meta_save( 1363bf215546Sopenharmony_ci &saved_state, cmd_buffer, 1364bf215546Sopenharmony_ci RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS); 1365bf215546Sopenharmony_ci 1366bf215546Sopenharmony_ci radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 1367bf215546Sopenharmony_ci cmd_buffer->device->meta_state.dgc_prepare.pipeline); 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 1370bf215546Sopenharmony_ci cmd_buffer->device->meta_state.dgc_prepare.p_layout, 1371bf215546Sopenharmony_ci VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), ¶ms); 1372bf215546Sopenharmony_ci 1373bf215546Sopenharmony_ci radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 1374bf215546Sopenharmony_ci cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt, 1375bf215546Sopenharmony_ci ds_writes); 1376bf215546Sopenharmony_ci 1377bf215546Sopenharmony_ci unsigned block_count = MAX2(1, round_up_u32(pGeneratedCommandsInfo->sequencesCount, 64)); 1378bf215546Sopenharmony_ci radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); 1379bf215546Sopenharmony_ci 1380bf215546Sopenharmony_ci radv_buffer_finish(&token_buffer); 1381bf215546Sopenharmony_ci radv_meta_restore(&saved_state, cmd_buffer); 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 1384bf215546Sopenharmony_ci RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2; 1385bf215546Sopenharmony_ci}