/*
 * Copyright © 2021 Google
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_meta.h"
#include "radv_private.h"

#include "nir_builder.h"

static void
radv_get_sequence_size(const struct radv_indirect_command_layout *layout,
                       const struct radv_graphics_pipeline *pipeline, uint32_t *cmd_size,
                       uint32_t *upload_size)
{
   *cmd_size = 0;
   *upload_size = 0;

   if (layout->bind_vbo_mask) {
      *upload_size += 16 * util_bitcount(pipeline->vb_desc_usage_mask);

      /* One PKT3_SET_SH_REG for emitting VBO pointer (32-bit) */
      *cmd_size += 3 * 4;
   }

   if (layout->push_constant_mask) {
      bool need_copy = false;

      for (unsigned i = 0; i < ARRAY_SIZE(pipeline->base.shaders); ++i) {
         if (!pipeline->base.shaders[i])
            continue;

         struct radv_userdata_locations *locs = &pipeline->base.shaders[i]->info.user_sgprs_locs;
         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
            /* One PKT3_SET_SH_REG for emitting push constants pointer (32-bit) */
            *cmd_size += 3 * 4;
            need_copy = true;
         }
         if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0)
            /* One PKT3_SET_SH_REG writing all inline push constants. */
            *cmd_size += (2 + locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].num_sgprs) * 4;
      }
      if (need_copy)
         *upload_size +=
            align(pipeline->base.push_constant_size + 16 * pipeline->base.dynamic_offset_count, 16);
   }

   if (layout->binds_index_buffer) {
      /* Index type write (normal reg write) + index buffer base write (64-bits, but special packet
       * so only 1 word overhead) + index buffer size (again, special packet so only 1 word
       * overhead)
       */
      *cmd_size += (3 + 3 + 2) * 4;
   }

   if (layout->indexed) {
      /* userdata writes + instance count + indexed draw */
      *cmd_size += (5 + 2 + 5) * 4;
   } else {
      /* userdata writes + instance count + non-indexed draw */
      *cmd_size += (5 + 2 + 3) * 4;
   }

   if (layout->binds_state) {
      /* One PKT3_SET_CONTEXT_REG (PA_SU_SC_MODE_CNTL) */
      *cmd_size += 3 * 4;

      if (pipeline->base.device->physical_device->rad_info.has_gfx9_scissor_bug) {
         /* 1 reg write of 4 regs + 1 reg write of 2 regs per scissor */
         *cmd_size += (8 + 2 * MAX_SCISSORS) * 4;
      }
   }
}

static uint32_t
radv_align_cmdbuf_size(uint32_t size)
{
   return align(MAX2(1, size), 256);
}

uint32_t
radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info)
{
   VK_FROM_HANDLE(radv_indirect_command_layout, layout, cmd_info->indirectCommandsLayout);
   VK_FROM_HANDLE(radv_pipeline, pipeline, cmd_info->pipeline);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

   uint32_t cmd_size, upload_size;
   radv_get_sequence_size(layout, graphics_pipeline, &cmd_size, &upload_size);
   return radv_align_cmdbuf_size(cmd_size * cmd_info->sequencesCount);
}

enum radv_dgc_token_type {
   RADV_DGC_INDEX_BUFFER,
   RADV_DGC_DRAW,
   RADV_DGC_INDEXED_DRAW,
};

struct radv_dgc_token {
   uint16_t type;   /* enum radv_dgc_token_type, but making the size explicit */
   uint16_t offset; /* offset in the input stream */
   union {
      struct {
         uint16_t vtx_base_sgpr;
      } draw;
      struct {
         uint16_t index_size;
         uint16_t vtx_base_sgpr;
         uint32_t max_index_count;
      } indexed_draw;
   };
};

struct radv_dgc_params {
   uint32_t cmd_buf_stride;
   uint32_t cmd_buf_size;
   uint32_t upload_stride;
   uint32_t upload_addr;
   uint32_t sequence_count;
   uint32_t stream_stride;

   /* draw info */
   uint16_t draw_indexed;
   uint16_t draw_params_offset;
   uint16_t base_index_size;
   uint16_t vtx_base_sgpr;
   uint32_t max_index_count;

   /* bind index buffer info. Valid if base_index_size == 0 && draw_indexed */
   uint16_t index_buffer_offset;

   /* Top bit is DGC_DYNAMIC_VERTEX_INPUT */
   uint8_t vbo_cnt;

   uint8_t const_copy;

   /* Which VBOs are set in this indirect layout. */
   uint32_t vbo_bind_mask;

   uint16_t vbo_reg;
   uint16_t const_copy_size;

   uint64_t push_constant_mask;

   uint32_t ibo_type_32;
   uint32_t ibo_type_8;

   uint16_t push_constant_shader_cnt;

   uint16_t emit_state;
   uint32_t pa_su_sc_mode_cntl_base;
   uint16_t state_offset;
   uint16_t scissor_count;
   uint16_t scissor_offset; /* in parameter buffer. */
};

enum {
   DGC_USES_DRAWID = 1u << 14,
   DGC_USES_BASEINSTANCE = 1u << 15,
};

enum {
   DGC_DYNAMIC_STRIDE = 1u << 15,
};

enum {
   DGC_DYNAMIC_VERTEX_INPUT = 1u << 7,
};

enum {
   DGC_DESC_STREAM,
   DGC_DESC_PREPARE,
   DGC_DESC_PARAMS,
   DGC_DESC_COUNT,
   DGC_NUM_DESCS,
};

struct dgc_cmdbuf {
   nir_ssa_def *descriptor;
   nir_variable *offset;
};

static void
dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value)
{
   assert(value->bit_size >= 32);
   nir_ssa_def *offset = nir_load_var(b, cs->offset);
   nir_store_ssbo(b, value, cs->descriptor, offset, .access = ACCESS_NON_READABLE);
   nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1);
}

#define load_param32(b, field)                                                                     \
   nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                         \
                          .base = offsetof(struct radv_dgc_params, field), .range = 4)

#define load_param16(b, field)                                                                     \
   nir_ubfe(                                                                                       \
      (b),                                                                                         \
      nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                      \
                             .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4),  \
      nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 2) * 8), nir_imm_int((b), 16))

#define load_param8(b, field)                                                                      \
   nir_ubfe(                                                                                       \
      (b),                                                                                         \
      nir_load_push_constant((b), 1, 32, nir_imm_int((b), 0),                                      \
                             .base = (offsetof(struct radv_dgc_params, field) & ~3), .range = 4),  \
      nir_imm_int((b), (offsetof(struct radv_dgc_params, field) & 3) * 8), nir_imm_int((b), 8))

#define load_param64(b, field)                                                                     \
   nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0),                   \
                                                .base = offsetof(struct radv_dgc_params, field),   \
                                                .range = 8))

static nir_ssa_def *
nir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len)
{
   len = nir_iand_imm(b, len, 0x3fff);
   return nir_ior_imm(b, nir_ishl_imm(b, len, 16), PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op));
}

static void
dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr,
                         nir_ssa_def *first_vertex, nir_ssa_def *first_instance, nir_ssa_def *drawid)
{
   vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
   nir_ssa_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
   nir_ssa_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);

   nir_ssa_def *pkt_cnt = nir_imm_int(b, 1);
   pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
   pkt_cnt = nir_bcsel(b, has_baseinstance, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);

   nir_ssa_def *values[5] = {
      nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), first_vertex,
      nir_imm_int(b, PKT3_NOP_PAD),          nir_imm_int(b, PKT3_NOP_PAD),
   };

   values[3] = nir_bcsel(b, nir_ior(b, has_drawid, has_baseinstance),
                         nir_bcsel(b, has_drawid, drawid, first_instance), values[4]);
   values[4] = nir_bcsel(b, nir_iand(b, has_drawid, has_baseinstance), first_instance, values[4]);

   dgc_emit(b, cs, nir_vec(b, values, 5));
}

static void
dgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *instance_count)
{
   nir_ssa_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count};

   dgc_emit(b, cs, nir_vec(b, values, 2));
}

static void
dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset,
                      nir_ssa_def *index_count, nir_ssa_def *max_index_count)
{
   nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)),
                             max_index_count, index_offset, index_count,
                             nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};

   dgc_emit(b, cs, nir_vec(b, values, 5));
}

static void
dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vertex_count)
{
   nir_ssa_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count,
                             nir_imm_int(b, V_0287F0_DI_SRC_SEL_AUTO_INDEX)};

   dgc_emit(b, cs, nir_vec(b, values, 3));
}

static void
build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count)
{
   nir_ssa_def *global_id = get_global_ids(b, 1);

   nir_ssa_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
   nir_ssa_def *cmd_buf_size = load_param32(b, cmd_buf_size);

   nir_push_if(b, nir_ieq_imm(b, global_id, 0));
   {
      nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);

      nir_variable *offset =
         nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset");
      nir_store_var(b, offset, cmd_buf_tail_start, 0x1);

      nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
      nir_push_loop(b);
      {
         nir_ssa_def *curr_offset = nir_load_var(b, offset);
         const unsigned MAX_PACKET_WORDS = 0x3FFC;

         nir_push_if(b, nir_ieq(b, curr_offset, cmd_buf_size));
         {
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);

         nir_ssa_def *packet_size = nir_isub(b, cmd_buf_size, curr_offset);
         packet_size = nir_umin(b, packet_size, nir_imm_int(b, MAX_PACKET_WORDS * 4));

         nir_ssa_def *len = nir_ushr_imm(b, packet_size, 2);
         len = nir_iadd_imm(b, len, -2);
         nir_ssa_def *packet = nir_pkt3(b, PKT3_NOP, len);

         nir_store_ssbo(b, packet, dst_buf, curr_offset, .access = ACCESS_NON_READABLE);
         nir_store_var(b, offset, nir_iadd(b, curr_offset, packet_size), 0x1);
      }
      nir_pop_loop(b, NULL);
   }
   nir_pop_if(b, NULL);
}

static nir_shader *
build_dgc_prepare_shader(struct radv_device *dev)
{
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
   b.shader->info.workgroup_size[0] = 64;

   nir_ssa_def *global_id = get_global_ids(&b, 1);

   nir_ssa_def *sequence_id = global_id;

   nir_ssa_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride);
   nir_ssa_def *sequence_count = load_param32(&b, sequence_count);
   nir_ssa_def *stream_stride = load_param32(&b, stream_stride);

   nir_variable *count_var =
      nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "sequence_count");
   nir_store_var(&b, count_var, sequence_count, 0x1);

   nir_push_if(&b, nir_ieq_imm(&b, sequence_count, UINT32_MAX));
   {
      nir_ssa_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT);
      nir_ssa_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0), .align_mul = 4);
      nir_store_var(&b, count_var, cnt, 0x1);
   }
   nir_pop_if(&b, NULL);

   sequence_count = nir_load_var(&b, count_var);

   nir_push_if(&b, nir_ult(&b, sequence_id, sequence_count));
   {
      struct dgc_cmdbuf cmd_buf = {
         .descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE),
         .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
      };
      nir_store_var(&b, cmd_buf.offset, nir_imul(&b, global_id, cmd_buf_stride), 1);
      nir_ssa_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride);
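
      /* Each invocation prepares one sequence: its tokens are read from the stream
       * buffer at sequence_id * stream_stride, while the VBO descriptors and push
       * constant data generated below go to the upload area that follows the
       * generated command buffer (cmd_buf_size + sequence_id * upload_stride).
       */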
      nir_ssa_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM);
      nir_ssa_def *stream_base = nir_imul(&b, sequence_id, stream_stride);

      nir_variable *upload_offset =
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset");
      nir_store_var(&b, upload_offset,
                    nir_iadd(&b, load_param32(&b, cmd_buf_size),
                             nir_imul(&b, load_param32(&b, upload_stride), sequence_id)),
                    0x1);

      nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
      nir_ssa_def *vbo_cnt = nir_iand_imm(&b, load_param8(&b, vbo_cnt), 0x7F);
      nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0));
      {
         nir_variable *vbo_idx =
            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
         nir_store_var(&b, vbo_idx, nir_imm_int(&b, 0), 0x1);

         nir_push_loop(&b);
         {
            nir_push_if(&b, nir_uge(&b, nir_load_var(&b, vbo_idx), vbo_cnt));
            {
               nir_jump(&b, nir_jump_break);
            }
            nir_pop_if(&b, NULL);

            nir_ssa_def *vbo_offset = nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 16);
            nir_variable *vbo_data =
               nir_variable_create(b.shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data");

            nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
            nir_store_var(&b, vbo_data,
                          nir_load_ssbo(&b, 4, 32, param_buf, vbo_offset, .align_mul = 4), 0xf);

            nir_ssa_def *vbo_override =
               nir_ine_imm(&b,
                           nir_iand(&b, vbo_bind_mask,
                                    nir_ishl(&b, nir_imm_int(&b, 1), nir_load_var(&b, vbo_idx))),
                           0);
            nir_push_if(&b, vbo_override);
            {
               nir_ssa_def *vbo_offset_offset =
                  nir_iadd(&b, nir_imul_imm(&b, vbo_cnt, 16),
                           nir_imul_imm(&b, nir_load_var(&b, vbo_idx), 8));
               nir_ssa_def *vbo_over_data =
                  nir_load_ssbo(&b, 2, 32, param_buf, vbo_offset_offset, .align_mul = 4);
               nir_ssa_def *stream_offset = nir_iadd(
                  &b, stream_base, nir_iand_imm(&b, nir_channel(&b, vbo_over_data, 0), 0x7FFF));
               nir_ssa_def *stream_data =
                  nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);

               nir_ssa_def *va = nir_pack_64_2x32(&b, nir_channels(&b, stream_data, 0x3));
               nir_ssa_def *size = nir_channel(&b, stream_data, 2);
               nir_ssa_def *stride = nir_channel(&b, stream_data, 3);

               nir_ssa_def *vs_state_offset =
                  nir_ubfe(&b, nir_channel(&b, vbo_over_data, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 15));
               va = nir_iadd(&b, va, nir_u2u64(&b, vs_state_offset));

               nir_ssa_def *dyn_stride =
                  nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
               nir_ssa_def *old_stride =
                  nir_ubfe(&b, nir_channel(&b, nir_load_var(&b, vbo_data), 1), nir_imm_int(&b, 16),
                           nir_imm_int(&b, 14));
               stride = nir_bcsel(&b, dyn_stride, stride, old_stride);

               nir_ssa_def *use_per_attribute_vb_descs =
                  nir_test_mask(&b, nir_channel(&b, vbo_over_data, 0), 1u << 31);
               nir_variable *num_records = nir_variable_create(b.shader, nir_var_shader_temp,
                                                               glsl_uint_type(), "num_records");
               nir_store_var(&b, num_records, size, 0x1);

               nir_push_if(&b, use_per_attribute_vb_descs);
               {
                  nir_ssa_def *attrib_end = nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1),
                                                     nir_imm_int(&b, 16), nir_imm_int(&b, 16));
                  nir_ssa_def *attrib_index_offset =
                     nir_ubfe(&b, nir_channel(&b, vbo_over_data, 1), nir_imm_int(&b, 0),
                              nir_imm_int(&b, 16));

                  nir_push_if(&b, nir_ult(&b, nir_load_var(&b, num_records), attrib_end));
                  {
                     nir_store_var(&b, num_records, nir_imm_int(&b, 0), 0x1);
                  }
                  nir_push_else(&b, NULL);
                  nir_push_if(&b, nir_ieq_imm(&b, stride, 0));
                  {
                     nir_store_var(&b, num_records, nir_imm_int(&b, 1), 0x1);
                  }
                  nir_push_else(&b, NULL);
                  {
                     nir_ssa_def *r = nir_iadd(
                        &b,
                        nir_iadd_imm(
                           &b,
                           nir_udiv(&b, nir_isub(&b, nir_load_var(&b, num_records), attrib_end),
                                    stride),
                           1),
                        attrib_index_offset);
                     nir_store_var(&b, num_records, r, 0x1);
                  }
                  nir_pop_if(&b, NULL);
                  nir_pop_if(&b, NULL);

                  nir_ssa_def *convert_cond = nir_ine_imm(&b, nir_load_var(&b, num_records), 0);
                  if (dev->physical_device->rad_info.gfx_level == GFX9)
                     convert_cond = nir_imm_bool(&b, false);
                  else if (dev->physical_device->rad_info.gfx_level != GFX8)
                     convert_cond = nir_iand(&b, convert_cond, nir_ieq_imm(&b, stride, 0));

                  nir_ssa_def *new_records = nir_iadd(
                     &b, nir_imul(&b, nir_iadd_imm(&b, nir_load_var(&b, num_records), -1), stride),
                     attrib_end);
                  new_records =
                     nir_bcsel(&b, convert_cond, new_records, nir_load_var(&b, num_records));
                  nir_store_var(&b, num_records, new_records, 0x1);
               }
               nir_push_else(&b, NULL);
               {
                  if (dev->physical_device->rad_info.gfx_level != GFX8) {
                     nir_push_if(&b, nir_ine_imm(&b, stride, 0));
                     {
                        nir_ssa_def *r = nir_iadd(&b, nir_load_var(&b, num_records),
                                                  nir_iadd_imm(&b, stride, -1));
                        nir_store_var(&b, num_records, nir_udiv(&b, r, stride), 0x1);
                     }
                     nir_pop_if(&b, NULL);
                  }
               }
               nir_pop_if(&b, NULL);

               nir_ssa_def *rsrc_word3 = nir_channel(&b, nir_load_var(&b, vbo_data), 3);
               if (dev->physical_device->rad_info.gfx_level >= GFX10) {
                  nir_ssa_def *oob_select = nir_bcsel(
                     &b, nir_ieq_imm(&b, stride, 0), nir_imm_int(&b, V_008F0C_OOB_SELECT_RAW),
                     nir_imm_int(&b, V_008F0C_OOB_SELECT_STRUCTURED));
                  rsrc_word3 = nir_iand_imm(&b, rsrc_word3, C_008F0C_OOB_SELECT);
                  rsrc_word3 = nir_ior(&b, rsrc_word3, nir_ishl_imm(&b, oob_select, 28));
               }

               nir_ssa_def *va_hi = nir_iand_imm(&b, nir_unpack_64_2x32_split_y(&b, va), 0xFFFF);
               stride = nir_iand_imm(&b, stride, 0x3FFF);
               nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(&b, va),
                                               nir_ior(&b, nir_ishl_imm(&b, stride, 16), va_hi),
                                               nir_load_var(&b, num_records), rsrc_word3};
               nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
            }
            nir_pop_if(&b, NULL);

            /* On GFX9, it seems bounds checking is disabled if both
             * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
             * GFX10.3 but it doesn't hurt.
             */
            nir_ssa_def *num_records = nir_channel(&b, nir_load_var(&b, vbo_data), 2);
            nir_ssa_def *buf_va = nir_iand_imm(
               &b, nir_pack_64_2x32(&b, nir_channels(&b, nir_load_var(&b, vbo_data), 0x3)),
               (1ull << 48) - 1ull);
            nir_push_if(&b,
                        nir_ior(&b, nir_ieq_imm(&b, num_records, 0), nir_ieq_imm(&b, buf_va, 0)));
            {
               nir_ssa_def *use_dynamic_vertex_input =
                  nir_test_mask(&b, load_param8(&b, vbo_cnt), DGC_DYNAMIC_VERTEX_INPUT);

               nir_push_if(&b, use_dynamic_vertex_input);
               {
                  nir_ssa_def *new_vbo_data[4] = {
                     nir_imm_int(&b, 0), nir_imm_int(&b, S_008F04_STRIDE(16)), nir_imm_int(&b, 0),
                     nir_channel(&b, nir_load_var(&b, vbo_data), 3)};
                  nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
               }
               nir_push_else(&b, NULL);
               {
                  nir_ssa_def *new_vbo_data[4] = {nir_imm_int(&b, 0), nir_imm_int(&b, 0),
                                                  nir_imm_int(&b, 0), nir_imm_int(&b, 0)};
                  nir_store_var(&b, vbo_data, nir_vec(&b, new_vbo_data, 4), 0xf);
               }
               nir_pop_if(&b, NULL);
            }
            nir_pop_if(&b, NULL);

            nir_ssa_def *upload_off = nir_iadd(&b, nir_load_var(&b, upload_offset), vbo_offset);
            nir_store_ssbo(&b, nir_load_var(&b, vbo_data), cmd_buf.descriptor, upload_off,
                           .access = ACCESS_NON_READABLE);
            nir_store_var(&b, vbo_idx, nir_iadd_imm(&b, nir_load_var(&b, vbo_idx), 1), 0x1);
         }
         nir_pop_loop(&b, NULL);
         nir_ssa_def *packet[3] = {
            nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(&b, vbo_reg),
            nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset))};

         dgc_emit(&b, &cmd_buf, nir_vec(&b, packet, 3));

         nir_store_var(&b, upload_offset,
                       nir_iadd(&b, nir_load_var(&b, upload_offset), nir_imul_imm(&b, vbo_cnt, 16)),
                       0x1);
      }
      nir_pop_if(&b, NULL);

      nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask);
      nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
      {
         nir_ssa_def *const_copy = nir_ine_imm(&b, load_param8(&b, const_copy), 0);
         nir_ssa_def *const_copy_size = load_param16(&b, const_copy_size);
         nir_ssa_def *const_copy_words = nir_ushr_imm(&b, const_copy_size, 2);
         const_copy_words = nir_bcsel(&b, const_copy, const_copy_words, nir_imm_int(&b, 0));

         nir_variable *idx =
            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx");
         nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1);

         nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
         nir_ssa_def *param_offset = nir_imul_imm(&b, vbo_cnt, 24);
         nir_ssa_def *param_offset_offset = nir_iadd_imm(&b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
         nir_ssa_def *param_const_offset =
            nir_iadd_imm(&b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12);
         nir_push_loop(&b);
         {
            nir_ssa_def *cur_idx = nir_load_var(&b, idx);
            nir_push_if(&b, nir_uge(&b, cur_idx, const_copy_words));
            {
               nir_jump(&b, nir_jump_break);
            }
            nir_pop_if(&b, NULL);

            nir_variable *data =
               nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");

            nir_ssa_def *update =
               nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx));
            update = nir_bcsel(
               &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)), update,
               nir_imm_int64(&b, 0));

            nir_push_if(&b, nir_ine_imm(&b, update, 0));
            {
               nir_ssa_def *stream_offset = nir_load_ssbo(
                  &b, 1, 32, param_buf,
                  nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)), .align_mul = 4);
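               /* This dword is overridden by a push constant token: read its value from
                * the application's stream instead of the saved push constant data. */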
               nir_ssa_def *new_data =
                  nir_load_ssbo(&b, 1, 32, stream_buf, nir_iadd(&b, stream_base, stream_offset),
                                .align_mul = 4);
               nir_store_var(&b, data, new_data, 0x1);
            }
            nir_push_else(&b, NULL);
            {
               nir_store_var(
                  &b, data,
                  nir_load_ssbo(&b, 1, 32, param_buf,
                                nir_iadd(&b, param_const_offset, nir_ishl_imm(&b, cur_idx, 2)),
                                .align_mul = 4),
                  0x1);
            }
            nir_pop_if(&b, NULL);

            nir_store_ssbo(
               &b, nir_load_var(&b, data), cmd_buf.descriptor,
               nir_iadd(&b, nir_load_var(&b, upload_offset), nir_ishl_imm(&b, cur_idx, 2)),
               .access = ACCESS_NON_READABLE);

            nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
         }
         nir_pop_loop(&b, NULL);

         nir_variable *shader_idx =
            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
         nir_store_var(&b, shader_idx, nir_imm_int(&b, 0), 0x1);
         nir_ssa_def *shader_cnt = load_param16(&b, push_constant_shader_cnt);

         nir_push_loop(&b);
         {
            nir_ssa_def *cur_shader_idx = nir_load_var(&b, shader_idx);
            nir_push_if(&b, nir_uge(&b, cur_shader_idx, shader_cnt));
            {
               nir_jump(&b, nir_jump_break);
            }
            nir_pop_if(&b, NULL);

            nir_ssa_def *reg_info =
               nir_load_ssbo(&b, 3, 32, param_buf,
                             nir_iadd(&b, param_offset, nir_imul_imm(&b, cur_shader_idx, 12)),
                             .align_mul = 4);
            nir_ssa_def *upload_sgpr =
               nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 0), nir_imm_int(&b, 16));
            nir_ssa_def *inline_sgpr =
               nir_ubfe(&b, nir_channel(&b, reg_info, 0), nir_imm_int(&b, 16), nir_imm_int(&b, 16));
            nir_ssa_def *inline_mask = nir_pack_64_2x32(&b, nir_channels(&b, reg_info, 0x6));

            nir_push_if(&b, nir_ine_imm(&b, upload_sgpr, 0));
            {
               nir_ssa_def *pkt[3] = {
                  nir_imm_int(&b, PKT3(PKT3_SET_SH_REG, 1, 0)),
                  upload_sgpr,
                  nir_iadd(&b, load_param32(&b, upload_addr), nir_load_var(&b, upload_offset))
               };

               dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 3));
            }
            nir_pop_if(&b, NULL);

            nir_push_if(&b, nir_ine_imm(&b, inline_sgpr, 0));
            {
               nir_ssa_def *inline_len = nir_bit_count(&b, inline_mask);
               nir_store_var(&b, idx, nir_imm_int(&b, 0), 0x1);

               nir_ssa_def *pkt[2] = {
                  nir_pkt3(&b, PKT3_SET_SH_REG, inline_len),
                  inline_sgpr
               };

               dgc_emit(&b, &cmd_buf, nir_vec(&b, pkt, 2));

               nir_push_loop(&b);
               {
                  nir_ssa_def *cur_idx = nir_load_var(&b, idx);
                  nir_push_if(&b,
                              nir_uge(&b, cur_idx, nir_imm_int(&b, 64 /* bits in inline_mask */)));
                  {
                     nir_jump(&b, nir_jump_break);
                  }
                  nir_pop_if(&b, NULL);

                  nir_ssa_def *l = nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx);
                  nir_push_if(&b, nir_ieq_imm(&b, nir_iand(&b, l, inline_mask), 0));
                  {
                     nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
                     nir_jump(&b, nir_jump_continue);
                  }
                  nir_pop_if(&b, NULL);

                  nir_variable *data =
                     nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");

                  nir_ssa_def *update =
                     nir_iand(&b, push_const_mask, nir_ishl(&b, nir_imm_int64(&b, 1), cur_idx));
                  update = nir_bcsel(
                     &b, nir_ult(&b, cur_idx, nir_imm_int(&b, 64 /* bits in push_const_mask */)),
                     update, nir_imm_int64(&b, 0));

                  nir_push_if(&b, nir_ine_imm(&b, update, 0));
                  {
                     nir_ssa_def *stream_offset = nir_load_ssbo(
                        &b, 1, 32, param_buf,
                        nir_iadd(&b, param_offset_offset, nir_ishl_imm(&b, cur_idx, 2)),
                        .align_mul = 4);
                     nir_ssa_def *new_data =
                        nir_load_ssbo(&b, 1, 32, stream_buf,
                                      nir_iadd(&b, stream_base, stream_offset), .align_mul = 4);
                     nir_store_var(&b, data, new_data, 0x1);
                  }
                  nir_push_else(&b, NULL);
                  {
                     nir_store_var(&b, data,
                                   nir_load_ssbo(&b, 1, 32, param_buf,
                                                 nir_iadd(&b, param_const_offset,
                                                          nir_ishl_imm(&b, cur_idx, 2)),
                                                 .align_mul = 4),
                                   0x1);
                  }
                  nir_pop_if(&b, NULL);

                  dgc_emit(&b, &cmd_buf, nir_load_var(&b, data));

                  nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 0x1);
               }
               nir_pop_loop(&b, NULL);
            }
            nir_pop_if(&b, NULL);
            nir_store_var(&b, shader_idx, nir_iadd_imm(&b, cur_shader_idx, 1), 0x1);
         }
         nir_pop_loop(&b, NULL);
      }
      nir_pop_if(&b, 0);

      nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, emit_state), 1));
      {
         nir_ssa_def *stream_offset = nir_iadd(&b, load_param16(&b, state_offset), stream_base);
         nir_ssa_def *state = nir_load_ssbo(&b, 1, 32, stream_buf, stream_offset, .align_mul = 4);
         state = nir_iand_imm(&b, state, 1);

         nir_ssa_def *reg =
            nir_ior(&b, load_param32(&b, pa_su_sc_mode_cntl_base), nir_ishl_imm(&b, state, 2));

         nir_ssa_def *cmd_values[3] = {
            nir_imm_int(&b, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)),
            nir_imm_int(&b, (R_028814_PA_SU_SC_MODE_CNTL - SI_CONTEXT_REG_OFFSET) >> 2), reg};

         dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 3));
      }
      nir_pop_if(&b, NULL);

      nir_ssa_def *scissor_count = load_param16(&b, scissor_count);
      nir_push_if(&b, nir_ine_imm(&b, scissor_count, 0));
      {
         nir_ssa_def *scissor_offset = load_param16(&b, scissor_offset);
         nir_variable *idx = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(),
                                                 "scissor_copy_idx");
         nir_store_var(&b, idx, nir_imm_int(&b, 0), 1);

         nir_push_loop(&b);
         {
            nir_ssa_def *cur_idx = nir_load_var(&b, idx);
            nir_push_if(&b, nir_uge(&b, cur_idx, scissor_count));
            {
               nir_jump(&b, nir_jump_break);
            }
            nir_pop_if(&b, NULL);

            nir_ssa_def *param_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_PARAMS);
            nir_ssa_def *param_offset = nir_iadd(&b, scissor_offset, nir_imul_imm(&b, cur_idx, 4));
            nir_ssa_def *value = nir_load_ssbo(&b, 1, 32, param_buf, param_offset, .align_mul = 4);

            dgc_emit(&b, &cmd_buf, value);

            nir_store_var(&b, idx, nir_iadd_imm(&b, cur_idx, 1), 1);
         }
         nir_pop_loop(&b, NULL);
      }
      nir_pop_if(&b, NULL);

      nir_push_if(&b, nir_ieq_imm(&b, load_param16(&b, draw_indexed), 0));
      {
         nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr);
         nir_ssa_def *stream_offset =
            nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base);

         nir_ssa_def *draw_data0 =
            nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
         nir_ssa_def *vertex_count = nir_channel(&b, draw_data0, 0);
         nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1);
         nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 2);
         nir_ssa_def *first_instance = nir_channel(&b, draw_data0, 3);

         nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, vertex_count, 0),
                                  nir_ine_imm(&b, instance_count, 0)));
         {
            dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance,
                                     sequence_id);
            dgc_emit_instance_count(&b, &cmd_buf, instance_count);
            dgc_emit_draw(&b, &cmd_buf, vertex_count);
         }
         nir_pop_if(&b, 0);
      }
      nir_push_else(&b, NULL);
      {
         nir_variable *index_size_var =
            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "index_size");
         nir_store_var(&b, index_size_var, load_param16(&b, base_index_size), 0x1);
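         /* A base_index_size of 0 means the layout binds the index buffer itself;
          * index_size_var and max_index_count_var are overwritten below once the
          * index buffer token has been parsed. */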
         nir_variable *max_index_count_var =
            nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "max_index_count");
         nir_store_var(&b, max_index_count_var, load_param32(&b, max_index_count), 0x1);

         nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
         nir_push_if(&b, bind_index_buffer);
         {
            nir_ssa_def *index_stream_offset =
               nir_iadd(&b, load_param16(&b, index_buffer_offset), stream_base);
            nir_ssa_def *data =
               nir_load_ssbo(&b, 4, 32, stream_buf, index_stream_offset, .align_mul = 4);

            nir_ssa_def *vk_index_type = nir_channel(&b, data, 3);
            nir_ssa_def *index_type = nir_bcsel(
               &b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_32)),
               nir_imm_int(&b, V_028A7C_VGT_INDEX_32), nir_imm_int(&b, V_028A7C_VGT_INDEX_16));
            index_type = nir_bcsel(&b, nir_ieq(&b, vk_index_type, load_param32(&b, ibo_type_8)),
                                   nir_imm_int(&b, V_028A7C_VGT_INDEX_8), index_type);

            nir_ssa_def *index_size = nir_iand_imm(
               &b, nir_ushr(&b, nir_imm_int(&b, 0x142), nir_imul_imm(&b, index_type, 4)), 0xf);
            nir_store_var(&b, index_size_var, index_size, 0x1);

            nir_ssa_def *max_index_count = nir_udiv(&b, nir_channel(&b, data, 2), index_size);
            nir_store_var(&b, max_index_count_var, max_index_count, 0x1);

            nir_ssa_def *cmd_values[3 + 2 + 3];

            if (dev->physical_device->rad_info.gfx_level >= GFX9) {
               unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
               if (dev->physical_device->rad_info.gfx_level < GFX9 ||
                   (dev->physical_device->rad_info.gfx_level == GFX9 &&
                    dev->physical_device->rad_info.me_fw_version < 26))
                  opcode = PKT3_SET_UCONFIG_REG;
               cmd_values[0] = nir_imm_int(&b, PKT3(opcode, 1, 0));
               cmd_values[1] = nir_imm_int(
                  &b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
               cmd_values[2] = index_type;
            } else {
               cmd_values[0] = nir_imm_int(&b, PKT3(PKT3_INDEX_TYPE, 0, 0));
               cmd_values[1] = index_type;
               cmd_values[2] = nir_imm_int(&b, PKT3_NOP_PAD);
            }

            nir_ssa_def *addr_upper = nir_channel(&b, data, 1);
            addr_upper = nir_ishr_imm(&b, nir_ishl_imm(&b, addr_upper, 16), 16);

            cmd_values[3] = nir_imm_int(&b, PKT3(PKT3_INDEX_BASE, 1, 0));
            cmd_values[4] = nir_channel(&b, data, 0);
            cmd_values[5] = addr_upper;
            cmd_values[6] = nir_imm_int(&b, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
            cmd_values[7] = max_index_count;

            dgc_emit(&b, &cmd_buf, nir_vec(&b, cmd_values, 8));
         }
         nir_pop_if(&b, NULL);

         nir_ssa_def *index_size = nir_load_var(&b, index_size_var);
         nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var);
         nir_ssa_def *vtx_base_sgpr = load_param16(&b, vtx_base_sgpr);
         nir_ssa_def *stream_offset =
            nir_iadd(&b, load_param16(&b, draw_params_offset), stream_base);

         index_size =
            nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size);
         max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var),
                                     max_index_count);
         nir_ssa_def *draw_data0 =
            nir_load_ssbo(&b, 4, 32, stream_buf, stream_offset, .align_mul = 4);
         nir_ssa_def *draw_data1 = nir_load_ssbo(
            &b, 1, 32, stream_buf, nir_iadd_imm(&b, stream_offset, 16), .align_mul = 4);
         nir_ssa_def *index_count = nir_channel(&b, draw_data0, 0);
         nir_ssa_def *instance_count = nir_channel(&b, draw_data0, 1);
         nir_ssa_def *first_index = nir_channel(&b, draw_data0, 2);
         nir_ssa_def *vertex_offset = nir_channel(&b, draw_data0, 3);
         nir_ssa_def *first_instance = nir_channel(&b, draw_data1, 0);

         nir_push_if(&b, nir_iand(&b, nir_ine_imm(&b, index_count, 0),
                                  nir_ine_imm(&b, instance_count, 0)));
         {
            dgc_emit_userdata_vertex(&b, &cmd_buf, vtx_base_sgpr, vertex_offset, first_instance,
                                     sequence_id);
            dgc_emit_instance_count(&b, &cmd_buf, instance_count);
            dgc_emit_draw_indexed(&b, &cmd_buf, first_index, index_count, max_index_count);
         }
         nir_pop_if(&b, 0);
      }
      nir_pop_if(&b, NULL);

      /* Pad the cmdbuffer if we did not use the whole stride */
      nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end));
      {
         nir_ssa_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset));
         cnt = nir_ushr_imm(&b, cnt, 2);
         cnt = nir_iadd_imm(&b, cnt, -2);
         nir_ssa_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt);

         dgc_emit(&b, &cmd_buf, pkt);
      }
      nir_pop_if(&b, NULL);
   }
   nir_pop_if(&b, NULL);

   build_dgc_buffer_tail(&b, sequence_count);
   return b.shader;
}

void
radv_device_finish_dgc_prepare_state(struct radv_device *device)
{
   radv_DestroyPipeline(radv_device_to_handle(device), device->meta_state.dgc_prepare.pipeline,
                        &device->meta_state.alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              device->meta_state.dgc_prepare.p_layout, &device->meta_state.alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        device->meta_state.dgc_prepare.ds_layout,
                                                        &device->meta_state.alloc);
}

VkResult
radv_device_init_dgc_prepare_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_dgc_prepare_shader(device);

   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = DGC_NUM_DESCS,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = DGC_DESC_STREAM,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = DGC_DESC_PREPARE,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = DGC_DESC_PARAMS,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = DGC_DESC_COUNT,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.dgc_prepare.ds_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.dgc_prepare.ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct radv_dgc_params)},
   };
   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.dgc_prepare.p_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   VkPipelineShaderStageCreateInfo shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = shader_stage,
      .flags = 0,
      .layout = device->meta_state.dgc_prepare.p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &pipeline_info, &device->meta_state.alloc, &device->meta_state.dgc_prepare.pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

cleanup:
   ralloc_free(cs);
   return result;
}

VkResult
radv_CreateIndirectCommandsLayoutNV(VkDevice _device,
                                    const VkIndirectCommandsLayoutCreateInfoNV *pCreateInfo,
                                    const VkAllocationCallbacks *pAllocator,
                                    VkIndirectCommandsLayoutNV *pIndirectCommandsLayout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_indirect_command_layout *layout;

   size_t size =
      sizeof(*layout) + pCreateInfo->tokenCount * sizeof(VkIndirectCommandsLayoutTokenNV);

   layout =
      vk_zalloc2(&device->vk.alloc, pAllocator, size, alignof(struct radv_indirect_command_layout),
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!layout)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV);

   layout->input_stride = pCreateInfo->pStreamStrides[0];
   layout->token_count = pCreateInfo->tokenCount;
   typed_memcpy(layout->tokens, pCreateInfo->pTokens, pCreateInfo->tokenCount);

   layout->ibo_type_32 = VK_INDEX_TYPE_UINT32;
   layout->ibo_type_8 = VK_INDEX_TYPE_UINT8_EXT;

   for (unsigned i = 0; i < pCreateInfo->tokenCount; ++i) {
      switch (pCreateInfo->pTokens[i].tokenType) {
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV:
         layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV:
         layout->indexed = true;
         layout->draw_params_offset = pCreateInfo->pTokens[i].offset;
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
         layout->binds_index_buffer = true;
         layout->index_buffer_offset = pCreateInfo->pTokens[i].offset;
         /* 16-bit is implied if we find no match. */
         for (unsigned j = 0; j < pCreateInfo->pTokens[i].indexTypeCount; j++) {
            if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT32)
               layout->ibo_type_32 = pCreateInfo->pTokens[i].pIndexTypeValues[j];
            else if (pCreateInfo->pTokens[i].pIndexTypes[j] == VK_INDEX_TYPE_UINT8_EXT)
               layout->ibo_type_8 = pCreateInfo->pTokens[i].pIndexTypeValues[j];
         }
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
         layout->bind_vbo_mask |= 1u << pCreateInfo->pTokens[i].vertexBindingUnit;
         layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] =
            pCreateInfo->pTokens[i].offset;
         if (pCreateInfo->pTokens[i].vertexDynamicStride)
            layout->vbo_offsets[pCreateInfo->pTokens[i].vertexBindingUnit] |= DGC_DYNAMIC_STRIDE;
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
         for (unsigned j = pCreateInfo->pTokens[i].pushconstantOffset / 4, k = 0;
              k < pCreateInfo->pTokens[i].pushconstantSize / 4; ++j, ++k) {
            layout->push_constant_mask |= 1ull << j;
            layout->push_constant_offsets[j] = pCreateInfo->pTokens[i].offset + k * 4;
         }
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_STATE_FLAGS_NV:
         layout->binds_state = true;
         layout->state_offset = pCreateInfo->pTokens[i].offset;
         break;
      default:
         unreachable("Unhandled token type");
      }
   }
   if (!layout->indexed)
      layout->binds_index_buffer = false;

   *pIndirectCommandsLayout = radv_indirect_command_layout_to_handle(layout);
   return VK_SUCCESS;
}

void
radv_DestroyIndirectCommandsLayoutNV(VkDevice _device,
                                     VkIndirectCommandsLayoutNV indirectCommandsLayout,
                                     const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_indirect_command_layout, layout, indirectCommandsLayout);

   if (!layout)
      return;

   vk_object_base_finish(&layout->base);
   vk_free2(&device->vk.alloc, pAllocator, layout);
}

void
radv_GetGeneratedCommandsMemoryRequirementsNV(
   VkDevice _device, const VkGeneratedCommandsMemoryRequirementsInfoNV *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
   VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

   uint32_t cmd_stride, upload_stride;
   radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);

   VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(cmd_stride * pInfo->maxSequencesCount);
   VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;

   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      device->physical_device->memory_types_32bit;
   pMemoryRequirements->memoryRequirements.alignment = 256;
   pMemoryRequirements->memoryRequirements.size =
      align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
}

void
radv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer,
                                      const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
   /* Can't do anything here as, in some cases, we depend on dynamic state that is only known
    * at draw time. */
}

/* Always need to call this directly before draw due to dependence on bound state. */
void
radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
                 const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
   VK_FROM_HANDLE(radv_indirect_command_layout, layout,
                  pGeneratedCommandsInfo->indirectCommandsLayout);
   VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
   VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
   struct radv_meta_saved_state saved_state;
   struct radv_buffer token_buffer;

   uint32_t cmd_stride, upload_stride;
   radv_get_sequence_size(layout, graphics_pipeline, &cmd_stride, &upload_stride);

   unsigned cmd_buf_size =
      radv_align_cmdbuf_size(cmd_stride * pGeneratedCommandsInfo->sequencesCount);

   unsigned vb_size =
      layout->bind_vbo_mask ? util_bitcount(graphics_pipeline->vb_desc_usage_mask) * 24 : 0;
   unsigned const_size = graphics_pipeline->base.push_constant_size +
                         16 * graphics_pipeline->base.dynamic_offset_count +
                         sizeof(layout->push_constant_offsets) +
                         ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;
   if (!layout->push_constant_mask)
      const_size = 0;

   unsigned scissor_size = (8 + 2 * cmd_buffer->state.dynamic.scissor.count) * 4;
   if (!layout->binds_state || !cmd_buffer->state.dynamic.scissor.count ||
       !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
      scissor_size = 0;

   unsigned upload_size = MAX2(vb_size + const_size + scissor_size, 16);

   void *upload_data;
   unsigned upload_offset;
   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, upload_size, &upload_offset, &upload_data)) {
      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
      return;
   }

   void *upload_data_base = upload_data;

   radv_buffer_init(&token_buffer, cmd_buffer->device, cmd_buffer->upload.upload_bo, upload_size,
                    upload_offset);

   uint64_t upload_addr = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset +
                          pGeneratedCommandsInfo->preprocessOffset;

   uint16_t vtx_base_sgpr =
      (cmd_buffer->state.graphics_pipeline->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
   if (cmd_buffer->state.graphics_pipeline->uses_drawid)
      vtx_base_sgpr |= DGC_USES_DRAWID;
   if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
      vtx_base_sgpr |= DGC_USES_BASEINSTANCE;

   uint16_t vbo_sgpr =
      ((radv_lookup_user_sgpr(&graphics_pipeline->base, MESA_SHADER_VERTEX, AC_UD_VS_VERTEX_BUFFERS)->sgpr_idx * 4 +
        graphics_pipeline->base.user_data_0[MESA_SHADER_VERTEX]) -
       SI_SH_REG_OFFSET) >>
      2;
   struct radv_dgc_params params = {
      .cmd_buf_stride = cmd_stride,
      .cmd_buf_size = cmd_buf_size,
      .upload_addr = (uint32_t)upload_addr,
      .upload_stride = upload_stride,
      .sequence_count = pGeneratedCommandsInfo->sequencesCount,
      .stream_stride = layout->input_stride,
      .draw_indexed = layout->indexed,
      .draw_params_offset = layout->draw_params_offset,
      .base_index_size =
         layout->binds_index_buffer ? 0 : radv_get_vgt_index_size(cmd_buffer->state.index_type),
      .vtx_base_sgpr = vtx_base_sgpr,
      .max_index_count = cmd_buffer->state.max_index_count,
      .index_buffer_offset = layout->index_buffer_offset,
      .vbo_reg = vbo_sgpr,
      .ibo_type_32 = layout->ibo_type_32,
      .ibo_type_8 = layout->ibo_type_8,
      .emit_state = layout->binds_state,
      .pa_su_sc_mode_cntl_base = radv_get_pa_su_sc_mode_cntl(cmd_buffer) & C_028814_FACE,
      .state_offset = layout->state_offset,
   };

   if (layout->bind_vbo_mask) {
      radv_write_vertex_descriptors(cmd_buffer, graphics_pipeline, true, upload_data);

      uint32_t *vbo_info = (uint32_t *)((char *)upload_data + graphics_pipeline->vb_desc_alloc_size);

      struct radv_shader *vs_shader = radv_get_shader(&graphics_pipeline->base, MESA_SHADER_VERTEX);
      const struct radv_vs_input_state *vs_state =
         vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL;
      uint32_t mask = graphics_pipeline->vb_desc_usage_mask;
      unsigned idx = 0;
      while (mask) {
         unsigned i = u_bit_scan(&mask);
         unsigned binding =
            vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
                     : (graphics_pipeline->use_per_attribute_vb_descs ? graphics_pipeline->attrib_bindings[i] : i);
         uint32_t attrib_end =
            vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : graphics_pipeline->attrib_ends[i];

         params.vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx;
         vbo_info[2 * idx] = ((graphics_pipeline->use_per_attribute_vb_descs ? 1u : 0u) << 31) |
                             (vs_state ? vs_state->offsets[i] << 16 : 0) |
                             layout->vbo_offsets[binding];
         vbo_info[2 * idx + 1] = graphics_pipeline->attrib_index_offset[i] | (attrib_end << 16);
         ++idx;
      }
      params.vbo_cnt = idx | (vs_state ? DGC_DYNAMIC_VERTEX_INPUT : 0);
      upload_data = (char *)upload_data + vb_size;
   }

   if (layout->push_constant_mask) {
      uint32_t *desc = upload_data;
      upload_data = (char *)upload_data + ARRAY_SIZE(graphics_pipeline->base.shaders) * 12;

      unsigned idx = 0;
      for (unsigned i = 0; i < ARRAY_SIZE(graphics_pipeline->base.shaders); ++i) {
         if (!graphics_pipeline->base.shaders[i])
            continue;

         struct radv_userdata_locations *locs = &graphics_pipeline->base.shaders[i]->info.user_sgprs_locs;
         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0)
            params.const_copy = 1;

         if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0 ||
             locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
            unsigned upload_sgpr = 0;
            unsigned inline_sgpr = 0;

            if (locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx >= 0) {
               upload_sgpr =
                  (graphics_pipeline->base.user_data_0[i] + 4 * locs->shader_data[AC_UD_PUSH_CONSTANTS].sgpr_idx -
                   SI_SH_REG_OFFSET) >>
                  2;
            }

            if (locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx >= 0) {
               inline_sgpr = (graphics_pipeline->base.user_data_0[i] +
                              4 * locs->shader_data[AC_UD_INLINE_PUSH_CONSTANTS].sgpr_idx -
                              SI_SH_REG_OFFSET) >>
                             2;
               desc[idx * 3 + 1] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask;
               desc[idx * 3 + 2] = graphics_pipeline->base.shaders[i]->info.inline_push_constant_mask >> 32;
            }
            desc[idx * 3] = upload_sgpr | (inline_sgpr << 16);
            ++idx;
         }
      }

      params.push_constant_shader_cnt = idx;

      params.const_copy_size = graphics_pipeline->base.push_constant_size +
                               16 * graphics_pipeline->base.dynamic_offset_count;
      params.push_constant_mask = layout->push_constant_mask;

      memcpy(upload_data, layout->push_constant_offsets, sizeof(layout->push_constant_offsets));
      upload_data = (char *)upload_data + sizeof(layout->push_constant_offsets);

      memcpy(upload_data, cmd_buffer->push_constants, graphics_pipeline->base.push_constant_size);
      upload_data = (char *)upload_data + graphics_pipeline->base.push_constant_size;

      struct radv_descriptor_state *descriptors_state =
         radv_get_descriptors_state(cmd_buffer, pGeneratedCommandsInfo->pipelineBindPoint);
      memcpy(upload_data, descriptors_state->dynamic_buffers,
             16 * graphics_pipeline->base.dynamic_offset_count);
      upload_data = (char *)upload_data + 16 * graphics_pipeline->base.dynamic_offset_count;
   }

   if (scissor_size) {
      params.scissor_offset = (char *)upload_data - (char *)upload_data_base;
      params.scissor_count = scissor_size / 4;

      struct radeon_cmdbuf scissor_cs = {
         .buf = upload_data,
         .cdw = 0,
         .max_dw = scissor_size / 4
      };

      radv_write_scissors(cmd_buffer, &scissor_cs);
      assert(scissor_cs.cdw * 4 == scissor_size);
      upload_data = (char *)upload_data + scissor_size;
   }

   VkWriteDescriptorSet ds_writes[5];
   VkDescriptorBufferInfo buf_info[ARRAY_SIZE(ds_writes)];
   int ds_cnt = 0;
   buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&token_buffer),
                                               .offset = 0,
                                               .range = upload_size};
   ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                              .dstBinding = DGC_DESC_PARAMS,
                                              .dstArrayElement = 0,
                                              .descriptorCount = 1,
                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                              .pBufferInfo = &buf_info[ds_cnt]};
   ++ds_cnt;

   buf_info[ds_cnt] = (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->preprocessBuffer,
                                               .offset = pGeneratedCommandsInfo->preprocessOffset,
                                               .range = pGeneratedCommandsInfo->preprocessSize};
   ds_writes[ds_cnt] = (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                              .dstBinding = DGC_DESC_PREPARE,
                                              .dstArrayElement = 0,
                                              .descriptorCount = 1,
                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                              .pBufferInfo = &buf_info[ds_cnt]};
   ++ds_cnt;

   if (pGeneratedCommandsInfo->streamCount > 0) {
      buf_info[ds_cnt] =
         (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->pStreams[0].buffer,
                                  .offset = pGeneratedCommandsInfo->pStreams[0].offset,
                                  .range = VK_WHOLE_SIZE};
      ds_writes[ds_cnt] =
         (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = DGC_DESC_STREAM,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                .pBufferInfo = &buf_info[ds_cnt]};
      ++ds_cnt;
   }

   if (pGeneratedCommandsInfo->sequencesCountBuffer != VK_NULL_HANDLE) {
      buf_info[ds_cnt] =
         (VkDescriptorBufferInfo){.buffer = pGeneratedCommandsInfo->sequencesCountBuffer,
                                  .offset = pGeneratedCommandsInfo->sequencesCountOffset,
                                  .range = VK_WHOLE_SIZE};
      ds_writes[ds_cnt] =
         (VkWriteDescriptorSet){.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = DGC_DESC_COUNT,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                .pBufferInfo = &buf_info[ds_cnt]};
      ++ds_cnt;
      params.sequence_count = UINT32_MAX;
   }

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.dgc_prepare.pipeline);

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         cmd_buffer->device->meta_state.dgc_prepare.p_layout,
                         VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(params), &params);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 cmd_buffer->device->meta_state.dgc_prepare.p_layout, 0, ds_cnt,
                                 ds_writes);

   unsigned block_count = MAX2(1, round_up_u32(pGeneratedCommandsInfo->sequencesCount, 64));
   radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

   radv_buffer_finish(&token_buffer);
   radv_meta_restore(&saved_state, cmd_buffer);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
}