Lines Matching refs:cmd
21 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
42 tu_cs_emit_qw(cs, global_iova(cmd, seqno_dummy));
53 tu6_lazy_emit_tessfactor_addr(struct tu_cmd_buffer *cmd)
55 if (cmd->state.tessfactor_addr_set)
58 tu_cs_emit_regs(&cmd->cs, A6XX_PC_TESSFACTOR_ADDR(.qword = cmd->device->tess_bo->iova));
60 cmd->state.cache.flush_bits |= TU_CMD_FLAG_WAIT_FOR_IDLE;
61 cmd->state.tessfactor_addr_set = true;
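
The five fragments above (lines 53-61) show a lazy, emit-once pattern: the tess-factor address register is written the first time it is needed, a wait-for-idle is queued so the write lands before use, and a flag prevents re-emission. Below is a minimal, self-contained sketch of that pattern only; fake_cmd_buffer, emit_reg_qword and FAKE_FLAG_WAIT_FOR_IDLE are invented stand-ins for the real tu_cmd_buffer state, tu_cs_emit_regs and TU_CMD_FLAG_WAIT_FOR_IDLE.

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_cmd_buffer {
   bool     tessfactor_addr_set;   /* "already emitted" guard */
   uint32_t pending_flush_bits;    /* stands in for state.cache.flush_bits */
   uint64_t tess_bo_iova;          /* stands in for device->tess_bo->iova */
};

#define FAKE_FLAG_WAIT_FOR_IDLE (1u << 0)

static void emit_reg_qword(const char *name, uint64_t value)
{
   /* Stand-in for tu_cs_emit_regs(): just logs what would be written. */
   printf("emit %s = 0x%" PRIx64 "\n", name, value);
}

static void lazy_emit_tessfactor_addr(struct fake_cmd_buffer *cmd)
{
   if (cmd->tessfactor_addr_set)
      return;                               /* only emit the address once */

   emit_reg_qword("PC_TESSFACTOR_ADDR", cmd->tess_bo_iova);

   /* Defer a wait-for-idle so the register write settles before draws
    * that depend on it, mirroring the flush bit set on line 60. */
   cmd->pending_flush_bits |= FAKE_FLAG_WAIT_FOR_IDLE;
   cmd->tessfactor_addr_set = true;
}

int main(void)
{
   struct fake_cmd_buffer cmd = { .tess_bo_iova = 0x1000 };
   lazy_emit_tessfactor_addr(&cmd);   /* first call: emits and flags a wait */
   lazy_emit_tessfactor_addr(&cmd);   /* second call: no-op */
   return 0;
}
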
185 tu6_emit_zs(struct tu_cmd_buffer *cmd,
206 const struct tu_image_view *iview = cmd->state.attachments[a];
208 &cmd->state.pass->attachments[a];
217 tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment));
232 tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment));
235 tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment));
244 tu6_emit_mrt(struct tu_cmd_buffer *cmd,
248 const struct tu_framebuffer *fb = cmd->state.framebuffer;
271 const struct tu_image_view *iview = cmd->state.attachments[a];
276 tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a]));
346 tu6_emit_render_cntl(struct tu_cmd_buffer *cmd,
352 bool no_track = !cmd->device->physical_device->info->a6xx.has_cp_reg_write;
366 const struct tu_image_view *iview = cmd->state.attachments[a];
375 const struct tu_image_view *iview = cmd->state.attachments[a];
405 tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
407 struct tu_physical_device *phys_dev = cmd->device->physical_device;
408 const VkRect2D *render_area = &cmd->state.render_area;
544 use_hw_binning(struct tu_cmd_buffer *cmd)
546 const struct tu_framebuffer *fb = cmd->state.framebuffer;
547 const struct tu_tiling_config *tiling = &fb->tiling[cmd->state.gmem_layout];
555 if (cmd->state.rp.xfb_used) {
565 if (cmd->state.rp.has_prim_generated_query_in_rp ||
566 cmd->state.prim_generated_query_running_before_rp) {
575 use_sysmem_rendering(struct tu_cmd_buffer *cmd,
578 if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_SYSMEM))
582 if (!cmd->state.pass->gmem_pixels[cmd->state.gmem_layout])
585 if (cmd->state.framebuffer->layers > 1)
589 if (cmd->state.render_area.extent.width == 0 ||
590 cmd->state.render_area.extent.height == 0)
593 if (cmd->state.rp.has_tess)
596 if (cmd->state.rp.disable_gmem)
600 if (cmd->state.rp.xfb_used && !cmd->state.tiling->binning_possible)
606 if ((cmd->state.rp.has_prim_generated_query_in_rp ||
607 cmd->state.prim_generated_query_running_before_rp) &&
608 !cmd->state.tiling->binning_possible)
611 if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_GMEM))
614 bool use_sysmem = tu_autotune_use_bypass(&cmd->device->autotune,
615 cmd, autotune_result);
617 list_addtail(&(*autotune_result)->node, &cmd->renderpass_autotune_results);
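
Taken together, the use_hw_binning() and use_sysmem_rendering() fragments above (lines 544-617) outline the choice between direct (sysmem) and tiled (GMEM) rendering. The sketch below restates that decision as a standalone predicate over simplified, hypothetical inputs; render_pass_choice_inputs and autotune_prefers_sysmem are stand-ins, and the tu_autotune_use_bypass() step is reduced to a precomputed boolean.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct render_pass_choice_inputs {
   bool     debug_force_sysmem;        /* TU_DEBUG_SYSMEM */
   bool     debug_force_gmem;          /* TU_DEBUG_GMEM */
   uint32_t gmem_pixels;               /* 0: pass does not fit in GMEM */
   uint32_t fb_layers;
   uint32_t render_area_width, render_area_height;
   bool     has_tess;
   bool     disable_gmem;
   bool     xfb_used;
   bool     prim_generated_query;      /* in-RP query or one running before it */
   bool     binning_possible;
   bool     autotune_prefers_sysmem;   /* result of the autotune heuristic */
};

static bool use_sysmem_rendering_sketch(const struct render_pass_choice_inputs *in)
{
   if (in->debug_force_sysmem)
      return true;

   /* Cases where tiled GMEM rendering is not usable at all. */
   if (!in->gmem_pixels)
      return true;
   if (in->fb_layers > 1)
      return true;
   if (in->render_area_width == 0 || in->render_area_height == 0)
      return true;
   if (in->has_tess)
      return true;
   if (in->disable_gmem)
      return true;

   /* XFB and primitive-generated queries only work with GMEM when the
    * binning pass is possible (compare use_hw_binning() above). */
   if (in->xfb_used && !in->binning_possible)
      return true;
   if (in->prim_generated_query && !in->binning_possible)
      return true;

   if (in->debug_force_gmem)
      return false;

   /* Otherwise the autotuner picks whichever it predicts is cheaper. */
   return in->autotune_prefers_sysmem;
}

int main(void)
{
   struct render_pass_choice_inputs in = {
      .gmem_pixels = 1, .fb_layers = 1,
      .render_area_width = 256, .render_area_height = 256,
      .binning_possible = true,
   };
   printf("%s rendering\n",
          use_sysmem_rendering_sketch(&in) ? "sysmem" : "GMEM");
   return 0;
}
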
629 tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
632 if (cmd->state.tiling->binning_possible) {
643 tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
647 const struct tu_tiling_config *tiling = cmd->state.tiling;
659 bool hw_binning = use_hw_binning(cmd);
670 tu_cs_emit(cs, pipe * cmd->vsc_draw_strm_pitch);
672 tu_cs_emit(cs, pipe * cmd->vsc_prim_strm_pitch);
675 tu6_emit_cond_for_load_stores(cmd, cs, pipe, slot, hw_binning);
685 tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
691 const struct tu_framebuffer *fb = cmd->state.framebuffer;
692 const struct tu_image_view *dst = cmd->state.attachments[a];
693 const struct tu_image_view *src = cmd->state.attachments[gmem_a];
695 tu_resolve_sysmem(cmd, cs, src, dst, layer_mask, fb->layers, &cmd->state.render_area);
699 tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
725 tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
727 tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
729 tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
741 tu6_emit_sysmem_resolve(cmd, cs, subpass->multiview_mask, a, gmem_a);
747 tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
749 tu6_emit_blit_scissor(cmd, cs, true);
751 for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
752 tu_load_gmem_attachment(cmd, cs, i, cmd->state.tiling->binning, false);
756 tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
758 const struct tu_render_pass *pass = cmd->state.pass;
767 tu6_emit_blit_scissor(cmd, cs, true);
771 tu_store_gmem_attachment(cmd, cs, a, a, cmd->state.tiling->binning_possible);
779 tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
786 tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
795 cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;
799 tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
801 struct tu_device *dev = cmd->device;
804 tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
822 cmd->state.cache.pending_flush_bits &=
827 cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
894 tu_disable_draw_states(cmd, cs);
923 cmd->vsc_prim_strm_pitch = dev->vsc_prim_strm_pitch;
924 cmd->vsc_draw_strm_pitch = dev->vsc_draw_strm_pitch;
929 uint32_t size0 = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES +
930 cmd->vsc_draw_strm_pitch * MAX_VSC_PIPES;
940 .bo_offset = cmd->vsc_prim_strm_pitch * MAX_VSC_PIPES));
946 update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
948 const struct tu_tiling_config *tiling = cmd->state.tiling;
962 A6XX_VSC_PRIM_STRM_PITCH(cmd->vsc_prim_strm_pitch),
963 A6XX_VSC_PRIM_STRM_LIMIT(cmd->vsc_prim_strm_pitch - VSC_PAD));
966 A6XX_VSC_DRAW_STRM_PITCH(cmd->vsc_draw_strm_pitch),
967 A6XX_VSC_DRAW_STRM_LIMIT(cmd->vsc_draw_strm_pitch - VSC_PAD));
971 emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
973 const struct tu_tiling_config *tiling = cmd->state.tiling;
983 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_draw_strm_pitch - VSC_PAD));
985 tu_cs_emit_qw(cs, global_iova(cmd, vsc_draw_overflow));
986 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(cmd->vsc_draw_strm_pitch));
993 tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_prim_strm_pitch - VSC_PAD));
995 tu_cs_emit_qw(cs, global_iova(cmd, vsc_prim_overflow));
996 tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(cmd->vsc_prim_strm_pitch));
1003 tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1005 struct tu_physical_device *phys_dev = cmd->device->physical_device;
1006 const struct tu_framebuffer *fb = cmd->state.framebuffer;
1024 update_vsc_pipe(cmd, cs);
1041 trace_start_binning_ib(&cmd->trace, cs);
1044 tu_cs_emit_call(cs, &cmd->draw_cs);
1046 trace_end_binning_ib(&cmd->trace, cs);
1073 tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS);
1079 emit_vsc_overflow_test(cmd, cs);
1089 tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
1093 const struct tu_tiling_config *tiling = cmd->state.tiling;
1109 VkResult result = tu_cs_alloc(&cmd->sub_cs, subpass->input_count * 2,
1112 cmd->record_result = result;
1121 const struct tu_image_view *iview = cmd->state.attachments[a];
1123 &cmd->state.pass->attachments[a];
1125 uint32_t gmem_offset = tu_attachment_gmem_offset(cmd, att);
1158 if (!cmd->device->physical_device->info->a6xx.has_z24uint_s8uint) {
1183 gmem_offset = att->gmem_offset_stencil[cmd->state.gmem_layout];
1196 dst[4] = cmd->device->physical_device->gmem_base + gmem_offset;
1203 struct tu_draw_state ds = tu_cs_draw_state(&cmd->sub_cs, &cs, 9);
1223 tu_set_input_attachments(struct tu_cmd_buffer *cmd, const struct tu_subpass *subpass)
1225 struct tu_cs *cs = &cmd->draw_cs;
1229 tu_emit_input_attachments(cmd, subpass, true));
1231 tu_emit_input_attachments(cmd, subpass, false));
1236 tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
1239 struct tu_cs *cs = &cmd->draw_cs;
1243 tu6_emit_tile_load(cmd, cs);
1245 tu6_emit_blit_scissor(cmd, cs, false);
1247 for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
1248 tu_clear_gmem_attachment(cmd, cs, i, &clear_values[i]);
1254 for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
1255 tu_clear_sysmem_attachment(cmd, cs, i, &clear_values[i]);
1261 tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
1264 const struct tu_framebuffer *fb = cmd->state.framebuffer;
1266 tu_lrz_sysmem_begin(cmd, cs);
1282 tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
1290 tu_autotune_begin_renderpass(cmd, cs, autotune_result);
1296 tu6_sysmem_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
1299 tu_autotune_end_renderpass(cmd, cs, autotune_result);
1304 tu6_emit_sysmem_resolves(cmd, cs, cmd->state.subpass);
1306 tu_cs_emit_call(cs, &cmd->draw_epilogue_cs);
1311 tu_lrz_sysmem_end(cmd, cs);
1317 tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
1320 struct tu_physical_device *phys_dev = cmd->device->physical_device;
1321 const struct tu_tiling_config *tiling = cmd->state.tiling;
1322 tu_lrz_tiling_begin(cmd, cs);
1327 tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_GMEM);
1329 if (use_hw_binning(cmd)) {
1334 tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, true);
1336 tu6_emit_binning_pass(cmd, cs);
1368 tu_autotune_begin_renderpass(cmd, cs, autotune_result);
1374 tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
1377 tu6_emit_tile_select(cmd, &cmd->cs, tx, ty, pipe, slot);
1379 trace_start_draw_ib_gmem(&cmd->trace, &cmd->cs);
1384 if (cmd->state.prim_generated_query_running_before_rp)
1385 tu6_emit_event_write(cmd, cs, STOP_PRIMITIVE_CTRS);
1387 tu_cs_emit_call(cs, &cmd->draw_cs);
1389 if (cmd->state.prim_generated_query_running_before_rp)
1390 tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS);
1392 if (use_hw_binning(cmd)) {
1398 if (cmd->state.rp.draw_cs_writes_to_cond_pred)
1399 tu6_emit_cond_for_load_stores(cmd, cs, pipe, slot, false);
1401 tu_cs_emit_call(cs, &cmd->tile_store_cs);
1403 if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end)) {
1405 tu_cs_emit_pkt7(&cmd->cs, CP_WAIT_FOR_ME, 0);
1406 u_trace_clone_append(cmd->trace_renderpass_start,
1407 cmd->trace_renderpass_end,
1408 &cmd->trace,
1414 trace_end_draw_ib_gmem(&cmd->trace, &cmd->cs);
1418 tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
1421 tu_autotune_end_renderpass(cmd, cs, autotune_result);
1423 tu_cs_emit_call(cs, &cmd->draw_epilogue_cs);
1425 tu_lrz_tiling_end(cmd, cs);
1427 tu6_emit_event_write(cmd, cs, PC_CCU_RESOLVE_TS);
1433 tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
1436 const struct tu_framebuffer *fb = cmd->state.framebuffer;
1437 const struct tu_tiling_config *tiling = cmd->state.tiling;
1444 tu_cs_begin(&cmd->tile_store_cs);
1445 tu6_emit_tile_store(cmd, &cmd->tile_store_cs);
1446 tu_cs_end(&cmd->tile_store_cs);
1448 tu6_tile_render_begin(cmd, &cmd->cs, autotune_result);
1476 tu6_render_tile(cmd, &cmd->cs, tx1 + tx, ty, pipe, slot);
1483 tu6_tile_render_end(cmd, &cmd->cs, autotune_result);
1485 trace_end_render_pass(&cmd->trace, &cmd->cs, fb, tiling);
1487 if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end))
1488 u_trace_disable_event_range(cmd->trace_renderpass_start,
1489 cmd->trace_renderpass_end);
1494 tu_cs_discard_entries(&cmd->tile_store_cs);
1498 tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd,
1501 tu6_sysmem_render_begin(cmd, &cmd->cs, autotune_result);
1503 trace_start_draw_ib_sysmem(&cmd->trace, &cmd->cs);
1505 tu_cs_emit_call(&cmd->cs, &cmd->draw_cs);
1507 trace_end_draw_ib_sysmem(&cmd->trace, &cmd->cs);
1509 tu6_sysmem_render_end(cmd, &cmd->cs, autotune_result);
1511 trace_end_render_pass(&cmd->trace, &cmd->cs, cmd->state.framebuffer, cmd->state.tiling);
1881 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1884 cmd->state.vertex_buffers.iova = tu_cs_draw_state(&cmd->sub_cs, &cs, 4 * MAX_VBS).iova;
1888 cmd->state.vb[firstBinding + i].base = 0;
1889 cmd->state.vb[firstBinding + i].size = 0;
1892 cmd->state.vb[firstBinding + i].base = buf->iova + pOffsets[i];
1893 cmd->state.vb[firstBinding + i].size = pSizes ? pSizes[i] : (buf->size - pOffsets[i]);
1897 cmd->state.vb[firstBinding + i].stride = pStrides[i];
1902 A6XX_VFD_FETCH_BASE(i, .qword = cmd->state.vb[i].base),
1903 A6XX_VFD_FETCH_SIZE(i, cmd->state.vb[i].size));
1906 cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
1909 cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].iova =
1910 tu_cs_draw_state(&cmd->sub_cs, &cs, 2 * MAX_VBS).iova;
1913 tu_cs_emit_regs(&cs, A6XX_VFD_FETCH_STRIDE(i, cmd->state.vb[i].stride));
1915 cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
1925 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1953 if (cmd->state.index_size != index_size)
1954 tu_cs_emit_regs(&cmd->draw_cs, A6XX_PC_RESTART_INDEX(restart_index));
1958 cmd->state.index_va = buf->iova + offset;
1959 cmd->state.max_index_count = (buf->size - offset) >> index_shift;
1960 cmd->state.index_size = index_size;
1973 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1978 tu_get_descriptors_state(cmd, pipelineBindPoint);
2044 VkResult result = tu_cs_alloc(&cmd->sub_cs,
2048 cmd->record_result = result;
2062 cmd->state.desc_sets = tu_cs_draw_state(&cmd->sub_cs, &state_cs, 24);
2063 cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD;
2072 cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
2073 cs = &cmd->cs;
2087 if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
2088 tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
2089 tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
2102 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2106 &tu_get_descriptors_state(cmd, pipelineBindPoint)->push_set;
2109 VkResult result = tu_cs_alloc(&cmd->sub_cs,
2113 cmd->record_result = result;
2123 tu_descriptor_set_layout_unref(cmd->device, set->layout);
2131 tu_update_descriptor_sets(cmd->device, tu_descriptor_set_to_handle(set),
2146 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2151 &tu_get_descriptors_state(cmd, templ->bind_point)->push_set;
2154 VkResult result = tu_cs_alloc(&cmd->sub_cs,
2158 cmd->record_result = result;
2168 tu_descriptor_set_layout_unref(cmd->device, set->layout);
2176 tu_update_descriptor_set_with_template(cmd->device, set, descriptorUpdateTemplate, pData);
2191 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2192 struct tu_cs *cs = &cmd->draw_cs;
2219 cmd->state.streamout_offset[idx] = offset;
2232 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2233 struct tu_cs *cs = &cmd->draw_cs;
2243 tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_OFFSET(i, cmd->state.streamout_offset[i]));
2247 uint32_t offset = cmd->state.streamout_offset[idx];
2280 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2281 struct tu_cs *cs = &cmd->draw_cs;
2293 tu_cs_emit_qw(cs, global_iova(cmd, flush_base[i]));
2294 tu6_emit_event_write(cmd, cs, FLUSH_SO_0 + i);
2299 uint32_t offset = cmd->state.streamout_offset[idx];
2314 tu_cs_emit_qw(cs, global_iova(cmd, flush_base[idx]));
2332 cmd->state.rp.xfb_used = true;
2343 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2344 memcpy((void*) cmd->push_constants + offset, pValues, size);
2345 cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
2396 tu_cmd_dynamic_state(struct tu_cmd_buffer *cmd, uint32_t id, uint32_t size)
2400 assert(id < ARRAY_SIZE(cmd->state.dynamic_state));
2401 cmd->state.dynamic_state[id] = tu_cs_draw_state(&cmd->sub_cs, &cs, size);
2406 if (cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)
2409 tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
2410 tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DYNAMIC + id, cmd->state.dynamic_state[id]);
2420 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2424 cmd->state.compute_pipeline = pipeline;
2425 tu_cs_emit_state_ib(&cmd->cs, pipeline->program.state);
2431 cmd->state.pipeline = pipeline;
2432 cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD | TU_CMD_DIRTY_SHADER_CONSTS |
2435 struct tu_cs *cs = &cmd->draw_cs;
2440 if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
2458 cmd->state.rp.has_tess = true;
2471 if (cmd->state.line_mode != pipeline->line_mode) {
2472 cmd->state.line_mode = pipeline->line_mode;
2481 if (cmd->state.subpass && cmd->state.subpass->samples) {
2482 tu6_emit_msaa(cs, cmd->state.subpass->samples, cmd->state.line_mode);
2487 (pipeline->z_negative_one_to_one != cmd->state.z_negative_one_to_one)) {
2488 cmd->state.z_negative_one_to_one = pipeline->z_negative_one_to_one;
2489 cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
2498 if (cmd->state.vertex_buffers.size != pipeline->num_vbs * 4) {
2499 cmd->state.vertex_buffers.size = pipeline->num_vbs * 4;
2500 cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS;
2504 cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size != pipeline->num_vbs * 2) {
2505 cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE].size = pipeline->num_vbs * 2;
2506 cmd->state.dirty |= TU_CMD_DIRTY_VB_STRIDE;
2512 if ((cmd->state.X & pipeline->X##_mask) != pipeline_bits) { \
2513 cmd->state.X &= ~pipeline->X##_mask; \
2514 cmd->state.X |= pipeline_bits; \
2515 cmd->state.dirty |= TU_CMD_DIRTY_##Y; \
2518 cmd->state.dirty &= ~TU_CMD_DIRTY_##Y; \
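
The macro fragments at lines 2512-2518 show how pipeline-owned bits are merged into a packed register the command buffer also writes, raising a dirty bit only when the pipeline's portion actually changes. A minimal sketch of that masked-merge pattern follows; the names and the single DIRTY_GRAS_SU_CNTL flag are illustrative, and the conditional dirty-bit clear on line 2518 is only noted in a comment because its guard is not visible in this listing.

#include <stdint.h>

#define DIRTY_GRAS_SU_CNTL (1u << 0)

struct merged_reg_state {
   uint32_t gras_su_cntl;   /* current packed register value */
   uint32_t dirty;          /* dirty bits checked before the next draw */
};

void merge_pipeline_bits(struct merged_reg_state *state,
                         uint32_t pipeline_bits, uint32_t pipeline_mask)
{
   /* Only the bits inside pipeline_mask belong to the pipeline. */
   pipeline_bits &= pipeline_mask;

   if ((state->gras_su_cntl & pipeline_mask) != pipeline_bits) {
      state->gras_su_cntl &= ~pipeline_mask;
      state->gras_su_cntl |= pipeline_bits;
      state->dirty |= DIRTY_GRAS_SU_CNTL;   /* value changed: re-emit needed */
   }
   /* The listed macro can also clear the dirty bit afterwards (line 2518);
    * the condition guarding that clear is not shown above, so it is
    * omitted here. */
}
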
2536 if ((cmd->state.rb_mrt_control[i] & pipeline->rb_mrt_control_mask) !=
2538 cmd->state.rb_mrt_control[i] &= ~pipeline->rb_mrt_control_mask;
2539 cmd->state.rb_mrt_control[i] |= pipeline->rb_mrt_control[i];
2540 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2543 if (cmd->state.rb_mrt_blend_control[i] != pipeline->rb_mrt_blend_control[i]) {
2544 cmd->state.rb_mrt_blend_control[i] = pipeline->rb_mrt_blend_control[i];
2545 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2550 if (cmd->state.pipeline_color_write_enable != pipeline->color_write_enable) {
2551 cmd->state.pipeline_color_write_enable = pipeline->color_write_enable;
2552 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2554 if (cmd->state.pipeline_blend_enable != pipeline->blend_enable) {
2555 cmd->state.pipeline_blend_enable = pipeline->blend_enable;
2556 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2558 if (cmd->state.logic_op_enabled != pipeline->logic_op_enabled) {
2559 cmd->state.logic_op_enabled = pipeline->logic_op_enabled;
2560 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2563 cmd->state.rop_reads_dst != pipeline->rop_reads_dst) {
2564 cmd->state.rop_reads_dst = pipeline->rop_reads_dst;
2565 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2567 if (cmd->state.dynamic_state[TU_DYNAMIC_STATE_BLEND].size != pipeline->num_rts * 3 + 4) {
2568 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2571 cmd->state.dirty &= ~TU_CMD_DIRTY_BLEND;
2575 cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
2584 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2586 memcpy(&cmd->state.viewport[firstViewport], pViewports, viewportCount * sizeof(*pViewports));
2587 cmd->state.max_viewport = MAX2(cmd->state.max_viewport, firstViewport + viewportCount);
2593 cmd->state.dirty |= TU_CMD_DIRTY_VIEWPORTS;
2602 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2605 memcpy(&cmd->state.scissor[firstScissor], pScissors, scissorCount * sizeof(*pScissors));
2606 cmd->state.max_scissor = MAX2(cmd->state.max_scissor, firstScissor + scissorCount);
2608 cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * cmd->state.max_scissor);
2609 tu6_emit_scissor(&cs, cmd->state.scissor, cmd->state.max_scissor);
2615 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2617 cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
2618 cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(lineWidth / 2.0f);
2620 cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
2629 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2630 struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_DEPTH_BIAS, 4);
2639 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2640 struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5);
2651 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2652 struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3);
2673 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2674 struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2);
2676 update_stencil_mask(&cmd->state.dynamic_stencil_mask, faceMask, compareMask);
2678 tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.dword = cmd->state.dynamic_stencil_mask));
2686 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2687 struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2);
2689 update_stencil_mask(&cmd->state.dynamic_stencil_wrmask, faceMask, writeMask);
2691 tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = cmd->state.dynamic_stencil_wrmask));
2693 cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
2701 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2702 struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2);
2704 update_stencil_mask(&cmd->state.dynamic_stencil_ref, faceMask, reference);
2706 tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.dword = cmd->state.dynamic_stencil_ref));
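
The stencil compare-mask, write-mask and reference entry points above all funnel through update_stencil_mask(), and the wrmask handling around lines 4254-4260 further down suggests both faces share one dword, front value in bits 0-7 and back value in bits 8-15. The helper below is an illustrative reconstruction under that assumption; FACE_FRONT_BIT and FACE_BACK_BIT stand in for the VK_STENCIL_FACE_* flags.

#include <stdint.h>

#define FACE_FRONT_BIT (1u << 0)   /* stands in for VK_STENCIL_FACE_FRONT_BIT */
#define FACE_BACK_BIT  (1u << 1)   /* stands in for VK_STENCIL_FACE_BACK_BIT */

void update_stencil_mask_sketch(uint32_t *packed, uint32_t face_mask,
                                uint32_t value)
{
   if (face_mask & FACE_FRONT_BIT)
      *packed = (*packed & ~0x00ffu) | (value & 0xff);        /* bits 0-7  */
   if (face_mask & FACE_BACK_BIT)
      *packed = (*packed & ~0xff00u) | ((value & 0xff) << 8); /* bits 8-15 */
}
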
2713 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2714 struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, 9);
2724 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2726 cmd->state.gras_su_cntl &=
2730 cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
2732 cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;
2734 cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
2740 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2742 cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
2745 cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;
2747 cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
2754 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2756 cmd->state.primtype = tu6_primtype(primitiveTopology);
2779 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2781 cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
2784 cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
2786 cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
2793 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2795 cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
2798 cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
2800 cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
2807 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2809 cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
2811 cmd->state.rb_depth_cntl |=
2814 cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
2821 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2823 cmd->state.rb_depth_cntl &= ~A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
2826 cmd->state.rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE;
2828 cmd->state.dirty |= TU_CMD_DIRTY_RB_DEPTH_CNTL;
2835 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2837 cmd->state.rb_stencil_cntl &= ~(
2843 cmd->state.rb_stencil_cntl |=
2849 cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL;
2860 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2863 cmd->state.rb_stencil_cntl &= ~(
2869 cmd->state.rb_stencil_cntl |=
2877 cmd->state.rb_stencil_cntl &= ~(
2883 cmd->state.rb_stencil_cntl |=
2890 cmd->state.dirty |= TU_CMD_DIRTY_RB_STENCIL_CNTL;
2897 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2899 cmd->state.gras_su_cntl &= ~A6XX_GRAS_SU_CNTL_POLY_OFFSET;
2901 cmd->state.gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;
2903 cmd->state.dirty |= TU_CMD_DIRTY_GRAS_SU_CNTL;
2910 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2912 cmd->state.primitive_restart_enable = primitiveRestartEnable;
2919 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2921 cmd->state.pc_raster_cntl &= ~A6XX_PC_RASTER_CNTL_DISCARD;
2922 cmd->state.vpc_unknown_9107 &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
2924 cmd->state.pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD;
2925 cmd->state.vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
2928 cmd->state.dirty |= TU_CMD_DIRTY_RASTERIZER_DISCARD;
2935 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2937 cmd->state.rb_mrt_control_rop =
2938 tu6_rb_mrt_control_rop(logicOp, &cmd->state.rop_reads_dst);
2940 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
2962 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2970 cmd->state.color_write_enable = color_write_enable;
2971 cmd->state.dirty |= TU_CMD_DIRTY_BLEND;
3409 tu_restore_suspended_pass(struct tu_cmd_buffer *cmd,
3412 cmd->state.pass = suspended->state.suspended_pass.pass;
3413 cmd->state.subpass = suspended->state.suspended_pass.subpass;
3414 cmd->state.framebuffer = suspended->state.suspended_pass.framebuffer;
3415 cmd->state.attachments = suspended->state.suspended_pass.attachments;
3416 cmd->state.render_area = suspended->state.suspended_pass.render_area;
3417 cmd->state.gmem_layout = suspended->state.suspended_pass.gmem_layout;
3418 cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
3419 cmd->state.lrz = suspended->state.suspended_pass.lrz;
3422 /* Take the saved pre-chain in "secondary" and copy its commands to "cmd",
3423 * appending it after any saved-up commands in "cmd".
3426 tu_append_pre_chain(struct tu_cmd_buffer *cmd,
3429 tu_cs_add_entries(&cmd->draw_cs, &secondary->pre_chain.draw_cs);
3430 tu_cs_add_entries(&cmd->draw_epilogue_cs,
3432 tu_render_pass_state_merge(&cmd->state.rp,
3436 tu_cs_emit_wfi(&cmd->draw_cs);
3437 tu_cs_emit_pkt7(&cmd->draw_cs, CP_WAIT_FOR_ME, 0);
3440 &cmd->trace, &cmd->draw_cs,
3445 /* Take the saved post-chain in "secondary" and copy it to "cmd".
3448 tu_append_post_chain(struct tu_cmd_buffer *cmd,
3451 tu_cs_add_entries(&cmd->draw_cs, &secondary->draw_cs);
3452 tu_cs_add_entries(&cmd->draw_epilogue_cs, &secondary->draw_epilogue_cs);
3455 tu_cs_emit_wfi(&cmd->draw_cs);
3456 tu_cs_emit_pkt7(&cmd->draw_cs, CP_WAIT_FOR_ME, 0);
3459 &cmd->trace, &cmd->draw_cs,
3462 cmd->state.rp = secondary->state.rp;
3466 * copy its state to "cmd". This also works instead of tu_append_post_chain(),
3472 tu_append_pre_post_chain(struct tu_cmd_buffer *cmd,
3475 tu_cs_add_entries(&cmd->draw_cs, &secondary->draw_cs);
3476 tu_cs_add_entries(&cmd->draw_epilogue_cs, &secondary->draw_epilogue_cs);
3479 tu_cs_emit_wfi(&cmd->draw_cs);
3480 tu_cs_emit_pkt7(&cmd->draw_cs, CP_WAIT_FOR_ME, 0);
3483 &cmd->trace, &cmd->draw_cs,
3486 tu_render_pass_state_merge(&cmd->state.rp,
3494 tu_save_pre_chain(struct tu_cmd_buffer *cmd)
3496 tu_cs_add_entries(&cmd->pre_chain.draw_cs,
3497 &cmd->draw_cs);
3498 tu_cs_add_entries(&cmd->pre_chain.draw_epilogue_cs,
3499 &cmd->draw_epilogue_cs);
3500 cmd->pre_chain.trace_renderpass_start =
3501 cmd->trace_renderpass_start;
3502 cmd->pre_chain.trace_renderpass_end =
3503 cmd->trace_renderpass_end;
3504 cmd->pre_chain.state = cmd->state.rp;
3512 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
3518 if (cmd->state.pass) {
3519 tu_flush_all_pending(&cmd->state.renderpass_cache);
3520 tu_emit_cache_flush_renderpass(cmd, &cmd->draw_cs);
3522 tu_flush_all_pending(&cmd->state.cache);
3523 tu_emit_cache_flush(cmd, &cmd->cs);
3533 result = tu_cs_add_entries(&cmd->draw_cs, &secondary->draw_cs);
3535 cmd->record_result = result;
3539 result = tu_cs_add_entries(&cmd->draw_epilogue_cs,
3542 cmd->record_result = result;
3550 cmd->state.lrz.valid = false;
3552 tu_render_pass_state_merge(&cmd->state.rp, &secondary->state.rp);
3558 tu_cs_add_entries(&cmd->cs, &secondary->cs);
3562 /* cmd may be empty, which means that the chain begins before cmd
3565 if (cmd->state.suspend_resume == SR_NONE) {
3566 cmd->state.suspend_resume = SR_IN_PRE_CHAIN;
3567 cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
3574 tu_append_pre_post_chain(cmd, secondary);
3586 if (cmd->state.suspend_resume == SR_NONE)
3587 cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
3589 tu_append_pre_chain(cmd, secondary);
3590 cmd->trace_renderpass_end = u_trace_end_iterator(&cmd->trace);
3596 tu_cs_end(&cmd->draw_cs);
3597 tu_cs_end(&cmd->draw_epilogue_cs);
3599 switch (cmd->state.suspend_resume) {
3606 tu_save_pre_chain(cmd);
3607 cmd->state.suspend_resume = SR_AFTER_PRE_CHAIN;
3618 tu_restore_suspended_pass(cmd, cmd);
3620 tu_cmd_render(cmd);
3621 if (cmd->state.suspend_resume == SR_IN_CHAIN)
3622 cmd->state.suspend_resume = SR_NONE;
3624 cmd->state.suspend_resume = SR_AFTER_PRE_CHAIN;
3630 tu_reset_render_pass(cmd);
3633 tu_cs_add_entries(&cmd->cs, &secondary->cs);
3641 cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
3642 tu_append_post_chain(cmd, secondary);
3643 cmd->trace_renderpass_end = u_trace_end_iterator(&cmd->trace);
3644 cmd->state.suspended_pass = secondary->state.suspended_pass;
3646 switch (cmd->state.suspend_resume) {
3648 cmd->state.suspend_resume = SR_IN_CHAIN;
3651 cmd->state.suspend_resume = SR_IN_CHAIN_AFTER_PRE_CHAIN;
3660 cmd->state.index_size = secondary->state.index_size; /* for restart index update */
3662 cmd->state.dirty = ~0u; /* TODO: set dirty only what needs to be */
3664 if (!cmd->state.lrz.gpu_dir_tracking && cmd->state.pass) {
3668 cmd->state.lrz.valid = false;
3676 if (cmd->state.pass) {
3677 tu_cache_init(&cmd->state.renderpass_cache);
3679 tu_cache_init(&cmd->state.cache);
3810 tu_emit_subpass_begin(struct tu_cmd_buffer *cmd)
3812 tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
3813 tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
3814 if (cmd->state.subpass->samples)
3815 tu6_emit_msaa(&cmd->draw_cs, cmd->state.subpass->samples, cmd->state.line_mode);
3816 tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false);
3818 tu_set_input_attachments(cmd, cmd->state.subpass);
3826 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
3828 if (unlikely(cmd->device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
3841 cmd->state.pass = pass;
3842 cmd->state.subpass = pass->subpasses;
3843 cmd->state.framebuffer = fb;
3844 cmd->state.render_area = pRenderPassBegin->renderArea;
3846 cmd->state.attachments =
3847 vk_alloc(&cmd->pool->vk.alloc, pass->attachment_count *
3848 sizeof(cmd->state.attachments[0]), 8,
3851 if (!cmd->state.attachments) {
3852 cmd->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
3857 cmd->state.attachments[i] = pAttachmentInfo ?
3859 cmd->state.framebuffer->attachments[i].attachment;
3861 tu_choose_gmem_layout(cmd);
3863 trace_start_render_pass(&cmd->trace, &cmd->cs);
3869 tu_subpass_barrier(cmd, &pass->subpasses[0].start_barrier, true);
3870 cmd->state.renderpass_cache.pending_flush_bits =
3871 cmd->state.cache.pending_flush_bits;
3872 cmd->state.renderpass_cache.flush_bits = 0;
3875 cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
3877 tu_lrz_begin_renderpass(cmd, pRenderPassBegin->pClearValues);
3879 cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
3881 tu_emit_renderpass_begin(cmd, pRenderPassBegin->pClearValues);
3882 tu_emit_subpass_begin(cmd);
3889 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
3892 tu_setup_dynamic_render_pass(cmd, pRenderingInfo);
3893 tu_setup_dynamic_framebuffer(cmd, pRenderingInfo);
3895 cmd->state.pass = &cmd->dynamic_pass;
3896 cmd->state.subpass = &cmd->dynamic_subpass;
3897 cmd->state.framebuffer = &cmd->dynamic_framebuffer;
3898 cmd->state.render_area = pRenderingInfo->renderArea;
3900 cmd->state.attachments = cmd->dynamic_attachments;
3903 uint32_t a = cmd->dynamic_subpass.color_attachments[i].attachment;
3909 cmd->state.attachments[a] = view;
3912 a = cmd->dynamic_subpass.resolve_attachments[i].attachment;
3916 cmd->state.attachments[a] = resolve_view;
3920 uint32_t a = cmd->dynamic_subpass.depth_stencil_attachment.attachment;
3929 cmd->state.attachments[a] = view;
3940 if (cmd->dynamic_subpass.resolve_count >
3941 cmd->dynamic_subpass.color_count) {
3944 a = cmd->dynamic_subpass.resolve_attachments[cmd->dynamic_subpass.color_count].attachment;
3945 cmd->state.attachments[a] = resolve_view;
3950 if (unlikely(cmd->device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
3962 cmd->state.rp.disable_gmem = true;
3966 tu_choose_gmem_layout(cmd);
3968 cmd->state.renderpass_cache.pending_flush_bits =
3969 cmd->state.cache.pending_flush_bits;
3970 cmd->state.renderpass_cache.flush_bits = 0;
3974 cmd->state.suspending = suspending;
3975 cmd->state.resuming = resuming;
3981 !cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking) {
3982 cmd->state.lrz.valid = false;
3985 tu_lrz_begin_resumed_renderpass(cmd, clear_values);
3987 tu_lrz_begin_renderpass(cmd, clear_values);
3992 cmd->state.suspended_pass.pass = cmd->state.pass;
3993 cmd->state.suspended_pass.subpass = cmd->state.subpass;
3994 cmd->state.suspended_pass.framebuffer = cmd->state.framebuffer;
3995 cmd->state.suspended_pass.render_area = cmd->state.render_area;
3996 cmd->state.suspended_pass.attachments = cmd->state.attachments;
3997 cmd->state.suspended_pass.gmem_layout = cmd->state.gmem_layout;
4001 trace_start_render_pass(&cmd->trace, &cmd->cs);
4004 if (!resuming || cmd->state.suspend_resume == SR_NONE) {
4005 cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace);
4009 tu_emit_renderpass_begin(cmd, clear_values);
4010 tu_emit_subpass_begin(cmd);
4015 switch (cmd->state.suspend_resume) {
4017 cmd->state.suspend_resume = SR_IN_CHAIN;
4020 cmd->state.suspend_resume = SR_IN_CHAIN_AFTER_PRE_CHAIN;
4030 if (resuming && cmd->state.suspend_resume == SR_NONE)
4031 cmd->state.suspend_resume = SR_IN_PRE_CHAIN;
4039 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4041 if (unlikely(cmd->device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
4047 const struct tu_render_pass *pass = cmd->state.pass;
4048 struct tu_cs *cs = &cmd->draw_cs;
4049 const struct tu_subpass *last_subpass = cmd->state.subpass;
4051 const struct tu_subpass *subpass = cmd->state.subpass++;
4059 cmd->state.lrz.valid = false;
4060 cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
4066 tu6_emit_blit_scissor(cmd, cs, true);
4075 tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
4083 perf_debug(cmd->device, "TODO: missing GMEM->GMEM resolve path\n");
4084 tu_load_gmem_attachment(cmd, cs, a, false, true);
4092 tu6_emit_sysmem_resolves(cmd, cs, subpass);
4097 tu_subpass_barrier(cmd, &cmd->state.subpass->start_barrier, false);
4099 if (cmd->state.subpass->feedback_invalidate)
4100 cmd->state.renderpass_cache.flush_bits |= TU_CMD_FLAG_CACHE_INVALIDATE;
4102 tu_emit_subpass_begin(cmd);
4177 tu6_const_size(struct tu_cmd_buffer *cmd,
4198 tu6_emit_consts(struct tu_cmd_buffer *cmd,
4204 dwords = tu6_const_size(cmd, pipeline, compute);
4210 tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);
4213 tu6_emit_shared_consts(&cs, pipeline, cmd->push_constants, compute);
4222 tu6_emit_user_consts(&cs, pipeline, MESA_SHADER_COMPUTE, cmd->push_constants);
4225 tu6_emit_user_consts(&cs, pipeline, type, cmd->push_constants);
4229 return tu_cs_end_draw_state(&cmd->sub_cs, &cs);
4233 tu6_writes_depth(struct tu_cmd_buffer *cmd, bool depth_test_enable)
4236 cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
4239 (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK) >> A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT;
4247 tu6_writes_stencil(struct tu_cmd_buffer *cmd)
4250 cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE;
4253 (cmd->state.pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) ?
4254 (cmd->state.dynamic_stencil_wrmask & 0xff) :
4255 (cmd->state.pipeline->stencil_wrmask & 0xff);
4258 (cmd->state.pipeline->dynamic_state_mask & BIT(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) ?
4259 ((cmd->state.dynamic_stencil_wrmask & 0xff00) >> 8) :
4260 (cmd->state.pipeline->stencil_wrmask & 0xff00) >> 8;
4263 (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FAIL__MASK) >> A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT;
4265 (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZPASS__MASK) >> A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT;
4267 (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK) >> A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT;
4269 (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT;
4271 (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT;
4273 (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT;
4291 tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
4294 bool depth_test_enable = cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
4295 bool depth_write = tu6_writes_depth(cmd, depth_test_enable);
4296 bool stencil_write = tu6_writes_stencil(cmd);
4298 if ((cmd->state.pipeline->lrz.fs_has_kill ||
4299 cmd->state.pipeline->subpass_feedback_loop_ds) &&
4301 zmode = (cmd->state.lrz.valid && cmd->state.lrz.enabled)
4306 if (cmd->state.pipeline->lrz.force_late_z || !depth_test_enable)
4310 if (cmd->state.pipeline->lrz.early_fragment_tests)
4321 tu6_emit_blend(struct tu_cs *cs, struct tu_cmd_buffer *cmd)
4323 struct tu_pipeline *pipeline = cmd->state.pipeline;
4324 uint32_t color_write_enable = cmd->state.pipeline_color_write_enable;
4328 color_write_enable &= cmd->state.color_write_enable;
4333 tu_cs_emit(cs, cmd->state.rb_mrt_control[i] |
4334 ((cmd->state.logic_op_enabled ?
4335 cmd->state.rb_mrt_control_rop : 0) &
4337 tu_cs_emit(cs, cmd->state.rb_mrt_blend_control[i]);
4345 if (!(cmd->state.logic_op_enabled && cmd->state.rop_reads_dst))
4346 blend_enable_mask &= cmd->state.pipeline_blend_enable;
4349 tu_cs_emit(cs, cmd->state.sp_blend_cntl |
4354 tu_cs_emit(cs, cmd->state.rb_blend_cntl |
4360 tu6_draw_common(struct tu_cmd_buffer *cmd,
4366 const struct tu_pipeline *pipeline = cmd->state.pipeline;
4369 cmd->state.rp.drawcall_count++;
4371 cmd->state.rp.drawcall_bandwidth_per_sample_sum +=
4372 cmd->state.pipeline->color_bandwidth_per_sample;
4375 const uint32_t depth_bandwidth = cmd->state.pipeline->depth_cpp_per_sample;
4376 if (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE)
4377 cmd->state.rp.drawcall_bandwidth_per_sample_sum += depth_bandwidth;
4378 if (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE)
4379 cmd->state.rp.drawcall_bandwidth_per_sample_sum += depth_bandwidth;
4383 cmd->state.pipeline->stencil_cpp_per_sample;
4384 if (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE)
4385 cmd->state.rp.drawcall_bandwidth_per_sample_sum += stencil_bandwidth * 2;
4387 tu_emit_cache_flush_renderpass(cmd, cs);
4391 primitive_restart_enabled = cmd->state.primitive_restart_enable;
4401 if (!(cmd->state.dirty & ~TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD))
4405 cmd->state.dirty & (TU_CMD_DIRTY_LRZ | TU_CMD_DIRTY_RB_DEPTH_CNTL |
4410 uint32_t size = cmd->device->physical_device->info->a6xx.lrz_track_quirk ? 10 : 8;
4412 cmd->state.lrz_and_depth_plane_state =
4413 tu_cs_draw_state(&cmd->sub_cs, &cs, size);
4414 tu6_emit_lrz(cmd, &cs);
4415 tu6_build_depth_plane_z_mode(cmd, &cs);
4418 if (cmd->state.dirty & TU_CMD_DIRTY_RASTERIZER_DISCARD) {
4419 struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RASTERIZER_DISCARD, 4);
4420 tu_cs_emit_regs(&cs, A6XX_PC_RASTER_CNTL(.dword = cmd->state.pc_raster_cntl));
4421 tu_cs_emit_regs(&cs, A6XX_VPC_UNKNOWN_9107(.dword = cmd->state.vpc_unknown_9107));
4424 if (cmd->state.dirty & TU_CMD_DIRTY_GRAS_SU_CNTL) {
4425 struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2);
4426 tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.gras_su_cntl));
4429 if (cmd->state.dirty & TU_CMD_DIRTY_RB_DEPTH_CNTL) {
4430 struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2);
4431 uint32_t rb_depth_cntl = cmd->state.rb_depth_cntl;
4439 tu6_apply_depth_bounds_workaround(cmd->device, &rb_depth_cntl);
4447 if (cmd->state.dirty & TU_CMD_DIRTY_RB_STENCIL_CNTL) {
4448 struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2);
4449 tu_cs_emit_regs(&cs, A6XX_RB_STENCIL_CONTROL(.dword = cmd->state.rb_stencil_cntl));
4452 if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS)
4453 cmd->state.shader_const = tu6_emit_consts(cmd, pipeline, false);
4455 if (cmd->state.dirty & TU_CMD_DIRTY_VIEWPORTS) {
4456 struct tu_cs cs = tu_cmd_dynamic_state(cmd, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * cmd->state.max_viewport);
4457 tu6_emit_viewport(&cs, cmd->state.viewport, cmd->state.max_viewport,
4461 if (cmd->state.dirty & TU_CMD_DIRTY_BLEND) {
4462 struct tu_cs cs = tu_cmd_dynamic_state(cmd, TU_DYNAMIC_STATE_BLEND,
4463 4 + 3 * cmd->state.pipeline->num_rts);
4464 tu6_emit_blend(&cs, cmd);
4477 if (cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE) {
4488 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_CONST, cmd->state.shader_const);
4489 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
4491 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
4492 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
4493 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state);
4495 for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.dynamic_state); i++) {
4498 cmd->state.dynamic_state[i] :
4507 ((cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS) ? 1 : 0) +
4508 ((cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD) ? 1 : 0) +
4509 ((cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) ? 1 : 0) +
4510 ((cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) ? 1 : 0) +
4513 if ((cmd->state.dirty & TU_CMD_DIRTY_VB_STRIDE) &&
4519 if ((cmd->state.dirty & TU_CMD_DIRTY_BLEND) &&
4528 if (cmd->state.dirty & TU_CMD_DIRTY_SHADER_CONSTS)
4529 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_CONST, cmd->state.shader_const);
4530 if (cmd->state.dirty & TU_CMD_DIRTY_DESC_SETS_LOAD)
4532 if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS)
4533 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VB, cmd->state.vertex_buffers);
4536 cmd->state.dynamic_state[TU_DYNAMIC_STATE_VB_STRIDE]);
4540 cmd->state.dynamic_state[TU_DYNAMIC_STATE_BLEND]);
4542 if (cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS)
4543 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
4546 tu_cs_emit_draw_state(cs, TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE, cmd->state.lrz_and_depth_plane_state);
4556 cmd->state.dirty &= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
4561 tu_draw_initiator(struct tu_cmd_buffer *cmd, enum pc_di_src_sel src_sel)
4563 const struct tu_pipeline *pipeline = cmd->state.pipeline;
4575 primtype = cmd->state.primtype;
4582 CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(cmd->state.index_size) |
4610 vs_params_offset(struct tu_cmd_buffer *cmd)
4613 &cmd->state.pipeline->program.link[MESA_SHADER_VERTEX];
4631 tu6_emit_empty_vs_params(struct tu_cmd_buffer *cmd)
4633 if (cmd->state.vs_params.iova) {
4634 cmd->state.vs_params = (struct tu_draw_state) {};
4635 cmd->state.dirty |= TU_CMD_DIRTY_VS_PARAMS;
4640 tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
4647 if (!(cmd->state.dirty & (TU_CMD_DIRTY_DRAW_STATE | TU_CMD_DIRTY_VS_PARAMS)) &&
4648 vertex_offset == cmd->state.last_vs_params.vertex_offset &&
4649 first_instance == cmd->state.last_vs_params.first_instance) {
4653 uint32_t offset = vs_params_offset(cmd);
4656 VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 3 + (offset ? 8 : 0), &cs);
4658 cmd->record_result = result;
4682 cmd->state.last_vs_params.vertex_offset = vertex_offset;
4683 cmd->state.last_vs_params.first_instance = first_instance;
4685 struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
4686 cmd->state.vs_params = (struct tu_draw_state) {entry.bo->iova + entry.offset, entry.size / 4};
4688 cmd->state.dirty |= TU_CMD_DIRTY_VS_PARAMS;
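
The tu6_emit_vs_params() fragments above (lines 4640-4688) show a small memoization: the VS params constant block is rebuilt only when the relevant dirty bits are set or the vertex offset / first instance actually changed. Below is a simplified sketch of that caching; vs_params_cache and the DIRTY_* flags are stand-ins and no real command stream is built.

#include <stdbool.h>
#include <stdint.h>

#define DIRTY_DRAW_STATE (1u << 0)
#define DIRTY_VS_PARAMS  (1u << 1)

struct vs_params_cache {
   uint32_t dirty;
   uint32_t last_vertex_offset;
   uint32_t last_first_instance;
   bool     have_last;
};

/* Returns true when a new params block had to be built. */
bool emit_vs_params_sketch(struct vs_params_cache *c,
                           uint32_t vertex_offset, uint32_t first_instance)
{
   if (!(c->dirty & (DIRTY_DRAW_STATE | DIRTY_VS_PARAMS)) &&
       c->have_last &&
       vertex_offset == c->last_vertex_offset &&
       first_instance == c->last_first_instance)
      return false;                    /* previously emitted state still valid */

   /* ...the real code allocates a sub-stream and writes the constants here... */

   c->last_vertex_offset = vertex_offset;
   c->last_first_instance = first_instance;
   c->have_last = true;
   c->dirty |= DIRTY_VS_PARAMS;        /* draw-state groups must be re-emitted */
   return true;
}
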
4698 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4699 struct tu_cs *cs = &cmd->draw_cs;
4701 tu6_emit_vs_params(cmd, firstVertex, firstInstance);
4703 tu6_draw_common(cmd, cs, false, vertexCount);
4706 tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
4719 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4720 struct tu_cs *cs = &cmd->draw_cs;
4722 tu6_emit_vs_params(cmd, vertexOffset, firstInstance);
4724 tu6_draw_common(cmd, cs, true, indexCount);
4727 tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
4731 tu_cs_emit_qw(cs, cmd->state.index_va);
4732 tu_cs_emit(cs, cmd->state.max_index_count);
4742 draw_wfm(struct tu_cmd_buffer *cmd)
4744 cmd->state.renderpass_cache.flush_bits |=
4745 cmd->state.renderpass_cache.pending_flush_bits & TU_CMD_FLAG_WAIT_FOR_ME;
4746 cmd->state.renderpass_cache.pending_flush_bits &= ~TU_CMD_FLAG_WAIT_FOR_ME;
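
draw_wfm() above (lines 4742-4746) promotes a pending wait-for-me into the required flush bits; the indirect-draw paths call it when indirect_draw_wfm_quirk is set so the wait is actually emitted before the draw. A minimal sketch of that promotion, with placeholder flag and struct names:

#include <stdint.h>

#define FLAG_WAIT_FOR_ME (1u << 3)   /* placeholder for TU_CMD_FLAG_WAIT_FOR_ME */

struct cache_bits {
   uint32_t flush_bits;           /* emitted before the next draw */
   uint32_t pending_flush_bits;   /* emitted only if something requires them */
};

void require_pending_wfm(struct cache_bits *cache)
{
   cache->flush_bits |= cache->pending_flush_bits & FLAG_WAIT_FOR_ME;
   cache->pending_flush_bits &= ~FLAG_WAIT_FOR_ME;
}
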
4756 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4758 struct tu_cs *cs = &cmd->draw_cs;
4760 tu6_emit_empty_vs_params(cmd);
4762 if (cmd->device->physical_device->info->a6xx.indirect_draw_wfm_quirk)
4763 draw_wfm(cmd);
4765 tu6_draw_common(cmd, cs, false, 0);
4768 tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
4770 A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
4783 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4785 struct tu_cs *cs = &cmd->draw_cs;
4787 tu6_emit_empty_vs_params(cmd);
4789 if (cmd->device->physical_device->info->a6xx.indirect_draw_wfm_quirk)
4790 draw_wfm(cmd);
4792 tu6_draw_common(cmd, cs, true, 0);
4795 tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
4797 A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
4799 tu_cs_emit_qw(cs, cmd->state.index_va);
4800 tu_cs_emit(cs, cmd->state.max_index_count);
4814 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4817 struct tu_cs *cs = &cmd->draw_cs;
4819 tu6_emit_empty_vs_params(cmd);
4826 draw_wfm(cmd);
4828 tu6_draw_common(cmd, cs, false, 0);
4831 tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
4833 A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
4849 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4852 struct tu_cs *cs = &cmd->draw_cs;
4854 tu6_emit_empty_vs_params(cmd);
4856 draw_wfm(cmd);
4858 tu6_draw_common(cmd, cs, true, 0);
4861 tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
4863 A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(vs_params_offset(cmd)));
4865 tu_cs_emit_qw(cs, cmd->state.index_va);
4866 tu_cs_emit(cs, cmd->state.max_index_count);
4881 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
4883 struct tu_cs *cs = &cmd->draw_cs;
4890 draw_wfm(cmd);
4892 tu6_emit_vs_params(cmd, 0, firstInstance);
4894 tu6_draw_common(cmd, cs, false, 0);
4897 tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_XFB));
4929 tu_emit_compute_driver_params(struct tu_cmd_buffer *cmd,
4991 tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[i]));
4996 tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
5004 tu_cs_emit_qw(cs, global_iova(cmd, cs_indirect_xyz[0]));
5033 tu_dispatch(struct tu_cmd_buffer *cmd,
5040 struct tu_cs *cs = &cmd->cs;
5041 struct tu_pipeline *pipeline = cmd->state.compute_pipeline;
5046 tu_emit_cache_flush(cmd, cs);
5049 tu_cs_emit_state_ib(cs, tu6_emit_consts(cmd, pipeline, true));
5051 tu_emit_compute_driver_params(cmd, cs, pipeline, info);
5053 if (cmd->state.dirty & TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD)
5056 cmd->state.dirty &= ~TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
5080 trace_start_compute(&cmd->trace, cs);
5100 trace_end_compute(&cmd->trace, cs,
5220 tu_barrier(struct tu_cmd_buffer *cmd,
5231 bool gmem = cmd->state.ccu_state == TU_CMD_CCU_GMEM &&
5232 !cmd->state.pass;
5285 if (cmd->state.pass) {
5314 cmd->state.rp.disable_gmem = true;
5319 cmd->state.pass ? &cmd->state.renderpass_cache : &cmd->state.cache;
5337 write_event(struct tu_cmd_buffer *cmd, struct tu_event *event,
5340 struct tu_cs *cs = &cmd->cs;
5343 assert(!cmd->state.pass);
5345 tu_emit_cache_flush(cmd, cs);
5372 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
5383 write_event(cmd, event, src_stage_mask, 1);
5391 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
5394 write_event(cmd, event, stageMask, 0);
5403 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
5404 struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs;
5418 tu_barrier(cmd, pDependencyInfos);
5432 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
5434 cmd->state.predication_active = true;
5436 struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs;
5442 if (cmd->state.pass)
5443 tu_emit_cache_flush_renderpass(cmd, cs);
5445 tu_emit_cache_flush(cmd, cs);
5457 tu_cs_emit_qw(cs, global_iova(cmd, predicate));
5467 tu_cs_emit_qw(cs, global_iova(cmd, predicate));
5473 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
5475 cmd->state.predication_active = false;
5477 struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs;
5491 TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
5496 struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs;
5498 cmd->state.pass ? &cmd->state.renderpass_cache : &cmd->state.cache;
5532 if (cmd->state.pass) {
5533 tu_emit_cache_flush_renderpass(cmd, cs);
5535 tu_emit_cache_flush(cmd, cs);