/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#ifndef TU_CMD_BUFFER_H
#define TU_CMD_BUFFER_H

#include "tu_common.h"

#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_device.h"
#include "tu_lrz.h"
#include "tu_pass.h"
#include "tu_pipeline.h"

enum tu_draw_state_group_id
{
   TU_DRAW_STATE_PROGRAM_CONFIG,
   TU_DRAW_STATE_PROGRAM,
   TU_DRAW_STATE_PROGRAM_BINNING,
   TU_DRAW_STATE_VB,
   TU_DRAW_STATE_VI,
   TU_DRAW_STATE_VI_BINNING,
   TU_DRAW_STATE_RAST,
   TU_DRAW_STATE_CONST,
   TU_DRAW_STATE_DESC_SETS,
   TU_DRAW_STATE_DESC_SETS_LOAD,
   TU_DRAW_STATE_VS_PARAMS,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
   TU_DRAW_STATE_LRZ_AND_DEPTH_PLANE,
   TU_DRAW_STATE_PRIM_MODE_GMEM,
   TU_DRAW_STATE_PRIM_MODE_SYSMEM,

   /* dynamic state related draw states */
   TU_DRAW_STATE_DYNAMIC,
   TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
};

struct tu_descriptor_state
{
   struct tu_descriptor_set *sets[MAX_SETS];
   struct tu_descriptor_set push_set;
   uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
};

enum tu_cmd_dirty_bits
{
   TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
   TU_CMD_DIRTY_VB_STRIDE = BIT(1),
   TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
   TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
   TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
   TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
   TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
   TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
   TU_CMD_DIRTY_LRZ = BIT(8),
   TU_CMD_DIRTY_VS_PARAMS = BIT(9),
   TU_CMD_DIRTY_RASTERIZER_DISCARD = BIT(10),
   TU_CMD_DIRTY_VIEWPORTS = BIT(11),
   TU_CMD_DIRTY_BLEND = BIT(12),
   /* all draw states were disabled and need to be re-enabled: */
   TU_CMD_DIRTY_DRAW_STATE = BIT(13)
};
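
/* A minimal usage sketch (the helper below is hypothetical and only
 * illustrates the general pattern, not the driver's actual draw-time
 * handling): these bits get ORed into tu_cmd_state::dirty (defined below)
 * whenever the corresponding Vulkan state changes, and are tested and
 * cleared when the next draw re-emits that state:
 *
 *    static inline bool
 *    example_consume_dirty(uint32_t *dirty, enum tu_cmd_dirty_bits bit)
 *    {
 *       bool was_dirty = (*dirty & bit) != 0;  // does this need re-emitting?
 *       *dirty &= ~bit;                        // clean once it has been emitted
 *       return was_dirty;
 *    }
 */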

/* There are only three cache domains we have to care about: the CCU, or
 * color cache unit, which is used for color and depth/stencil attachments
 * and copy/blit destinations and is split conceptually into color and depth,
 * and the universal cache (UCHE), which is used for pretty much everything
 * else except for the CP (uncached) and the host. We need to flush whenever
 * data crosses these boundaries.
 */

enum tu_cmd_access_mask {
   TU_ACCESS_UCHE_READ = 1 << 0,
   TU_ACCESS_UCHE_WRITE = 1 << 1,
   TU_ACCESS_CCU_COLOR_READ = 1 << 2,
   TU_ACCESS_CCU_COLOR_WRITE = 1 << 3,
   TU_ACCESS_CCU_DEPTH_READ = 1 << 4,
   TU_ACCESS_CCU_DEPTH_WRITE = 1 << 5,

   /* Experiments have shown that while it's safe to avoid flushing the CCU
    * after each blit/renderpass, it's not safe to assume that subsequent
    * lookups with a different attachment state will hit unflushed cache
    * entries. That is, the CCU needs to be flushed and possibly invalidated
    * when accessing memory with a different attachment state. Writing to an
    * attachment under the following conditions after clearing using the
    * normal 2d engine path is known to have issues:
    *
    * - It isn't the 0'th layer.
    * - There is more than one attachment, and this isn't the 0'th attachment
    *   (this seems to also depend on the cpp of the attachments).
    *
    * Our best guess is that the layer/MRT state is used when computing
    * the location of a cache entry in the CCU, to avoid conflicts. We assume
    * that any access in a renderpass after or before an access by a transfer
    * needs a flush/invalidate, and use the _INCOHERENT variants to represent
    * access by a renderpass.
    */
   TU_ACCESS_CCU_COLOR_INCOHERENT_READ = 1 << 6,
   TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE = 1 << 7,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9,

   /* Accesses which bypass any cache, e.g. writes via the host,
    * CP_EVENT_WRITE::BLIT, and the CP, are SYSMEM_WRITE.
    */
   TU_ACCESS_SYSMEM_READ = 1 << 10,
   TU_ACCESS_SYSMEM_WRITE = 1 << 11,

   /* Memory writes from the CP start in-order with draws and event writes,
    * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read.
    */
   TU_ACCESS_CP_WRITE = 1 << 12,

   TU_ACCESS_READ =
      TU_ACCESS_UCHE_READ |
      TU_ACCESS_CCU_COLOR_READ |
      TU_ACCESS_CCU_DEPTH_READ |
      TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
      TU_ACCESS_SYSMEM_READ,

   TU_ACCESS_WRITE =
      TU_ACCESS_UCHE_WRITE |
      TU_ACCESS_CCU_COLOR_WRITE |
      TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE |
      TU_ACCESS_CCU_DEPTH_WRITE |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
      TU_ACCESS_SYSMEM_WRITE |
      TU_ACCESS_CP_WRITE,

   TU_ACCESS_ALL =
      TU_ACCESS_READ |
      TU_ACCESS_WRITE,
};
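
/* A rough sketch of how these bits are meant to combine (hypothetical code;
 * the actual barrier handling lives in tu_cmd_buffer.c): a write in one
 * cache domain followed by a read in another requires flushing the writer's
 * cache and invalidating the reader's, using the flags from
 * enum tu_cmd_flush_bits defined further below:
 *
 *    enum tu_cmd_flush_bits bits = 0;
 *    if (write & (TU_ACCESS_CCU_COLOR_WRITE |
 *                 TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE))
 *       bits |= TU_CMD_FLAG_CCU_FLUSH_COLOR;    // make the write visible
 *    if (write & TU_ACCESS_CP_WRITE)
 *       bits |= TU_CMD_FLAG_WAIT_MEM_WRITES;    // CP writes land asynchronously
 *    if (read & TU_ACCESS_UCHE_READ)
 *       bits |= TU_CMD_FLAG_CACHE_INVALIDATE;   // don't hit stale UCHE lines
 */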

/* Starting with a6xx, the pipeline is split into several "clusters" (really
 * pipeline stages). Each stage has its own pair of register banks and can
 * switch them independently, so that earlier stages can run ahead of later
 * ones, e.g. the FS of draw N and the VS of draw N + 1 can be executing at
 * the same time.
 *
 * As a result of this, we need to insert a WFI when an earlier stage depends
 * on the result of a later stage. CP_DRAW_* and CP_BLIT will wait for any
 * pending WFI's to complete before starting, and usually even before reading
 * indirect params, so a WFI also acts as a full "pipeline stall".
 *
 * Note, the names of the stages come from CLUSTER_* in devcoredump. We
 * include all the stages for completeness, even ones which do not read/write
 * anything.
 */

enum tu_stage {
   /* This doesn't correspond to a cluster, but we need it for tracking
    * indirect draw parameter reads etc.
    */
   TU_STAGE_CP,

   /* - Fetch index buffer
    * - Fetch vertex attributes, dispatch VS
    */
   TU_STAGE_FE,

   /* Execute all geometry stages (VS thru GS) */
   TU_STAGE_SP_VS,

   /* Write to VPC, do primitive assembly. */
   TU_STAGE_PC_VS,

   /* Rasterization. RB_DEPTH_BUFFER_BASE only exists in CLUSTER_PS according
    * to devcoredump, so presumably this stage stalls for TU_STAGE_PS when
    * early depth testing is enabled before dispatching fragments? However
    * GRAS reads and writes LRZ directly.
    */
   TU_STAGE_GRAS,

   /* Execute FS */
   TU_STAGE_SP_PS,

   /* - Fragment tests
    * - Write color/depth
    * - Streamout writes (???)
    * - Varying interpolation (???)
    */
   TU_STAGE_PS,
};

enum tu_cmd_flush_bits {
   TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
   TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
   TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
   TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
   TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
   TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
   TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
   TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
   TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,

   TU_CMD_FLAG_ALL_FLUSH =
      TU_CMD_FLAG_CCU_FLUSH_DEPTH |
      TU_CMD_FLAG_CCU_FLUSH_COLOR |
      TU_CMD_FLAG_CACHE_FLUSH |
      /* Treat the CP as a sort of "cache" which may need to be "flushed" via
       * waiting for writes to land with WAIT_MEM_WRITES.
       */
      TU_CMD_FLAG_WAIT_MEM_WRITES,

   TU_CMD_FLAG_ALL_INVALIDATE =
      TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
      TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
      TU_CMD_FLAG_CACHE_INVALIDATE |
      /* Treat CP_WAIT_FOR_ME as a "cache" that needs to be invalidated when
       * a command that needs CP_WAIT_FOR_ME is executed. This means we may
       * insert an extra WAIT_FOR_ME before an indirect command requiring it
       * in case there was another command before the current command buffer
       * that it needs to wait for.
       */
      TU_CMD_FLAG_WAIT_FOR_ME,
};

/* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty
 * heavy, involving a CCU cache flush/invalidate and a WFI in order to change
 * which part of the gmem is used by the CCU. Here we keep track of the
 * current state of the CCU.
 */
enum tu_cmd_ccu_state {
   TU_CMD_CCU_SYSMEM,
   TU_CMD_CCU_GMEM,
   TU_CMD_CCU_UNKNOWN,
};

struct tu_cache_state {
   /* Caches which must be made available (flushed) eventually if there are
    * any users outside that cache domain, and caches which must be
    * invalidated eventually if there are any reads.
    */
   enum tu_cmd_flush_bits pending_flush_bits;
   /* Pending flushes */
   enum tu_cmd_flush_bits flush_bits;
};
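
/* A sketch of how the two fields are intended to interact (hypothetical; the
 * real tracking is in tu_cmd_buffer.c): a cached write only records that a
 * flush may eventually be needed, and the flush becomes real once another
 * domain accesses the data:
 *
 *    cache->pending_flush_bits |= TU_CMD_FLAG_CCU_FLUSH_COLOR;  // on a CCU color write
 *    ...
 *    // later, when e.g. a UCHE read of the same memory shows up:
 *    cache->flush_bits |= (cache->pending_flush_bits & TU_CMD_FLAG_ALL_FLUSH) |
 *                         TU_CMD_FLAG_CACHE_INVALIDATE;
 *    cache->pending_flush_bits &= ~TU_CMD_FLAG_ALL_FLUSH;
 *
 * flush_bits is what actually gets emitted into the command stream.
 */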

struct tu_vs_params {
   uint32_t vertex_offset;
   uint32_t first_instance;
};

/* This should be for state that is set inside a renderpass and used at
 * renderpass end time, e.g. to decide whether to use sysmem. This needs
 * special handling for secondary cmdbufs and suspending/resuming render
 * passes where the state may need to be combined afterwards.
 */
struct tu_render_pass_state
{
   bool xfb_used;
   bool has_tess;
   bool has_prim_generated_query_in_rp;
   bool disable_gmem;

   /* Track whether conditional predicate for COND_REG_EXEC is changed in draw_cs */
   bool draw_cs_writes_to_cond_pred;

   uint32_t drawcall_count;

   /* A calculated "draw cost" value for the renderpass, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *       sum += pipeline->color_bandwidth_per_sample;
    *       if (depth_test_enabled)
    *          sum += pipeline->depth_cpp_per_sample;
    *       if (depth_write_enabled)
    *          sum += pipeline->depth_cpp_per_sample;
    *       if (stencil_write_enabled)
    *          sum += pipeline->stencil_cpp_per_sample * 2;
    *    }
    *    drawcall_bandwidth_per_sample = sum / drawcall_count;
    *
    * It allows us to estimate the total bandwidth of drawcalls later, by
    * calculating (drawcall_bandwidth_per_sample * zpass_sample_count).
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details.  But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
   uint32_t drawcall_bandwidth_per_sample_sum;
};
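
/* A worked example of the drawcall_bandwidth_per_sample_sum estimate above
 * (the numbers are purely illustrative): a draw writing a single
 * 4 byte/sample color target with depth test and depth write enabled against
 * a 4 byte/sample depth buffer, and no stencil writes, adds 4 + 4 + 4 = 12
 * to the sum; the sum divided by drawcall_count gives
 * drawcall_bandwidth_per_sample, which later gets scaled by the zpass sample
 * count.
 */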

struct tu_cmd_state
{
   uint32_t dirty;

   struct tu_pipeline *pipeline;
   struct tu_pipeline *compute_pipeline;

   struct tu_render_pass_state rp;

   /* Vertex buffers, viewports, and scissors.
    * The states for these can be updated partially, so we need to save them
    * here to be able to emit a complete draw state.
    */
   struct {
      uint64_t base;
      uint32_t size;
      uint32_t stride;
   } vb[MAX_VBS];
   VkViewport viewport[MAX_VIEWPORTS];
   VkRect2D scissor[MAX_SCISSORS];
   uint32_t max_viewport, max_scissor;

   /* for dynamic states that can't be emitted directly */
   uint32_t dynamic_stencil_mask;
   uint32_t dynamic_stencil_wrmask;
   uint32_t dynamic_stencil_ref;

   uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
   uint32_t pc_raster_cntl, vpc_unknown_9107;
   uint32_t rb_mrt_control[MAX_RTS], rb_mrt_blend_control[MAX_RTS];
   uint32_t rb_mrt_control_rop;
   uint32_t rb_blend_cntl, sp_blend_cntl;
   uint32_t pipeline_color_write_enable, pipeline_blend_enable;
   uint32_t color_write_enable;
   bool logic_op_enabled;
   bool rop_reads_dst;
   enum pc_di_primtype primtype;
   bool primitive_restart_enable;

   /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
   struct tu_draw_state vertex_buffers;
   struct tu_draw_state shader_const;
   struct tu_draw_state desc_sets;

   struct tu_draw_state vs_params;

   /* Index buffer */
   uint64_t index_va;
   uint32_t max_index_count;
   uint8_t index_size;

   /* because streamout base has to be 32-byte aligned
    * there is an extra offset to deal with when it is
    * unaligned
    */
   uint8_t streamout_offset[IR3_MAX_SO_BUFFERS];

   /* Renderpasses are tricky, because we may need to flush differently if
    * using sysmem vs. gmem and therefore we have to delay any flushing that
    * happens before a renderpass. So we have to have two copies of the flush
    * state, one for intra-renderpass flushes (i.e. renderpass dependencies)
    * and one for outside a renderpass.
    */
   struct tu_cache_state cache;
   struct tu_cache_state renderpass_cache;

   enum tu_cmd_ccu_state ccu_state;

   /* Decides which GMEM layout to use from the tu_pass, based on whether the
    * CCU might get used by tu_store_gmem_attachment().
    */
   enum tu_gmem_layout gmem_layout;

   const struct tu_render_pass *pass;
   const struct tu_subpass *subpass;
   const struct tu_framebuffer *framebuffer;
   const struct tu_tiling_config *tiling;
   VkRect2D render_area;

   const struct tu_image_view **attachments;

   /* State that in the dynamic case comes from VkRenderingInfo and needs to
    * be saved/restored when suspending. This holds the state for the last
    * suspended renderpass, which may point to this command buffer's dynamic_*
    * or another command buffer if executed on a secondary.
    */
   struct {
      const struct tu_render_pass *pass;
      const struct tu_subpass *subpass;
      const struct tu_framebuffer *framebuffer;
      VkRect2D render_area;
      enum tu_gmem_layout gmem_layout;

      const struct tu_image_view **attachments;

      struct tu_lrz_state lrz;
   } suspended_pass;

   bool tessfactor_addr_set;
   bool predication_active;
   enum a5xx_line_mode line_mode;
   bool z_negative_one_to_one;

   /* VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT and
    * VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT are allowed to run simultaneously,
    * but they use the same {START,STOP}_PRIMITIVE_CTRS control.
    */
   uint32_t prim_counters_running;

   bool prim_generated_query_running_before_rp;

   /* These are the states of the suspend/resume state machine. In addition to
    * tracking whether we're in the middle of a chain of suspending and
    * resuming passes that will be merged, we need to track whether the
    * command buffer begins in the middle of such a chain, for when it gets
    * merged with other command buffers. We call such a chain that begins
    * before the command buffer starts a "pre-chain".
    *
    * Note that when this command buffer is finished, this state is untouched
    * but it gains a different meaning. For example, if we finish in state
    * SR_IN_CHAIN, we finished in the middle of a suspend/resume chain, so
    * there's a suspend/resume chain that extends past the end of the command
    * buffer. In this sense it's the "opposite" of SR_AFTER_PRE_CHAIN, which
    * means that there's a suspend/resume chain that extends before the
    * beginning.
    */
   enum {
      /* Either there are no suspend/resume chains, or they are entirely
       * contained in the current command buffer.
       *
       * BeginCommandBuffer() <- start of current command buffer
       * ...
       * // we are here
       */
      SR_NONE = 0,

      /* We are in the middle of a suspend/resume chain that starts before the
       * current command buffer. This happens when the command buffer begins
       * with a resuming render pass and all of the passes up to the current
       * one are suspending. In this state, our part of the chain is not saved
       * and is in the current draw_cs/state.
       *
       * BeginRendering() ... EndRendering(suspending)
       * BeginCommandBuffer() <- start of current command buffer
       * BeginRendering(resuming) ... EndRendering(suspending)
       * BeginRendering(resuming) ... EndRendering(suspending)
       * ...
       * // we are here
       */
      SR_IN_PRE_CHAIN,

      /* We are currently outside of any suspend/resume chains, but there is a
       * chain starting before the current command buffer. It is saved in
       * pre_chain.
       *
       * BeginRendering() ... EndRendering(suspending)
       * BeginCommandBuffer() <- start of current command buffer
       * // This part is stashed in pre_chain
       * BeginRendering(resuming) ... EndRendering(suspending)
       * BeginRendering(resuming) ... EndRendering(suspending)
       * ...
       * BeginRendering(resuming) ... EndRendering() // end of chain
       * ...
       * // we are here
       */
      SR_AFTER_PRE_CHAIN,

      /* We are in the middle of a suspend/resume chain and there is no chain
       * starting before the current command buffer.
       *
       * BeginCommandBuffer() <- start of current command buffer
       * ...
       * BeginRendering() ... EndRendering(suspending)
       * BeginRendering(resuming) ... EndRendering(suspending)
       * BeginRendering(resuming) ... EndRendering(suspending)
       * ...
       * // we are here
       */
      SR_IN_CHAIN,

      /* We are in the middle of a suspend/resume chain and there is another,
       * separate, chain starting before the current command buffer.
       *
       * BeginRendering() ... EndRendering(suspending)
       * BeginCommandBuffer() <- start of current command buffer
       * // This part is stashed in pre_chain
       * BeginRendering(resuming) ... EndRendering(suspending)
       * BeginRendering(resuming) ... EndRendering(suspending)
       * ...
       * BeginRendering(resuming) ... EndRendering() // end of chain
       * ...
       * BeginRendering() ... EndRendering(suspending)
       * BeginRendering(resuming) ... EndRendering(suspending)
       * BeginRendering(resuming) ... EndRendering(suspending)
       * ...
       * // we are here
       */
      SR_IN_CHAIN_AFTER_PRE_CHAIN,
   } suspend_resume;
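
   /* A rough reading of the diagrams above as a transition table. This is
    * only a summary implied by the state descriptions (the authoritative
    * handling, including the bookkeeping done on each transition, is in
    * tu_cmd_buffer.c):
    *
    *    current state                on event                 next state
    *    SR_NONE                      begin resuming pass      SR_IN_PRE_CHAIN
    *    SR_IN_PRE_CHAIN              end without suspending   SR_AFTER_PRE_CHAIN
    *    SR_NONE                      end with suspending      SR_IN_CHAIN
    *    SR_AFTER_PRE_CHAIN           end with suspending      SR_IN_CHAIN_AFTER_PRE_CHAIN
    *    SR_IN_CHAIN                  end without suspending   SR_NONE
    *    SR_IN_CHAIN_AFTER_PRE_CHAIN  end without suspending   SR_AFTER_PRE_CHAIN
    */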

   bool suspending, resuming;

   struct tu_lrz_state lrz;

   struct tu_draw_state lrz_and_depth_plane_state;

   struct tu_vs_params last_vs_params;
};

struct tu_cmd_pool
{
   struct vk_command_pool vk;

   struct list_head cmd_buffers;
   struct list_head free_cmd_buffers;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, vk.base, VkCommandPool,
                               VK_OBJECT_TYPE_COMMAND_POOL)

enum tu_cmd_buffer_status
{
   TU_CMD_BUFFER_STATUS_INVALID,
   TU_CMD_BUFFER_STATUS_INITIAL,
   TU_CMD_BUFFER_STATUS_RECORDING,
   TU_CMD_BUFFER_STATUS_EXECUTABLE,
   TU_CMD_BUFFER_STATUS_PENDING,
};

struct tu_cmd_buffer
{
   struct vk_command_buffer vk;

   struct tu_device *device;

   struct tu_cmd_pool *pool;
   struct list_head pool_link;

   struct u_trace trace;
   struct u_trace_iterator trace_renderpass_start;
   struct u_trace_iterator trace_renderpass_end;

   struct list_head renderpass_autotune_results;
   struct tu_autotune_results_buffer* autotune_buffer;

   VkCommandBufferUsageFlags usage_flags;
   enum tu_cmd_buffer_status status;

   VkQueryPipelineStatisticFlags inherited_pipeline_statistics;

   struct tu_cmd_state state;
   uint32_t queue_family_index;

   uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   VkShaderStageFlags push_constant_stages;
   struct tu_descriptor_set meta_push_descriptors;

   struct tu_descriptor_state descriptors[MAX_BIND_POINTS];

   struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1)];
   struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS];
   struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1];
   const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1)];

   struct tu_render_pass dynamic_pass;
   struct tu_subpass dynamic_subpass;
   struct tu_framebuffer dynamic_framebuffer;

   VkResult record_result;

   struct tu_cs cs;
   struct tu_cs draw_cs;
   struct tu_cs tile_store_cs;
   struct tu_cs draw_epilogue_cs;
   struct tu_cs sub_cs;

   /* If the first render pass in the command buffer is resuming, then it is
    * part of a suspend/resume chain that starts before the current command
    * buffer and needs to be merged later. In this case, its incomplete state
    * is stored in pre_chain. In the symmetric case where the last render pass
    * is suspending, we just skip ending the render pass and its state is
    * stored in draw_cs/the current state. The first and last render pass
    * might be part of different chains, which is why all the state may need
    * to be saved separately here.
    */
   struct {
      struct tu_cs draw_cs;
      struct tu_cs draw_epilogue_cs;

      struct u_trace_iterator trace_renderpass_start, trace_renderpass_end;

      struct tu_render_pass_state state;
   } pre_chain;

   uint32_t vsc_draw_strm_pitch;
   uint32_t vsc_prim_strm_pitch;
};
VK_DEFINE_HANDLE_CASTS(tu_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)

static inline uint32_t
tu_attachment_gmem_offset(struct tu_cmd_buffer *cmd,
                          const struct tu_render_pass_attachment *att)
{
   assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
   return att->gmem_offset[cmd->state.gmem_layout];
}

static inline uint32_t
tu_attachment_gmem_offset_stencil(struct tu_cmd_buffer *cmd,
                                  const struct tu_render_pass_attachment *att)
{
   assert(cmd->state.gmem_layout < TU_GMEM_LAYOUT_COUNT);
   return att->gmem_offset_stencil[cmd->state.gmem_layout];
}
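
/* Hypothetical usage sketch (the surrounding names are illustrative, not
 * actual driver code): the per-layout gmem_offset arrays in
 * tu_render_pass_attachment are always indexed through the layout chosen in
 * cmd->state.gmem_layout rather than directly, e.g. when emitting GMEM base
 * addresses in the tile load/store path:
 *
 *    const struct tu_render_pass_attachment *att = &pass->attachments[a];
 *    uint32_t gmem_base = tu_attachment_gmem_offset(cmd, att);
 *    if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
 *       gmem_stencil_base = tu_attachment_gmem_offset_stencil(cmd, att);
 */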

void tu_render_pass_state_merge(struct tu_render_pass_state *dst,
                                const struct tu_render_pass_state *src);

VkResult tu_cmd_buffer_begin(struct tu_cmd_buffer *cmd_buffer,
                             VkCommandBufferUsageFlags usage_flags);

void tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer,
                                    struct tu_cs *cs);

void tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
                             struct tu_cs *cs,
                             enum tu_cmd_ccu_state ccu_state);

void
tu_append_pre_chain(struct tu_cmd_buffer *cmd,
                    struct tu_cmd_buffer *secondary);

void
tu_append_pre_post_chain(struct tu_cmd_buffer *cmd,
                         struct tu_cmd_buffer *secondary);

void
tu_append_post_chain(struct tu_cmd_buffer *cmd,
                     struct tu_cmd_buffer *secondary);

void
tu_restore_suspended_pass(struct tu_cmd_buffer *cmd,
                          struct tu_cmd_buffer *suspended);

void tu_cmd_render(struct tu_cmd_buffer *cmd);

void
tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     enum vgt_event_type event);

static inline struct tu_descriptor_state *
tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
                         VkPipelineBindPoint bind_point)
{
   return &cmd_buffer->descriptors[bind_point];
}

void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples,
                   enum a5xx_line_mode line_mode);

void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1,
                             uint32_t x2, uint32_t y2);

void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);

void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);

void tu6_apply_depth_bounds_workaround(struct tu_device *device,
                                       uint32_t *rb_depth_cntl);

void
update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask);

#endif /* TU_CMD_BUFFER_H */