1/* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * SPDX-License-Identifier: MIT 5 * 6 * based in part on anv driver which is: 7 * Copyright © 2015 Intel Corporation 8 */ 9 10#include "tu_pass.h" 11 12#include "vk_util.h" 13 14#include "tu_cmd_buffer.h" 15#include "tu_device.h" 16#include "tu_image.h" 17 18/* Return true if we have to fallback to sysmem rendering because the 19 * dependency can't be satisfied with tiled rendering. 20 */ 21 22static bool 23dep_invalid_for_gmem(const VkSubpassDependency2 *dep, 24 VkPipelineStageFlags2 src_stage_mask, 25 VkPipelineStageFlags2 dst_stage_mask) 26{ 27 /* External dependencies don't matter here. */ 28 if (dep->srcSubpass == VK_SUBPASS_EXTERNAL || 29 dep->dstSubpass == VK_SUBPASS_EXTERNAL) 30 return false; 31 32 /* We can conceptually break down the process of rewriting a sysmem 33 * renderpass into a gmem one into two parts: 34 * 35 * 1. Split each draw and multisample resolve into N copies, one for each 36 * bin. (If hardware binning, add one more copy where the FS is disabled 37 * for the binning pass). This is always allowed because the vertex stage 38 * is allowed to run an arbitrary number of times and there are no extra 39 * ordering constraints within a draw. 40 * 2. Take the last copy of the second-to-last draw and slide it down to 41 * before the last copy of the last draw. Repeat for each earlier draw 42 * until the draw pass for the last bin is complete, then repeat for each 43 * earlier bin until we finish with the first bin. 44 * 45 * During this rearranging process, we can't slide draws past each other in 46 * a way that breaks the subpass dependencies. For each draw, we must slide 47 * it past (copies of) the rest of the draws in the renderpass. We can 48 * slide a draw past another if there isn't a dependency between them, or 49 * if the dependenc(ies) are dependencies between framebuffer-space stages 50 * only with the BY_REGION bit set. 
Note that this includes 51 * self-dependencies, since these may result in pipeline barriers that also 52 * break the rearranging process. 53 */ 54 55 /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer 56 * Region Dependencies": 57 */ 58 const VkPipelineStageFlags2 framebuffer_space_stages = 59 VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | 60 VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | 61 VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | 62 VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; 63 64 return 65 (src_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) || 66 (dst_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)) || 67 !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT); 68} 69 70static void 71tu_render_pass_add_subpass_dep(struct tu_render_pass *pass, 72 const VkSubpassDependency2 *dep) 73{ 74 uint32_t src = dep->srcSubpass; 75 uint32_t dst = dep->dstSubpass; 76 77 /* Ignore subpass self-dependencies as they allow the app to call 78 * vkCmdPipelineBarrier() inside the render pass and the driver should only 79 * do the barrier when called, not when starting the render pass. 80 * 81 * We cannot decide whether to allow gmem rendering before a barrier 82 * is actually emitted, so we delay the decision until then. 83 */ 84 if (src == dst) 85 return; 86 87 /* From the Vulkan 1.2.195 spec: 88 * 89 * "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask, 90 * dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization 91 * and access scopes instead are defined by the parameters of VkMemoryBarrier2." 92 */ 93 const VkMemoryBarrier2 *barrier = 94 vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2); 95 VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask; 96 VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask; 97 VkPipelineStageFlags2 dst_stage_mask = barrier ? 
barrier->dstStageMask : dep->dstStageMask; 98 VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask; 99 100 if (dep_invalid_for_gmem(dep, src_stage_mask, dst_stage_mask)) { 101 perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency"); 102 for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++) 103 pass->gmem_pixels[i] = 0; 104 } 105 106 struct tu_subpass_barrier *dst_barrier; 107 if (dst == VK_SUBPASS_EXTERNAL) { 108 dst_barrier = &pass->end_barrier; 109 } else { 110 dst_barrier = &pass->subpasses[dst].start_barrier; 111 } 112 113 dst_barrier->src_stage_mask |= src_stage_mask; 114 dst_barrier->dst_stage_mask |= dst_stage_mask; 115 dst_barrier->src_access_mask |= src_access_mask; 116 dst_barrier->dst_access_mask |= dst_access_mask; 117} 118 119/* We currently only care about undefined layouts, because we have to 120 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as 121 * UNDEFINED for anything not linear tiled, but we don't know yet whether the 122 * images used are tiled, so just assume they are. 123 */ 124 125static bool 126layout_undefined(VkImageLayout layout) 127{ 128 return layout == VK_IMAGE_LAYOUT_UNDEFINED || 129 layout == VK_IMAGE_LAYOUT_PREINITIALIZED; 130} 131 132/* This implements the following bit of spec text: 133 * 134 * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the 135 * first subpass that uses an attachment, then an implicit subpass 136 * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is 137 * used in. The implicit subpass dependency only exists if there 138 * exists an automatic layout transition away from initialLayout. 
139 * The subpass dependency operates as if defined with the 140 * following parameters: 141 * 142 * VkSubpassDependency implicitDependency = { 143 * .srcSubpass = VK_SUBPASS_EXTERNAL; 144 * .dstSubpass = firstSubpass; // First subpass attachment is used in 145 * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; 146 * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 147 * .srcAccessMask = 0; 148 * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 149 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 150 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 151 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 152 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 153 * .dependencyFlags = 0; 154 * }; 155 * 156 * Similarly, if there is no subpass dependency from the last subpass 157 * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit 158 * subpass dependency exists from the last subpass it is used in to 159 * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists 160 * if there exists an automatic layout transition into finalLayout. 161 * The subpass dependency operates as if defined with the following 162 * parameters: 163 * 164 * VkSubpassDependency implicitDependency = { 165 * .srcSubpass = lastSubpass; // Last subpass attachment is used in 166 * .dstSubpass = VK_SUBPASS_EXTERNAL; 167 * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 168 * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; 169 * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 170 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 171 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 172 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 173 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 174 * .dstAccessMask = 0; 175 * .dependencyFlags = 0; 176 * }; 177 * 178 * Note: currently this is the only use we have for layout transitions, 179 * besides needing to invalidate CCU at the beginning, so we also flag 180 * transitions from UNDEFINED here. 
181 */ 182static void 183tu_render_pass_add_implicit_deps(struct tu_render_pass *pass, 184 const VkRenderPassCreateInfo2 *info) 185{ 186 const VkAttachmentDescription2* att = info->pAttachments; 187 bool has_external_src[info->subpassCount]; 188 bool has_external_dst[info->subpassCount]; 189 bool att_used[pass->attachment_count]; 190 191 memset(has_external_src, 0, sizeof(has_external_src)); 192 memset(has_external_dst, 0, sizeof(has_external_dst)); 193 194 for (uint32_t i = 0; i < info->dependencyCount; i++) { 195 uint32_t src = info->pDependencies[i].srcSubpass; 196 uint32_t dst = info->pDependencies[i].dstSubpass; 197 198 if (src == dst) 199 continue; 200 201 if (src == VK_SUBPASS_EXTERNAL) 202 has_external_src[dst] = true; 203 if (dst == VK_SUBPASS_EXTERNAL) 204 has_external_dst[src] = true; 205 } 206 207 memset(att_used, 0, sizeof(att_used)); 208 209 for (unsigned i = 0; i < info->subpassCount; i++) { 210 const VkSubpassDescription2 *subpass = &info->pSubpasses[i]; 211 bool src_implicit_dep = false; 212 213 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) { 214 uint32_t a = subpass->pInputAttachments[j].attachment; 215 216 if (a == VK_ATTACHMENT_UNUSED) 217 continue; 218 219 uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ? 
220 vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) : 221 VK_IMAGE_LAYOUT_UNDEFINED; 222 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false); 223 224 if ((att[a].initialLayout != subpass->pInputAttachments[j].layout || 225 stencil_initial_layout != stencil_layout) && 226 !att_used[a] && !has_external_src[i]) 227 src_implicit_dep = true; 228 att_used[a] = true; 229 } 230 231 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 232 uint32_t a = subpass->pColorAttachments[j].attachment; 233 if (a == VK_ATTACHMENT_UNUSED) 234 continue; 235 if (att[a].initialLayout != subpass->pColorAttachments[j].layout && 236 !att_used[a] && !has_external_src[i]) 237 src_implicit_dep = true; 238 att_used[a] = true; 239 } 240 241 if (subpass->pDepthStencilAttachment && 242 subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { 243 uint32_t a = subpass->pDepthStencilAttachment->attachment; 244 uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att); 245 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false); 246 247 if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout || 248 stencil_initial_layout != stencil_layout) && 249 !att_used[a] && !has_external_src[i]) { 250 src_implicit_dep = true; 251 } 252 att_used[a] = true; 253 } 254 255 if (subpass->pResolveAttachments) { 256 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 257 uint32_t a = subpass->pResolveAttachments[j].attachment; 258 if (a == VK_ATTACHMENT_UNUSED) 259 continue; 260 if (att[a].initialLayout != subpass->pResolveAttachments[j].layout && 261 !att_used[a] && !has_external_src[i]) 262 src_implicit_dep = true; 263 att_used[a] = true; 264 } 265 } 266 267 const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 268 vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); 269 270 if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment && 271 
ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 272 uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 273 uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att); 274 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false); 275 276 if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout || 277 stencil_initial_layout != stencil_layout) && 278 !att_used[a] && !has_external_src[i]) 279 src_implicit_dep = true; 280 att_used[a] = true; 281 } 282 283 if (src_implicit_dep) { 284 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) { 285 .srcSubpass = VK_SUBPASS_EXTERNAL, 286 .dstSubpass = i, 287 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 288 .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 289 .srcAccessMask = 0, 290 .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 291 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 292 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 293 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 294 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 295 .dependencyFlags = 0, 296 }); 297 } 298 } 299 300 memset(att_used, 0, sizeof(att_used)); 301 302 for (int i = info->subpassCount - 1; i >= 0; i--) { 303 const VkSubpassDescription2 *subpass = &info->pSubpasses[i]; 304 bool dst_implicit_dep = false; 305 306 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) { 307 uint32_t a = subpass->pInputAttachments[j].attachment; 308 if (a == VK_ATTACHMENT_UNUSED) 309 continue; 310 311 uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ? 
312 vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) : 313 VK_IMAGE_LAYOUT_UNDEFINED; 314 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true); 315 316 if ((att[a].finalLayout != subpass->pInputAttachments[j].layout || 317 stencil_final_layout != stencil_layout) && 318 !att_used[a] && !has_external_dst[i]) 319 dst_implicit_dep = true; 320 att_used[a] = true; 321 } 322 323 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 324 uint32_t a = subpass->pColorAttachments[j].attachment; 325 if (a == VK_ATTACHMENT_UNUSED) 326 continue; 327 if (att[a].finalLayout != subpass->pColorAttachments[j].layout && 328 !att_used[a] && !has_external_dst[i]) 329 dst_implicit_dep = true; 330 att_used[a] = true; 331 } 332 333 if (subpass->pDepthStencilAttachment && 334 subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { 335 uint32_t a = subpass->pDepthStencilAttachment->attachment; 336 uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att); 337 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true); 338 339 if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout || 340 stencil_final_layout != stencil_layout) && 341 !att_used[a] && !has_external_dst[i]) { 342 dst_implicit_dep = true; 343 } 344 att_used[a] = true; 345 } 346 347 if (subpass->pResolveAttachments) { 348 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 349 uint32_t a = subpass->pResolveAttachments[j].attachment; 350 if (a == VK_ATTACHMENT_UNUSED) 351 continue; 352 if (att[a].finalLayout != subpass->pResolveAttachments[j].layout && 353 !att_used[a] && !has_external_dst[i]) 354 dst_implicit_dep = true; 355 att_used[a] = true; 356 } 357 } 358 359 const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 360 vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); 361 362 if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment && 363 
ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 364 uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 365 uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att); 366 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true); 367 368 if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout || 369 stencil_final_layout != stencil_layout) && 370 !att_used[a] && !has_external_src[i]) 371 dst_implicit_dep = true; 372 att_used[a] = true; 373 } 374 375 if (dst_implicit_dep) { 376 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) { 377 .srcSubpass = i, 378 .dstSubpass = VK_SUBPASS_EXTERNAL, 379 .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 380 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 381 .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 382 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 383 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 384 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 385 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 386 .dstAccessMask = 0, 387 .dependencyFlags = 0, 388 }); 389 } 390 } 391 392 /* Handle UNDEFINED transitions, similar to the handling in tu_barrier(). 393 * Assume that if an attachment has an initial layout of UNDEFINED, it gets 394 * transitioned eventually. 395 */ 396 for (unsigned i = 0; i < info->attachmentCount; i++) { 397 if (layout_undefined(att[i].initialLayout)) { 398 if (vk_format_is_depth_or_stencil(att[i].format)) { 399 pass->subpasses[0].start_barrier.incoherent_ccu_depth = true; 400 } else { 401 pass->subpasses[0].start_barrier.incoherent_ccu_color = true; 402 } 403 } 404 } 405} 406 407/* If an input attachment is used without an intervening write to the same 408 * attachment, then we can just use the original image, even in GMEM mode. 
 * This is an optimization, but it's also important because it allows us to
 * avoid having to invalidate UCHE at the beginning of each tile due to it
 * becoming invalid. The only reads of GMEM via UCHE should be after an
 * earlier subpass modified it, which only works if there's already an
 * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
 * don't consider this in the dependency code, so this is also required for
 * correctness.
 */
static void
tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
{
   /* written[a] tracks whether attachment a has been written (as color,
    * resolve, or depth/stencil) by this or an earlier subpass.
    */
   bool written[pass->attachment_count];

   memset(written, 0, sizeof(written));

   for (unsigned i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];

      /* An input attachment only needs to be read from gmem if something
       * earlier in the pass wrote it; otherwise read the original image.
       */
      for (unsigned j = 0; j < subpass->input_count; j++) {
         uint32_t a = subpass->input_attachments[j].attachment;

         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         subpass->input_attachments[j].patch_input_gmem = written[a];
      }

      for (unsigned j = 0; j < subpass->color_count; j++) {
         uint32_t a = subpass->color_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         written[a] = true;

         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment == a &&
                !subpass->input_attachments[k].patch_input_gmem) {
               /* For render feedback loops, we have no idea whether the use
                * as a color attachment or input attachment will come first,
                * so we have to always use GMEM in case the color attachment
                * comes first and defensively invalidate UCHE in case the
                * input attachment comes first.
                */
               subpass->feedback_invalidate = true;
               subpass->input_attachments[k].patch_input_gmem = true;
            }
         }
      }

      for (unsigned j = 0; j < subpass->resolve_count; j++) {
         uint32_t a = subpass->resolve_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         written[a] = true;
      }

      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
         written[subpass->depth_stencil_attachment.attachment] = true;
         /* Same feedback-loop handling as for color attachments above, but
          * for the depth/stencil attachment used as an input attachment.
          */
         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment ==
                subpass->depth_stencil_attachment.attachment &&
                !subpass->input_attachments[k].patch_input_gmem) {
               subpass->feedback_invalidate = true;
               subpass->input_attachments[k].patch_input_gmem = true;
            }
         }
      }
   }
}

/* Detect render feedback loops: a subpass that uses the same attachment both
 * as a color (or depth/stencil) attachment and as an input attachment.
 */
static void
tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
{
   for (unsigned i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];

      for (unsigned j = 0; j < subpass->color_count; j++) {
         uint32_t a = subpass->color_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment == a) {
               subpass->feedback_loop_color = true;
               break;
            }
         }
      }

      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment ==
                subpass->depth_stencil_attachment.attachment) {
               subpass->feedback_loop_ds = true;
               break;
            }
         }
      }
   }
}

/* Record the subpass sample count; all gmem attachments of a subpass are
 * required to agree on it (asserted below).
 */
static void update_samples(struct tu_subpass *subpass,
                           VkSampleCountFlagBits samples)
{
   assert(subpass->samples == 0 || subpass->samples == samples);
   subpass->samples = samples;
}

/* Compute per-attachment cond_load_allowed/cond_store_allowed flags
 * (the loop body continues below).
 */
static void
tu_render_pass_cond_config(struct tu_render_pass *pass)
{
for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      /* A conditional load is only allowed when the attachment is actually
       * loaded, is not cleared, and is not a resolve destination.
       */
      att->cond_load_allowed =
         (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
      att->cond_store_allowed =
         (att->store || att->store_stencil) && !att->clear_mask;
   }
}

/* Divide gmem among the attachments for each gmem layout, computing the
 * number of pixels per tile (gmem_pixels) that fit; 0 disables gmem
 * rendering for that layout.
 */
static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT;
        layout++) {
      /* From the VK_KHR_multiview spec:
       *
       *    Multiview is all-or-nothing for a render pass - that is, either all
       *    subpasses must have a non-zero view mask (though some subpasses may
       *    have only one view) or all must be zero.
       *
       * This means we only have to check one of the view masks.
       */
      if (pass->subpasses[0].multiview_mask) {
         /* It seems multiview must use sysmem rendering. */
         pass->gmem_pixels[layout] = 0;
         continue;
      }

      /* log2(gmem_align/(tile_align_w*tile_align_h)) */
      uint32_t block_align_shift = 3;
      uint32_t tile_align_w = phys_dev->info->tile_align_w;
      uint32_t gmem_align = (1 << block_align_shift) * tile_align_w *
                            phys_dev->info->tile_align_h;

      /* calculate total bytes per pixel */
      uint32_t cpp_total = 0;
      for (uint32_t i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         bool cpp1 = (att->cpp == 1);
         if (att->gmem) {
            cpp_total += att->cpp;

            /* take into account the separate stencil: */
            if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
               /* for d32s8, att->samples serves as the stencil cpp — see
                * the cpp setup in tu_CreateRenderPass2
                */
               cpp1 = (att->samples == 1);
               cpp_total += att->samples;
            }

            /* texture pitch must be aligned to 64, use a tile_align_w that is
             * a multiple of 64 for cpp==1 attachment to work as input
             * attachment
             */
            if (cpp1 && tile_align_w % 64 != 0) {
               /* doubling tile_align_w doubles the block size, so halve the
                * shift to keep gmem_align consistent
                */
               tile_align_w *= 2;
               block_align_shift -= 1;
            }
         }
      }

      pass->tile_align_w = tile_align_w;

      /* no gmem attachments */
      if (cpp_total == 0) {
         /* any value non-zero value so tiling config works with no
          * attachments
          */
         pass->gmem_pixels[layout] = 1024 * 1024;
         continue;
      }

      /* TODO: this algorithm isn't optimal
       * for example, two attachments with cpp = {1, 4}
       * result:  nblocks = {12, 52}, pixels = 196608
       * optimal: nblocks = {13, 51}, pixels = 208896
       */
      uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL
                              ? phys_dev->gmem_size
                              : phys_dev->ccu_offset_gmem;
      uint32_t gmem_blocks = gmem_size / gmem_align;
      uint32_t offset = 0, pixels = ~0u, i;
      for (i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         if (!att->gmem)
            continue;

         att->gmem_offset[layout] = offset;

         uint32_t align = MAX2(1, att->cpp >> block_align_shift);
         /* proportional share of the remaining blocks, rounded down to the
          * alignment but at least one aligned unit
          */
         uint32_t nblocks =
            MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->cpp;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

         /* repeat the same for separate stencil */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            att->gmem_offset_stencil[layout] = offset;

            /* note: for s8_uint, block align is always 1 */
            uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
            if (nblocks > gmem_blocks)
               break;

            gmem_blocks -= nblocks;
            cpp_total -= att->samples;
            offset += nblocks * gmem_align;
            pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
         }
      }

      /* if the loop didn't complete then the gmem config is impossible */
      if (i == pass->attachment_count)
         pass->gmem_pixels[layout] = pixels;
   }
}

/* Estimate per-pixel bandwidth cost of the pass for gmem vs sysmem
 * rendering (loop body continues below).
 */
static void
tu_render_pass_bandwidth_config(struct tu_render_pass *pass)
{
   for
(uint32_t i = 0; i < pass->attachment_count; i++) {
      const struct tu_render_pass_attachment *att = &pass->attachments[i];

      /* approximate tu_load_gmem_attachment */
      if (att->load)
         pass->gmem_bandwidth_per_pixel += att->cpp;

      /* approximate tu_store_gmem_attachment */
      if (att->store)
         pass->gmem_bandwidth_per_pixel += att->cpp;

      /* approximate tu_clear_sysmem_attachment */
      if (att->clear_mask)
         pass->sysmem_bandwidth_per_pixel += att->cpp;

      /* approximate tu6_emit_sysmem_resolves */
      if (att->will_be_resolved) {
         pass->sysmem_bandwidth_per_pixel +=
            att->cpp + att->cpp / att->samples;
      }
   }
}

/* Translate the Vulkan load/store ops for one attachment into the driver's
 * clear_mask/load/store flags, folding the stencil ops according to the
 * attachment format (packed vs. separate stencil).
 */
static void
attachment_set_ops(struct tu_device *device,
                   struct tu_render_pass_attachment *att,
                   VkAttachmentLoadOp load_op,
                   VkAttachmentLoadOp stencil_load_op,
                   VkAttachmentStoreOp store_op,
                   VkAttachmentStoreOp stencil_store_op)
{
   /* debug option: treat DONT_CARE as LOAD */
   if (device->instance->debug_flags & TU_DEBUG_DONT_CARE_AS_LOAD) {
      if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   /* load/store ops */
   att->clear_mask =
      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);

   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);

   switch (att->format) {
   case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load = true;
      if (stencil_store)
         att->store = true;
      break;
   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
      att->load = stencil_load;
      att->store = stencil_store;
      break;
   case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load_stencil = true;
      if (stencil_store)
         att->store_stencil = true;
      break;
   default:
      break;
   }
}

/* Return true when the subpass has an actual depth/stencil resolve
 * attachment (struct present, pointer non-NULL, attachment not UNUSED).
 */
static bool
is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
{
   if (depth_stencil_resolve &&
       depth_stencil_resolve->pDepthStencilResolveAttachment &&
       depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
      return true;
   }
   return false;
}

/* Mark attachment a as used (and hence gmem-allocated) by subpass i
 * (function body continues below).
 */
static void
tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo)
{
   struct tu_subpass *subpass = &pass->subpasses[i];

   pass->attachments[a].gmem = true;
   update_samples(subpass, pCreateInfo->pAttachments[a].samples);
   pass->attachments[a].clear_views |= subpass->multiview_mask;
}

/* Create a tu_render_pass from VkRenderPassCreateInfo2: allocate the pass
 * plus its attachment and subpass-attachment arrays, fill in per-attachment
 * and per-subpass state, then run the gmem/bandwidth/dependency analyses.
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2 *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   /* debug path: defer to the common dynamic-rendering implementation */
   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
      return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator,
                                         pRenderPass);

   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);

   /* single allocation: pass struct, trailing subpasses, then attachments */
   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      /* for d32s8, cpp is for the depth image, and
       * att->samples will be used as the cpp for the stencil image
       */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         att->cpp = 4 * att->samples;
      else
         att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there.
       */
      att->gmem = false;

      VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp;
      VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp;

      attachment_set_ops(device, att, loadOp, stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }
   /* count subpass attachment references so they can share one allocation */
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
         (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->vk.alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   /* p walks through the shared subpass_attachments allocation, carving out
    * the input/color/resolve reference arrays of each subpass in turn
    */
   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->resolve_count = 0;
      subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
      subpass->samples = 0;
      subpass->srgb_cntl = 0;

      const VkSubpassDescriptionFlagBits raster_order_access_bits =
         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM |
         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM;

      subpass->raster_order_attachment_access = desc->flags & raster_order_access_bits;

      subpass->multiview_mask = desc->viewMask;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            /* Note: attachments only used as input attachments will be read
             * directly instead of through gmem, so we don't mark input
             * attachments as needing gmem.
             */
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;

            if (a != VK_ATTACHMENT_UNUSED) {
               tu_subpass_use_attachment(pass, i, a, pCreateInfo);

               if (vk_format_is_srgb(pass->attachments[a].format))
                  subpass->srgb_cntl |= 1 << j;
            }
         }
      }

      /* resolve_attachments holds the color resolves followed, optionally,
       * by one depth/stencil resolve as the last element
       */
      subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         subpass->resolve_count += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
               desc->pResolveAttachments[j].attachment;

            uint32_t src_a = desc->pColorAttachments[j].attachment;
            if (src_a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[src_a].will_be_resolved =
                  desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED;
            }
         }
      }

      if (subpass->resolve_depth_stencil) {
         p++;
         subpass->resolve_count++;
         uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
         subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;

         uint32_t src_a = desc->pDepthStencilAttachment->attachment;
         if (src_a != VK_ATTACHMENT_UNUSED) {
            pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED;
         }
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED)
         tu_subpass_use_attachment(pass, i, a, pCreateInfo);
   }

   tu_render_pass_patch_input_gmem(pass);

   tu_render_pass_check_feedback_loop(pass);

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (!att->gmem) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   tu_render_pass_cond_config(pass);
   tu_render_pass_gmem_config(pass, device->physical_device);
   tu_render_pass_bandwidth_config(pass);

   /* explicit dependencies first; implicit ones are suppressed where an
    * explicit external dependency already exists
    */
   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   tu_render_pass_add_implicit_deps(pass, pCreateInfo);

   *pRenderPass = tu_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

/* Destroy a render pass created by tu_CreateRenderPass2, freeing the
 * subpass-attachment array and the pass object itself.
 */
VKAPI_ATTR void VKAPI_CALL
tu_DestroyRenderPass(VkDevice _device,
                     VkRenderPass _pass,
                     const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
      vk_common_DestroyRenderPass(_device, _pass, pAllocator);
      return;
   }

   TU_FROM_HANDLE(tu_render_pass, pass, _pass);

   /* VK allows destroying VK_NULL_HANDLE */
   if (!_pass)
      return;

   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
   vk_object_free(&device->vk, pAllocator, pass);
}

/* Fill a render-pass attachment from an image view for dynamic rendering.
 * NOTE(review): definition continues past the visible end of this chunk.
 */
static void
tu_setup_dynamic_attachment(struct tu_render_pass_attachment *att,
                            struct tu_image_view *view)
{
   att->format = view->vk.format;
   att->samples = view->image->layout->nr_samples;

   /* for d32s8, cpp is for the depth image, and
    * att->samples will be used as the cpp for the stencil image
    */
   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
      att->cpp = 4 *
         att->samples;
   else
      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
}

/* Build the single-subpass "dynamic" render pass stored in the command
 * buffer from a vkCmdBeginRendering()-style VkRenderingInfo.  Attachment
 * indices are handed out sequentially via 'a' as views are encountered.
 */
void
tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *info)
{
   struct tu_device *device = cmd_buffer->device;
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;

   pass->subpass_count = 1;
   pass->attachments = cmd_buffer->dynamic_rp_attachments;

   /* One resolve slot is reserved per color attachment; unused ones are
    * marked VK_ATTACHMENT_UNUSED below.
    */
   subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
   subpass->resolve_depth_stencil = false;
   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
   subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
   subpass->feedback_invalidate = false;
   subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
   subpass->input_count = 0;
   subpass->samples = 0;
   subpass->srgb_cntl = 0;
   subpass->raster_order_attachment_access = false;
   subpass->multiview_mask = info->viewMask;

   uint32_t a = 0;
   for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[a];
      const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i];

      if (att_info->imageView == VK_NULL_HANDLE) {
         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         continue;
      }

      TU_FROM_HANDLE(tu_image_view, view, att_info->imageView);
      tu_setup_dynamic_attachment(att, view);
      att->gmem = true;
      att->clear_views = info->viewMask;
      /* Color attachments have no stencil ops; pass 0 for those slots. */
      attachment_set_ops(device, att, att_info->loadOp, 0,
                         att_info->storeOp, 0);
      subpass->color_attachments[i].attachment = a++;

      subpass->samples = view->image->layout->nr_samples;

      if (vk_format_is_srgb(view->vk.format))
         subpass->srgb_cntl |= 1 << i;

      if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) {
         /* Resolve destinations are written directly to memory, never
          * loaded into GMEM.
          */
         struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
         TU_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView);
         tu_setup_dynamic_attachment(resolve_att, resolve_view);
         resolve_att->gmem = false;
         attachment_set_ops(device, resolve_att,
                            VK_ATTACHMENT_LOAD_OP_DONT_CARE, 0,
                            VK_ATTACHMENT_STORE_OP_STORE, 0);
         subpass->resolve_attachments[i].attachment = a++;
         att->will_be_resolved = true;
      } else {
         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         att->will_be_resolved = false;
      }
   }

   if (info->pDepthAttachment || info->pStencilAttachment) {
      /* Depth and stencil must reference the same image in dynamic
       * rendering; prefer the depth info when it has a view, otherwise
       * fall back to the stencil info.
       */
      const struct VkRenderingAttachmentInfo *common_info =
         (info->pDepthAttachment &&
          info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
         info->pDepthAttachment :
         info->pStencilAttachment;

      if (common_info && common_info->imageView != VK_NULL_HANDLE) {
         TU_FROM_HANDLE(tu_image_view, view, common_info->imageView);

         struct tu_render_pass_attachment *att = &pass->attachments[a];
         tu_setup_dynamic_attachment(att, view);
         att->gmem = true;
         att->clear_views = info->viewMask;
         subpass->depth_stencil_attachment.attachment = a++;

         /* Take load/store ops from whichever of the two infos is
          * present; absent aspects get 0.
          */
         attachment_set_ops(device, att,
                            info->pDepthAttachment ? info->pDepthAttachment->loadOp : 0,
                            info->pStencilAttachment ? info->pStencilAttachment->loadOp : 0,
                            info->pDepthAttachment ? info->pDepthAttachment->storeOp : 0,
                            info->pStencilAttachment ?
                            info->pStencilAttachment->storeOp : 0);

         subpass->samples = view->image->layout->nr_samples;

         if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) {
            /* Depth/stencil resolve takes the next resolve slot after the
             * per-color ones (it is always last; see
             * tu_subpass_get_attachment_to_resolve).
             */
            unsigned i = subpass->resolve_count++;
            struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
            TU_FROM_HANDLE(tu_image_view, resolve_view,
                           common_info->resolveImageView);
            tu_setup_dynamic_attachment(resolve_att, resolve_view);
            resolve_att->gmem = false;
            attachment_set_ops(device, resolve_att,
                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                               VK_ATTACHMENT_STORE_OP_STORE,
                               VK_ATTACHMENT_STORE_OP_STORE);
            subpass->resolve_attachments[i].attachment = a++;
            att->will_be_resolved = true;
            subpass->resolve_depth_stencil = true;
         } else {
            att->will_be_resolved = false;
         }
      } else {
         subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
      }
   } else {
      subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
   }

   pass->attachment_count = a;

   tu_render_pass_cond_config(pass);
   tu_render_pass_gmem_config(pass, device->physical_device);
   tu_render_pass_bandwidth_config(pass);
}

/* Build the dynamic pass/subpass for a secondary command buffer from
 * VkCommandBufferInheritanceRenderingInfo.  Only formats and sample counts
 * are known here — there are no image views, so no load/store ops or
 * resolve attachments are set up.
 */
void
tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
                             const VkCommandBufferInheritanceRenderingInfo *info)
{
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;

   pass->subpass_count = 1;
   pass->attachments = cmd_buffer->dynamic_rp_attachments;

   subpass->color_count = info->colorAttachmentCount;
   subpass->resolve_count = 0;
   subpass->resolve_depth_stencil = false;
   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
   subpass->resolve_attachments = NULL;
   subpass->feedback_invalidate = false;
   subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
   subpass->input_count = 0;
   subpass->samples = 0;
   subpass->srgb_cntl = 0;
   subpass->raster_order_attachment_access = false;
   subpass->multiview_mask = info->viewMask;
   subpass->samples = info->rasterizationSamples;

   unsigned a = 0;
   for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[a];
      VkFormat format = info->pColorAttachmentFormats[i];

      if (format == VK_FORMAT_UNDEFINED) {
         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         continue;
      }

      att->format = format;
      att->samples = info->rasterizationSamples;
      subpass->samples = info->rasterizationSamples;
      subpass->color_attachments[i].attachment = a++;

      /* conservatively assume that the attachment may be conditionally
       * loaded/stored.
       */
      att->cond_load_allowed = att->cond_store_allowed = true;
   }

   if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
       info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
      struct tu_render_pass_attachment *att = &pass->attachments[a];
      att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ?
1143 info->depthAttachmentFormat : info->stencilAttachmentFormat; 1144 att->samples = info->rasterizationSamples; 1145 subpass->depth_stencil_attachment.attachment = a++; 1146 att->cond_load_allowed = att->cond_store_allowed = true; 1147 } else { 1148 subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; 1149 } 1150} 1151 1152VKAPI_ATTR void VKAPI_CALL 1153tu_GetRenderAreaGranularity(VkDevice _device, 1154 VkRenderPass renderPass, 1155 VkExtent2D *pGranularity) 1156{ 1157 TU_FROM_HANDLE(tu_device, device, _device); 1158 pGranularity->width = device->physical_device->info->gmem_align_w; 1159 pGranularity->height = device->physical_device->info->gmem_align_h; 1160} 1161 1162uint32_t 1163tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index) 1164{ 1165 if (subpass->resolve_depth_stencil && 1166 index == (subpass->resolve_count - 1)) 1167 return subpass->depth_stencil_attachment.attachment; 1168 1169 return subpass->color_attachments[index].attachment; 1170} 1171