1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat. 3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen 4bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * based in part on anv driver which is: 7bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 8bf215546Sopenharmony_ci */ 9bf215546Sopenharmony_ci 10bf215546Sopenharmony_ci#include "tu_pass.h" 11bf215546Sopenharmony_ci 12bf215546Sopenharmony_ci#include "vk_util.h" 13bf215546Sopenharmony_ci 14bf215546Sopenharmony_ci#include "tu_cmd_buffer.h" 15bf215546Sopenharmony_ci#include "tu_device.h" 16bf215546Sopenharmony_ci#include "tu_image.h" 17bf215546Sopenharmony_ci 18bf215546Sopenharmony_ci/* Return true if we have to fallback to sysmem rendering because the 19bf215546Sopenharmony_ci * dependency can't be satisfied with tiled rendering. 20bf215546Sopenharmony_ci */ 21bf215546Sopenharmony_ci 22bf215546Sopenharmony_cistatic bool 23bf215546Sopenharmony_cidep_invalid_for_gmem(const VkSubpassDependency2 *dep, 24bf215546Sopenharmony_ci VkPipelineStageFlags2 src_stage_mask, 25bf215546Sopenharmony_ci VkPipelineStageFlags2 dst_stage_mask) 26bf215546Sopenharmony_ci{ 27bf215546Sopenharmony_ci /* External dependencies don't matter here. */ 28bf215546Sopenharmony_ci if (dep->srcSubpass == VK_SUBPASS_EXTERNAL || 29bf215546Sopenharmony_ci dep->dstSubpass == VK_SUBPASS_EXTERNAL) 30bf215546Sopenharmony_ci return false; 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci /* We can conceptually break down the process of rewriting a sysmem 33bf215546Sopenharmony_ci * renderpass into a gmem one into two parts: 34bf215546Sopenharmony_ci * 35bf215546Sopenharmony_ci * 1. Split each draw and multisample resolve into N copies, one for each 36bf215546Sopenharmony_ci * bin. (If hardware binning, add one more copy where the FS is disabled 37bf215546Sopenharmony_ci * for the binning pass). This is always allowed because the vertex stage 38bf215546Sopenharmony_ci * is allowed to run an arbitrary number of times and there are no extra 39bf215546Sopenharmony_ci * ordering constraints within a draw. 40bf215546Sopenharmony_ci * 2. Take the last copy of the second-to-last draw and slide it down to 41bf215546Sopenharmony_ci * before the last copy of the last draw. Repeat for each earlier draw 42bf215546Sopenharmony_ci * until the draw pass for the last bin is complete, then repeat for each 43bf215546Sopenharmony_ci * earlier bin until we finish with the first bin. 44bf215546Sopenharmony_ci * 45bf215546Sopenharmony_ci * During this rearranging process, we can't slide draws past each other in 46bf215546Sopenharmony_ci * a way that breaks the subpass dependencies. For each draw, we must slide 47bf215546Sopenharmony_ci * it past (copies of) the rest of the draws in the renderpass. We can 48bf215546Sopenharmony_ci * slide a draw past another if there isn't a dependency between them, or 49bf215546Sopenharmony_ci * if the dependenc(ies) are dependencies between framebuffer-space stages 50bf215546Sopenharmony_ci * only with the BY_REGION bit set. Note that this includes 51bf215546Sopenharmony_ci * self-dependencies, since these may result in pipeline barriers that also 52bf215546Sopenharmony_ci * break the rearranging process. 53bf215546Sopenharmony_ci */ 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer 56bf215546Sopenharmony_ci * Region Dependencies": 57bf215546Sopenharmony_ci */ 58bf215546Sopenharmony_ci const VkPipelineStageFlags2 framebuffer_space_stages = 59bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | 60bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | 61bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | 62bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci return 65bf215546Sopenharmony_ci (src_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) || 66bf215546Sopenharmony_ci (dst_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)) || 67bf215546Sopenharmony_ci !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT); 68bf215546Sopenharmony_ci} 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_cistatic void 71bf215546Sopenharmony_citu_render_pass_add_subpass_dep(struct tu_render_pass *pass, 72bf215546Sopenharmony_ci const VkSubpassDependency2 *dep) 73bf215546Sopenharmony_ci{ 74bf215546Sopenharmony_ci uint32_t src = dep->srcSubpass; 75bf215546Sopenharmony_ci uint32_t dst = dep->dstSubpass; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci /* Ignore subpass self-dependencies as they allow the app to call 78bf215546Sopenharmony_ci * vkCmdPipelineBarrier() inside the render pass and the driver should only 79bf215546Sopenharmony_ci * do the barrier when called, not when starting the render pass. 80bf215546Sopenharmony_ci * 81bf215546Sopenharmony_ci * We cannot decide whether to allow gmem rendering before a barrier 82bf215546Sopenharmony_ci * is actually emitted, so we delay the decision until then. 83bf215546Sopenharmony_ci */ 84bf215546Sopenharmony_ci if (src == dst) 85bf215546Sopenharmony_ci return; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci /* From the Vulkan 1.2.195 spec: 88bf215546Sopenharmony_ci * 89bf215546Sopenharmony_ci * "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask, 90bf215546Sopenharmony_ci * dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization 91bf215546Sopenharmony_ci * and access scopes instead are defined by the parameters of VkMemoryBarrier2." 92bf215546Sopenharmony_ci */ 93bf215546Sopenharmony_ci const VkMemoryBarrier2 *barrier = 94bf215546Sopenharmony_ci vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2); 95bf215546Sopenharmony_ci VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask; 96bf215546Sopenharmony_ci VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask; 97bf215546Sopenharmony_ci VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask; 98bf215546Sopenharmony_ci VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci if (dep_invalid_for_gmem(dep, src_stage_mask, dst_stage_mask)) { 101bf215546Sopenharmony_ci perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency"); 102bf215546Sopenharmony_ci for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++) 103bf215546Sopenharmony_ci pass->gmem_pixels[i] = 0; 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci struct tu_subpass_barrier *dst_barrier; 107bf215546Sopenharmony_ci if (dst == VK_SUBPASS_EXTERNAL) { 108bf215546Sopenharmony_ci dst_barrier = &pass->end_barrier; 109bf215546Sopenharmony_ci } else { 110bf215546Sopenharmony_ci dst_barrier = &pass->subpasses[dst].start_barrier; 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci dst_barrier->src_stage_mask |= src_stage_mask; 114bf215546Sopenharmony_ci dst_barrier->dst_stage_mask |= dst_stage_mask; 115bf215546Sopenharmony_ci dst_barrier->src_access_mask |= src_access_mask; 116bf215546Sopenharmony_ci dst_barrier->dst_access_mask |= dst_access_mask; 117bf215546Sopenharmony_ci} 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci/* We currently only care about undefined layouts, because we have to 120bf215546Sopenharmony_ci * flush/invalidate CCU for those. PREINITIALIZED is the same thing as 121bf215546Sopenharmony_ci * UNDEFINED for anything not linear tiled, but we don't know yet whether the 122bf215546Sopenharmony_ci * images used are tiled, so just assume they are. 123bf215546Sopenharmony_ci */ 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_cistatic bool 126bf215546Sopenharmony_cilayout_undefined(VkImageLayout layout) 127bf215546Sopenharmony_ci{ 128bf215546Sopenharmony_ci return layout == VK_IMAGE_LAYOUT_UNDEFINED || 129bf215546Sopenharmony_ci layout == VK_IMAGE_LAYOUT_PREINITIALIZED; 130bf215546Sopenharmony_ci} 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci/* This implements the following bit of spec text: 133bf215546Sopenharmony_ci * 134bf215546Sopenharmony_ci * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the 135bf215546Sopenharmony_ci * first subpass that uses an attachment, then an implicit subpass 136bf215546Sopenharmony_ci * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is 137bf215546Sopenharmony_ci * used in. The implicit subpass dependency only exists if there 138bf215546Sopenharmony_ci * exists an automatic layout transition away from initialLayout. 139bf215546Sopenharmony_ci * The subpass dependency operates as if defined with the 140bf215546Sopenharmony_ci * following parameters: 141bf215546Sopenharmony_ci * 142bf215546Sopenharmony_ci * VkSubpassDependency implicitDependency = { 143bf215546Sopenharmony_ci * .srcSubpass = VK_SUBPASS_EXTERNAL; 144bf215546Sopenharmony_ci * .dstSubpass = firstSubpass; // First subpass attachment is used in 145bf215546Sopenharmony_ci * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; 146bf215546Sopenharmony_ci * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 147bf215546Sopenharmony_ci * .srcAccessMask = 0; 148bf215546Sopenharmony_ci * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 149bf215546Sopenharmony_ci * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 150bf215546Sopenharmony_ci * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 151bf215546Sopenharmony_ci * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 152bf215546Sopenharmony_ci * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 153bf215546Sopenharmony_ci * .dependencyFlags = 0; 154bf215546Sopenharmony_ci * }; 155bf215546Sopenharmony_ci * 156bf215546Sopenharmony_ci * Similarly, if there is no subpass dependency from the last subpass 157bf215546Sopenharmony_ci * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit 158bf215546Sopenharmony_ci * subpass dependency exists from the last subpass it is used in to 159bf215546Sopenharmony_ci * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists 160bf215546Sopenharmony_ci * if there exists an automatic layout transition into finalLayout. 161bf215546Sopenharmony_ci * The subpass dependency operates as if defined with the following 162bf215546Sopenharmony_ci * parameters: 163bf215546Sopenharmony_ci * 164bf215546Sopenharmony_ci * VkSubpassDependency implicitDependency = { 165bf215546Sopenharmony_ci * .srcSubpass = lastSubpass; // Last subpass attachment is used in 166bf215546Sopenharmony_ci * .dstSubpass = VK_SUBPASS_EXTERNAL; 167bf215546Sopenharmony_ci * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 168bf215546Sopenharmony_ci * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; 169bf215546Sopenharmony_ci * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 170bf215546Sopenharmony_ci * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 171bf215546Sopenharmony_ci * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 172bf215546Sopenharmony_ci * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 173bf215546Sopenharmony_ci * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; 174bf215546Sopenharmony_ci * .dstAccessMask = 0; 175bf215546Sopenharmony_ci * .dependencyFlags = 0; 176bf215546Sopenharmony_ci * }; 177bf215546Sopenharmony_ci * 178bf215546Sopenharmony_ci * Note: currently this is the only use we have for layout transitions, 179bf215546Sopenharmony_ci * besides needing to invalidate CCU at the beginning, so we also flag 180bf215546Sopenharmony_ci * transitions from UNDEFINED here. 181bf215546Sopenharmony_ci */ 182bf215546Sopenharmony_cistatic void 183bf215546Sopenharmony_citu_render_pass_add_implicit_deps(struct tu_render_pass *pass, 184bf215546Sopenharmony_ci const VkRenderPassCreateInfo2 *info) 185bf215546Sopenharmony_ci{ 186bf215546Sopenharmony_ci const VkAttachmentDescription2* att = info->pAttachments; 187bf215546Sopenharmony_ci bool has_external_src[info->subpassCount]; 188bf215546Sopenharmony_ci bool has_external_dst[info->subpassCount]; 189bf215546Sopenharmony_ci bool att_used[pass->attachment_count]; 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci memset(has_external_src, 0, sizeof(has_external_src)); 192bf215546Sopenharmony_ci memset(has_external_dst, 0, sizeof(has_external_dst)); 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->dependencyCount; i++) { 195bf215546Sopenharmony_ci uint32_t src = info->pDependencies[i].srcSubpass; 196bf215546Sopenharmony_ci uint32_t dst = info->pDependencies[i].dstSubpass; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci if (src == dst) 199bf215546Sopenharmony_ci continue; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci if (src == VK_SUBPASS_EXTERNAL) 202bf215546Sopenharmony_ci has_external_src[dst] = true; 203bf215546Sopenharmony_ci if (dst == VK_SUBPASS_EXTERNAL) 204bf215546Sopenharmony_ci has_external_dst[src] = true; 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci memset(att_used, 0, sizeof(att_used)); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci for (unsigned i = 0; i < info->subpassCount; i++) { 210bf215546Sopenharmony_ci const VkSubpassDescription2 *subpass = &info->pSubpasses[i]; 211bf215546Sopenharmony_ci bool src_implicit_dep = false; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) { 214bf215546Sopenharmony_ci uint32_t a = subpass->pInputAttachments[j].attachment; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 217bf215546Sopenharmony_ci continue; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ? 220bf215546Sopenharmony_ci vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) : 221bf215546Sopenharmony_ci VK_IMAGE_LAYOUT_UNDEFINED; 222bf215546Sopenharmony_ci uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false); 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci if ((att[a].initialLayout != subpass->pInputAttachments[j].layout || 225bf215546Sopenharmony_ci stencil_initial_layout != stencil_layout) && 226bf215546Sopenharmony_ci !att_used[a] && !has_external_src[i]) 227bf215546Sopenharmony_ci src_implicit_dep = true; 228bf215546Sopenharmony_ci att_used[a] = true; 229bf215546Sopenharmony_ci } 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 232bf215546Sopenharmony_ci uint32_t a = subpass->pColorAttachments[j].attachment; 233bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 234bf215546Sopenharmony_ci continue; 235bf215546Sopenharmony_ci if (att[a].initialLayout != subpass->pColorAttachments[j].layout && 236bf215546Sopenharmony_ci !att_used[a] && !has_external_src[i]) 237bf215546Sopenharmony_ci src_implicit_dep = true; 238bf215546Sopenharmony_ci att_used[a] = true; 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci if (subpass->pDepthStencilAttachment && 242bf215546Sopenharmony_ci subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { 243bf215546Sopenharmony_ci uint32_t a = subpass->pDepthStencilAttachment->attachment; 244bf215546Sopenharmony_ci uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att); 245bf215546Sopenharmony_ci uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false); 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout || 248bf215546Sopenharmony_ci stencil_initial_layout != stencil_layout) && 249bf215546Sopenharmony_ci !att_used[a] && !has_external_src[i]) { 250bf215546Sopenharmony_ci src_implicit_dep = true; 251bf215546Sopenharmony_ci } 252bf215546Sopenharmony_ci att_used[a] = true; 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci if (subpass->pResolveAttachments) { 256bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 257bf215546Sopenharmony_ci uint32_t a = subpass->pResolveAttachments[j].attachment; 258bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 259bf215546Sopenharmony_ci continue; 260bf215546Sopenharmony_ci if (att[a].initialLayout != subpass->pResolveAttachments[j].layout && 261bf215546Sopenharmony_ci !att_used[a] && !has_external_src[i]) 262bf215546Sopenharmony_ci src_implicit_dep = true; 263bf215546Sopenharmony_ci att_used[a] = true; 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 268bf215546Sopenharmony_ci vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment && 271bf215546Sopenharmony_ci ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 272bf215546Sopenharmony_ci uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 273bf215546Sopenharmony_ci uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att); 274bf215546Sopenharmony_ci uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false); 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout || 277bf215546Sopenharmony_ci stencil_initial_layout != stencil_layout) && 278bf215546Sopenharmony_ci !att_used[a] && !has_external_src[i]) 279bf215546Sopenharmony_ci src_implicit_dep = true; 280bf215546Sopenharmony_ci att_used[a] = true; 281bf215546Sopenharmony_ci } 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci if (src_implicit_dep) { 284bf215546Sopenharmony_ci tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) { 285bf215546Sopenharmony_ci .srcSubpass = VK_SUBPASS_EXTERNAL, 286bf215546Sopenharmony_ci .dstSubpass = i, 287bf215546Sopenharmony_ci .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 288bf215546Sopenharmony_ci .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 289bf215546Sopenharmony_ci .srcAccessMask = 0, 290bf215546Sopenharmony_ci .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 291bf215546Sopenharmony_ci VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 292bf215546Sopenharmony_ci VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 293bf215546Sopenharmony_ci VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 294bf215546Sopenharmony_ci VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 295bf215546Sopenharmony_ci .dependencyFlags = 0, 296bf215546Sopenharmony_ci }); 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci memset(att_used, 0, sizeof(att_used)); 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci for (int i = info->subpassCount - 1; i >= 0; i--) { 303bf215546Sopenharmony_ci const VkSubpassDescription2 *subpass = &info->pSubpasses[i]; 304bf215546Sopenharmony_ci bool dst_implicit_dep = false; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) { 307bf215546Sopenharmony_ci uint32_t a = subpass->pInputAttachments[j].attachment; 308bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 309bf215546Sopenharmony_ci continue; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ? 312bf215546Sopenharmony_ci vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) : 313bf215546Sopenharmony_ci VK_IMAGE_LAYOUT_UNDEFINED; 314bf215546Sopenharmony_ci uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true); 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci if ((att[a].finalLayout != subpass->pInputAttachments[j].layout || 317bf215546Sopenharmony_ci stencil_final_layout != stencil_layout) && 318bf215546Sopenharmony_ci !att_used[a] && !has_external_dst[i]) 319bf215546Sopenharmony_ci dst_implicit_dep = true; 320bf215546Sopenharmony_ci att_used[a] = true; 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 324bf215546Sopenharmony_ci uint32_t a = subpass->pColorAttachments[j].attachment; 325bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 326bf215546Sopenharmony_ci continue; 327bf215546Sopenharmony_ci if (att[a].finalLayout != subpass->pColorAttachments[j].layout && 328bf215546Sopenharmony_ci !att_used[a] && !has_external_dst[i]) 329bf215546Sopenharmony_ci dst_implicit_dep = true; 330bf215546Sopenharmony_ci att_used[a] = true; 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci if (subpass->pDepthStencilAttachment && 334bf215546Sopenharmony_ci subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { 335bf215546Sopenharmony_ci uint32_t a = subpass->pDepthStencilAttachment->attachment; 336bf215546Sopenharmony_ci uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att); 337bf215546Sopenharmony_ci uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout || 340bf215546Sopenharmony_ci stencil_final_layout != stencil_layout) && 341bf215546Sopenharmony_ci !att_used[a] && !has_external_dst[i]) { 342bf215546Sopenharmony_ci dst_implicit_dep = true; 343bf215546Sopenharmony_ci } 344bf215546Sopenharmony_ci att_used[a] = true; 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci if (subpass->pResolveAttachments) { 348bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) { 349bf215546Sopenharmony_ci uint32_t a = subpass->pResolveAttachments[j].attachment; 350bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 351bf215546Sopenharmony_ci continue; 352bf215546Sopenharmony_ci if (att[a].finalLayout != subpass->pResolveAttachments[j].layout && 353bf215546Sopenharmony_ci !att_used[a] && !has_external_dst[i]) 354bf215546Sopenharmony_ci dst_implicit_dep = true; 355bf215546Sopenharmony_ci att_used[a] = true; 356bf215546Sopenharmony_ci } 357bf215546Sopenharmony_ci } 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 360bf215546Sopenharmony_ci vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment && 363bf215546Sopenharmony_ci ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 364bf215546Sopenharmony_ci uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 365bf215546Sopenharmony_ci uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att); 366bf215546Sopenharmony_ci uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true); 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout || 369bf215546Sopenharmony_ci stencil_final_layout != stencil_layout) && 370bf215546Sopenharmony_ci !att_used[a] && !has_external_src[i]) 371bf215546Sopenharmony_ci dst_implicit_dep = true; 372bf215546Sopenharmony_ci att_used[a] = true; 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci if (dst_implicit_dep) { 376bf215546Sopenharmony_ci tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) { 377bf215546Sopenharmony_ci .srcSubpass = i, 378bf215546Sopenharmony_ci .dstSubpass = VK_SUBPASS_EXTERNAL, 379bf215546Sopenharmony_ci .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 380bf215546Sopenharmony_ci .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 381bf215546Sopenharmony_ci .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | 382bf215546Sopenharmony_ci VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 383bf215546Sopenharmony_ci VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | 384bf215546Sopenharmony_ci VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 385bf215546Sopenharmony_ci VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 386bf215546Sopenharmony_ci .dstAccessMask = 0, 387bf215546Sopenharmony_ci .dependencyFlags = 0, 388bf215546Sopenharmony_ci }); 389bf215546Sopenharmony_ci } 390bf215546Sopenharmony_ci } 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci /* Handle UNDEFINED transitions, similar to the handling in tu_barrier(). 393bf215546Sopenharmony_ci * Assume that if an attachment has an initial layout of UNDEFINED, it gets 394bf215546Sopenharmony_ci * transitioned eventually. 395bf215546Sopenharmony_ci */ 396bf215546Sopenharmony_ci for (unsigned i = 0; i < info->attachmentCount; i++) { 397bf215546Sopenharmony_ci if (layout_undefined(att[i].initialLayout)) { 398bf215546Sopenharmony_ci if (vk_format_is_depth_or_stencil(att[i].format)) { 399bf215546Sopenharmony_ci pass->subpasses[0].start_barrier.incoherent_ccu_depth = true; 400bf215546Sopenharmony_ci } else { 401bf215546Sopenharmony_ci pass->subpasses[0].start_barrier.incoherent_ccu_color = true; 402bf215546Sopenharmony_ci } 403bf215546Sopenharmony_ci } 404bf215546Sopenharmony_ci } 405bf215546Sopenharmony_ci} 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci/* If an input attachment is used without an intervening write to the same 408bf215546Sopenharmony_ci * attachment, then we can just use the original image, even in GMEM mode. 409bf215546Sopenharmony_ci * This is an optimization, but it's also important because it allows us to 410bf215546Sopenharmony_ci * avoid having to invalidate UCHE at the beginning of each tile due to it 411bf215546Sopenharmony_ci * becoming invalid. The only reads of GMEM via UCHE should be after an 412bf215546Sopenharmony_ci * earlier subpass modified it, which only works if there's already an 413bf215546Sopenharmony_ci * appropriate dependency that will add the CACHE_INVALIDATE anyway. We 414bf215546Sopenharmony_ci * don't consider this in the dependency code, so this is also required for 415bf215546Sopenharmony_ci * correctness. 416bf215546Sopenharmony_ci */ 417bf215546Sopenharmony_cistatic void 418bf215546Sopenharmony_citu_render_pass_patch_input_gmem(struct tu_render_pass *pass) 419bf215546Sopenharmony_ci{ 420bf215546Sopenharmony_ci bool written[pass->attachment_count]; 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci memset(written, 0, sizeof(written)); 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci for (unsigned i = 0; i < pass->subpass_count; i++) { 425bf215546Sopenharmony_ci struct tu_subpass *subpass = &pass->subpasses[i]; 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->input_count; j++) { 428bf215546Sopenharmony_ci uint32_t a = subpass->input_attachments[j].attachment; 429bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 430bf215546Sopenharmony_ci continue; 431bf215546Sopenharmony_ci subpass->input_attachments[j].patch_input_gmem = written[a]; 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->color_count; j++) { 435bf215546Sopenharmony_ci uint32_t a = subpass->color_attachments[j].attachment; 436bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 437bf215546Sopenharmony_ci continue; 438bf215546Sopenharmony_ci written[a] = true; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci for (unsigned k = 0; k < subpass->input_count; k++) { 441bf215546Sopenharmony_ci if (subpass->input_attachments[k].attachment == a && 442bf215546Sopenharmony_ci !subpass->input_attachments[k].patch_input_gmem) { 443bf215546Sopenharmony_ci /* For render feedback loops, we have no idea whether the use 444bf215546Sopenharmony_ci * as a color attachment or input attachment will come first, 445bf215546Sopenharmony_ci * so we have to always use GMEM in case the color attachment 446bf215546Sopenharmony_ci * comes first and defensively invalidate UCHE in case the 447bf215546Sopenharmony_ci * input attachment comes first. 448bf215546Sopenharmony_ci */ 449bf215546Sopenharmony_ci subpass->feedback_invalidate = true; 450bf215546Sopenharmony_ci subpass->input_attachments[k].patch_input_gmem = true; 451bf215546Sopenharmony_ci } 452bf215546Sopenharmony_ci } 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->resolve_count; j++) { 456bf215546Sopenharmony_ci uint32_t a = subpass->resolve_attachments[j].attachment; 457bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 458bf215546Sopenharmony_ci continue; 459bf215546Sopenharmony_ci written[a] = true; 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { 463bf215546Sopenharmony_ci written[subpass->depth_stencil_attachment.attachment] = true; 464bf215546Sopenharmony_ci for (unsigned k = 0; k < subpass->input_count; k++) { 465bf215546Sopenharmony_ci if (subpass->input_attachments[k].attachment == 466bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment && 467bf215546Sopenharmony_ci !subpass->input_attachments[k].patch_input_gmem) { 468bf215546Sopenharmony_ci subpass->feedback_invalidate = true; 469bf215546Sopenharmony_ci subpass->input_attachments[k].patch_input_gmem = true; 470bf215546Sopenharmony_ci } 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci } 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci} 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_cistatic void 477bf215546Sopenharmony_citu_render_pass_check_feedback_loop(struct tu_render_pass *pass) 478bf215546Sopenharmony_ci{ 479bf215546Sopenharmony_ci for (unsigned i = 0; i < pass->subpass_count; i++) { 480bf215546Sopenharmony_ci struct tu_subpass *subpass = &pass->subpasses[i]; 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci for (unsigned j = 0; j < subpass->color_count; j++) { 483bf215546Sopenharmony_ci uint32_t a = subpass->color_attachments[j].attachment; 484bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 485bf215546Sopenharmony_ci continue; 486bf215546Sopenharmony_ci for (unsigned k = 0; k < subpass->input_count; k++) { 487bf215546Sopenharmony_ci if (subpass->input_attachments[k].attachment == a) { 488bf215546Sopenharmony_ci subpass->feedback_loop_color = true; 489bf215546Sopenharmony_ci break; 490bf215546Sopenharmony_ci } 491bf215546Sopenharmony_ci } 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { 495bf215546Sopenharmony_ci for (unsigned k = 0; k < subpass->input_count; k++) { 496bf215546Sopenharmony_ci if (subpass->input_attachments[k].attachment == 497bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment) { 498bf215546Sopenharmony_ci subpass->feedback_loop_ds = true; 499bf215546Sopenharmony_ci break; 500bf215546Sopenharmony_ci } 501bf215546Sopenharmony_ci } 502bf215546Sopenharmony_ci } 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci} 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_cistatic void update_samples(struct tu_subpass *subpass, 507bf215546Sopenharmony_ci VkSampleCountFlagBits samples) 508bf215546Sopenharmony_ci{ 509bf215546Sopenharmony_ci assert(subpass->samples == 0 || subpass->samples == samples); 510bf215546Sopenharmony_ci subpass->samples = samples; 511bf215546Sopenharmony_ci} 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_cistatic void 514bf215546Sopenharmony_citu_render_pass_cond_config(struct tu_render_pass *pass) 515bf215546Sopenharmony_ci{ 516bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) { 517bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[i]; 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci att->cond_load_allowed = 520bf215546Sopenharmony_ci (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved; 521bf215546Sopenharmony_ci att->cond_store_allowed = 522bf215546Sopenharmony_ci (att->store || att->store_stencil) && !att->clear_mask; 523bf215546Sopenharmony_ci } 524bf215546Sopenharmony_ci} 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_cistatic void 527bf215546Sopenharmony_citu_render_pass_gmem_config(struct tu_render_pass *pass, 528bf215546Sopenharmony_ci const struct tu_physical_device *phys_dev) 529bf215546Sopenharmony_ci{ 530bf215546Sopenharmony_ci for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT; 531bf215546Sopenharmony_ci layout++) { 532bf215546Sopenharmony_ci /* From the VK_KHR_multiview spec: 533bf215546Sopenharmony_ci * 534bf215546Sopenharmony_ci * Multiview is all-or-nothing for a render pass - that is, either all 535bf215546Sopenharmony_ci * subpasses must have a non-zero view mask (though some subpasses may 536bf215546Sopenharmony_ci * have only one view) or all must be zero. 537bf215546Sopenharmony_ci * 538bf215546Sopenharmony_ci * This means we only have to check one of the view masks. 539bf215546Sopenharmony_ci */ 540bf215546Sopenharmony_ci if (pass->subpasses[0].multiview_mask) { 541bf215546Sopenharmony_ci /* It seems multiview must use sysmem rendering. */ 542bf215546Sopenharmony_ci pass->gmem_pixels[layout] = 0; 543bf215546Sopenharmony_ci continue; 544bf215546Sopenharmony_ci } 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci /* log2(gmem_align/(tile_align_w*tile_align_h)) */ 547bf215546Sopenharmony_ci uint32_t block_align_shift = 3; 548bf215546Sopenharmony_ci uint32_t tile_align_w = phys_dev->info->tile_align_w; 549bf215546Sopenharmony_ci uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * 550bf215546Sopenharmony_ci phys_dev->info->tile_align_h; 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci /* calculate total bytes per pixel */ 553bf215546Sopenharmony_ci uint32_t cpp_total = 0; 554bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) { 555bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[i]; 556bf215546Sopenharmony_ci bool cpp1 = (att->cpp == 1); 557bf215546Sopenharmony_ci if (att->gmem) { 558bf215546Sopenharmony_ci cpp_total += att->cpp; 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci /* take into account the separate stencil: */ 561bf215546Sopenharmony_ci if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 562bf215546Sopenharmony_ci cpp1 = (att->samples == 1); 563bf215546Sopenharmony_ci cpp_total += att->samples; 564bf215546Sopenharmony_ci } 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci /* texture pitch must be aligned to 64, use a tile_align_w that is 567bf215546Sopenharmony_ci * a multiple of 64 for cpp==1 attachment to work as input 568bf215546Sopenharmony_ci * attachment 569bf215546Sopenharmony_ci */ 570bf215546Sopenharmony_ci if (cpp1 && tile_align_w % 64 != 0) { 571bf215546Sopenharmony_ci tile_align_w *= 2; 572bf215546Sopenharmony_ci block_align_shift -= 1; 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci } 575bf215546Sopenharmony_ci } 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci pass->tile_align_w = tile_align_w; 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci /* no gmem attachments */ 580bf215546Sopenharmony_ci if (cpp_total == 0) { 581bf215546Sopenharmony_ci /* any value non-zero value so tiling config works with no 582bf215546Sopenharmony_ci * attachments 583bf215546Sopenharmony_ci */ 584bf215546Sopenharmony_ci pass->gmem_pixels[layout] = 1024 * 1024; 585bf215546Sopenharmony_ci continue; 586bf215546Sopenharmony_ci } 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* TODO: this algorithm isn't optimal 589bf215546Sopenharmony_ci * for example, two attachments with cpp = {1, 4} 590bf215546Sopenharmony_ci * result: nblocks = {12, 52}, pixels = 196608 591bf215546Sopenharmony_ci * optimal: nblocks = {13, 51}, pixels = 208896 592bf215546Sopenharmony_ci */ 593bf215546Sopenharmony_ci uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL 594bf215546Sopenharmony_ci ? phys_dev->gmem_size 595bf215546Sopenharmony_ci : phys_dev->ccu_offset_gmem; 596bf215546Sopenharmony_ci uint32_t gmem_blocks = gmem_size / gmem_align; 597bf215546Sopenharmony_ci uint32_t offset = 0, pixels = ~0u, i; 598bf215546Sopenharmony_ci for (i = 0; i < pass->attachment_count; i++) { 599bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[i]; 600bf215546Sopenharmony_ci if (!att->gmem) 601bf215546Sopenharmony_ci continue; 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci att->gmem_offset[layout] = offset; 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci uint32_t align = MAX2(1, att->cpp >> block_align_shift); 606bf215546Sopenharmony_ci uint32_t nblocks = 607bf215546Sopenharmony_ci MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align); 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci if (nblocks > gmem_blocks) 610bf215546Sopenharmony_ci break; 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci gmem_blocks -= nblocks; 613bf215546Sopenharmony_ci cpp_total -= att->cpp; 614bf215546Sopenharmony_ci offset += nblocks * gmem_align; 615bf215546Sopenharmony_ci pixels = MIN2(pixels, nblocks * gmem_align / att->cpp); 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci /* repeat the same for separate stencil */ 618bf215546Sopenharmony_ci if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 619bf215546Sopenharmony_ci att->gmem_offset_stencil[layout] = offset; 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci /* note: for s8_uint, block align is always 1 */ 622bf215546Sopenharmony_ci uint32_t nblocks = gmem_blocks * att->samples / cpp_total; 623bf215546Sopenharmony_ci if (nblocks > gmem_blocks) 624bf215546Sopenharmony_ci break; 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci gmem_blocks -= nblocks; 627bf215546Sopenharmony_ci cpp_total -= att->samples; 628bf215546Sopenharmony_ci offset += nblocks * gmem_align; 629bf215546Sopenharmony_ci pixels = MIN2(pixels, nblocks * gmem_align / att->samples); 630bf215546Sopenharmony_ci } 631bf215546Sopenharmony_ci } 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci /* if the loop didn't complete then the gmem config is impossible */ 634bf215546Sopenharmony_ci if (i == pass->attachment_count) 635bf215546Sopenharmony_ci pass->gmem_pixels[layout] = pixels; 636bf215546Sopenharmony_ci } 637bf215546Sopenharmony_ci} 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_cistatic void 640bf215546Sopenharmony_citu_render_pass_bandwidth_config(struct tu_render_pass *pass) 641bf215546Sopenharmony_ci{ 642bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) { 643bf215546Sopenharmony_ci const struct tu_render_pass_attachment *att = &pass->attachments[i]; 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci /* approximate tu_load_gmem_attachment */ 646bf215546Sopenharmony_ci if (att->load) 647bf215546Sopenharmony_ci pass->gmem_bandwidth_per_pixel += att->cpp; 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci /* approximate tu_store_gmem_attachment */ 650bf215546Sopenharmony_ci if (att->store) 651bf215546Sopenharmony_ci pass->gmem_bandwidth_per_pixel += att->cpp; 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci /* approximate tu_clear_sysmem_attachment */ 654bf215546Sopenharmony_ci if (att->clear_mask) 655bf215546Sopenharmony_ci pass->sysmem_bandwidth_per_pixel += att->cpp; 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci /* approximate tu6_emit_sysmem_resolves */ 658bf215546Sopenharmony_ci if (att->will_be_resolved) { 659bf215546Sopenharmony_ci pass->sysmem_bandwidth_per_pixel += 660bf215546Sopenharmony_ci att->cpp + att->cpp / att->samples; 661bf215546Sopenharmony_ci } 662bf215546Sopenharmony_ci } 663bf215546Sopenharmony_ci} 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_cistatic void 666bf215546Sopenharmony_ciattachment_set_ops(struct tu_device *device, 667bf215546Sopenharmony_ci struct tu_render_pass_attachment *att, 668bf215546Sopenharmony_ci VkAttachmentLoadOp load_op, 669bf215546Sopenharmony_ci VkAttachmentLoadOp stencil_load_op, 670bf215546Sopenharmony_ci VkAttachmentStoreOp store_op, 671bf215546Sopenharmony_ci VkAttachmentStoreOp stencil_store_op) 672bf215546Sopenharmony_ci{ 673bf215546Sopenharmony_ci if (device->instance->debug_flags & TU_DEBUG_DONT_CARE_AS_LOAD) { 674bf215546Sopenharmony_ci if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) 675bf215546Sopenharmony_ci load_op = VK_ATTACHMENT_LOAD_OP_LOAD; 676bf215546Sopenharmony_ci if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) 677bf215546Sopenharmony_ci stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; 678bf215546Sopenharmony_ci } 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci /* load/store ops */ 681bf215546Sopenharmony_ci att->clear_mask = 682bf215546Sopenharmony_ci (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0; 683bf215546Sopenharmony_ci att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD); 684bf215546Sopenharmony_ci att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE); 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR); 687bf215546Sopenharmony_ci bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD); 688bf215546Sopenharmony_ci bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE); 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci switch (att->format) { 691bf215546Sopenharmony_ci case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */ 692bf215546Sopenharmony_ci if (att->clear_mask) 693bf215546Sopenharmony_ci att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT; 694bf215546Sopenharmony_ci if (stencil_clear) 695bf215546Sopenharmony_ci att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; 696bf215546Sopenharmony_ci if (stencil_load) 697bf215546Sopenharmony_ci att->load = true; 698bf215546Sopenharmony_ci if (stencil_store) 699bf215546Sopenharmony_ci att->store = true; 700bf215546Sopenharmony_ci break; 701bf215546Sopenharmony_ci case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */ 702bf215546Sopenharmony_ci att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0; 703bf215546Sopenharmony_ci att->load = stencil_load; 704bf215546Sopenharmony_ci att->store = stencil_store; 705bf215546Sopenharmony_ci break; 706bf215546Sopenharmony_ci case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */ 707bf215546Sopenharmony_ci if (att->clear_mask) 708bf215546Sopenharmony_ci att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT; 709bf215546Sopenharmony_ci if (stencil_clear) 710bf215546Sopenharmony_ci att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; 711bf215546Sopenharmony_ci if (stencil_load) 712bf215546Sopenharmony_ci att->load_stencil = true; 713bf215546Sopenharmony_ci if (stencil_store) 714bf215546Sopenharmony_ci att->store_stencil = true; 715bf215546Sopenharmony_ci break; 716bf215546Sopenharmony_ci default: 717bf215546Sopenharmony_ci break; 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci} 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_cistatic bool 722bf215546Sopenharmony_ciis_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve) 723bf215546Sopenharmony_ci{ 724bf215546Sopenharmony_ci if (depth_stencil_resolve && 725bf215546Sopenharmony_ci depth_stencil_resolve->pDepthStencilResolveAttachment && 726bf215546Sopenharmony_ci depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { 727bf215546Sopenharmony_ci return true; 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci return false; 730bf215546Sopenharmony_ci} 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_cistatic void 733bf215546Sopenharmony_citu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo) 734bf215546Sopenharmony_ci{ 735bf215546Sopenharmony_ci struct tu_subpass *subpass = &pass->subpasses[i]; 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci pass->attachments[a].gmem = true; 738bf215546Sopenharmony_ci update_samples(subpass, pCreateInfo->pAttachments[a].samples); 739bf215546Sopenharmony_ci pass->attachments[a].clear_views |= subpass->multiview_mask; 740bf215546Sopenharmony_ci} 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 743bf215546Sopenharmony_citu_CreateRenderPass2(VkDevice _device, 744bf215546Sopenharmony_ci const VkRenderPassCreateInfo2 *pCreateInfo, 745bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 746bf215546Sopenharmony_ci VkRenderPass *pRenderPass) 747bf215546Sopenharmony_ci{ 748bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_device, device, _device); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) 751bf215546Sopenharmony_ci return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator, 752bf215546Sopenharmony_ci pRenderPass); 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci struct tu_render_pass *pass; 755bf215546Sopenharmony_ci size_t size; 756bf215546Sopenharmony_ci size_t attachments_offset; 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2); 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci size = sizeof(*pass); 761bf215546Sopenharmony_ci size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); 762bf215546Sopenharmony_ci attachments_offset = size; 763bf215546Sopenharmony_ci size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci pass = vk_object_zalloc(&device->vk, pAllocator, size, 766bf215546Sopenharmony_ci VK_OBJECT_TYPE_RENDER_PASS); 767bf215546Sopenharmony_ci if (pass == NULL) 768bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci pass->attachment_count = pCreateInfo->attachmentCount; 771bf215546Sopenharmony_ci pass->subpass_count = pCreateInfo->subpassCount; 772bf215546Sopenharmony_ci pass->attachments = (void *) pass + attachments_offset; 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { 775bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[i]; 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci att->format = pCreateInfo->pAttachments[i].format; 778bf215546Sopenharmony_ci att->samples = pCreateInfo->pAttachments[i].samples; 779bf215546Sopenharmony_ci /* for d32s8, cpp is for the depth image, and 780bf215546Sopenharmony_ci * att->samples will be used as the cpp for the stencil image 781bf215546Sopenharmony_ci */ 782bf215546Sopenharmony_ci if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) 783bf215546Sopenharmony_ci att->cpp = 4 * att->samples; 784bf215546Sopenharmony_ci else 785bf215546Sopenharmony_ci att->cpp = vk_format_get_blocksize(att->format) * att->samples; 786bf215546Sopenharmony_ci /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */ 787bf215546Sopenharmony_ci att->gmem = false; 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp; 790bf215546Sopenharmony_ci VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp; 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci attachment_set_ops(device, att, loadOp, stencilLoadOp, 793bf215546Sopenharmony_ci pCreateInfo->pAttachments[i].storeOp, 794bf215546Sopenharmony_ci pCreateInfo->pAttachments[i].stencilStoreOp); 795bf215546Sopenharmony_ci } 796bf215546Sopenharmony_ci uint32_t subpass_attachment_count = 0; 797bf215546Sopenharmony_ci struct tu_subpass_attachment *p; 798bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { 799bf215546Sopenharmony_ci const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; 800bf215546Sopenharmony_ci const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 801bf215546Sopenharmony_ci vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci subpass_attachment_count += 804bf215546Sopenharmony_ci desc->inputAttachmentCount + desc->colorAttachmentCount + 805bf215546Sopenharmony_ci (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) + 806bf215546Sopenharmony_ci (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0); 807bf215546Sopenharmony_ci } 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci if (subpass_attachment_count) { 810bf215546Sopenharmony_ci pass->subpass_attachments = vk_alloc2( 811bf215546Sopenharmony_ci &device->vk.alloc, pAllocator, 812bf215546Sopenharmony_ci subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8, 813bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 814bf215546Sopenharmony_ci if (pass->subpass_attachments == NULL) { 815bf215546Sopenharmony_ci vk_object_free(&device->vk, pAllocator, pass); 816bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 817bf215546Sopenharmony_ci } 818bf215546Sopenharmony_ci } else 819bf215546Sopenharmony_ci pass->subpass_attachments = NULL; 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci p = pass->subpass_attachments; 822bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { 823bf215546Sopenharmony_ci const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; 824bf215546Sopenharmony_ci const VkSubpassDescriptionDepthStencilResolve *ds_resolve = 825bf215546Sopenharmony_ci vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); 826bf215546Sopenharmony_ci struct tu_subpass *subpass = &pass->subpasses[i]; 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci subpass->input_count = desc->inputAttachmentCount; 829bf215546Sopenharmony_ci subpass->color_count = desc->colorAttachmentCount; 830bf215546Sopenharmony_ci subpass->resolve_count = 0; 831bf215546Sopenharmony_ci subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve); 832bf215546Sopenharmony_ci subpass->samples = 0; 833bf215546Sopenharmony_ci subpass->srgb_cntl = 0; 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci const VkSubpassDescriptionFlagBits raster_order_access_bits = 836bf215546Sopenharmony_ci VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM | 837bf215546Sopenharmony_ci VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM | 838bf215546Sopenharmony_ci VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM; 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci subpass->raster_order_attachment_access = desc->flags & raster_order_access_bits; 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci subpass->multiview_mask = desc->viewMask; 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci if (desc->inputAttachmentCount > 0) { 845bf215546Sopenharmony_ci subpass->input_attachments = p; 846bf215546Sopenharmony_ci p += desc->inputAttachmentCount; 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { 849bf215546Sopenharmony_ci uint32_t a = desc->pInputAttachments[j].attachment; 850bf215546Sopenharmony_ci subpass->input_attachments[j].attachment = a; 851bf215546Sopenharmony_ci /* Note: attachments only used as input attachments will be read 852bf215546Sopenharmony_ci * directly instead of through gmem, so we don't mark input 853bf215546Sopenharmony_ci * attachments as needing gmem. 854bf215546Sopenharmony_ci */ 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci } 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci if (desc->colorAttachmentCount > 0) { 859bf215546Sopenharmony_ci subpass->color_attachments = p; 860bf215546Sopenharmony_ci p += desc->colorAttachmentCount; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { 863bf215546Sopenharmony_ci uint32_t a = desc->pColorAttachments[j].attachment; 864bf215546Sopenharmony_ci subpass->color_attachments[j].attachment = a; 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci if (a != VK_ATTACHMENT_UNUSED) { 867bf215546Sopenharmony_ci tu_subpass_use_attachment(pass, i, a, pCreateInfo); 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci if (vk_format_is_srgb(pass->attachments[a].format)) 870bf215546Sopenharmony_ci subpass->srgb_cntl |= 1 << j; 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci } 873bf215546Sopenharmony_ci } 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL; 876bf215546Sopenharmony_ci if (desc->pResolveAttachments) { 877bf215546Sopenharmony_ci p += desc->colorAttachmentCount; 878bf215546Sopenharmony_ci subpass->resolve_count += desc->colorAttachmentCount; 879bf215546Sopenharmony_ci for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { 880bf215546Sopenharmony_ci subpass->resolve_attachments[j].attachment = 881bf215546Sopenharmony_ci desc->pResolveAttachments[j].attachment; 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci uint32_t src_a = desc->pColorAttachments[j].attachment; 884bf215546Sopenharmony_ci if (src_a != VK_ATTACHMENT_UNUSED) { 885bf215546Sopenharmony_ci pass->attachments[src_a].will_be_resolved = 886bf215546Sopenharmony_ci desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED; 887bf215546Sopenharmony_ci } 888bf215546Sopenharmony_ci } 889bf215546Sopenharmony_ci } 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci if (subpass->resolve_depth_stencil) { 892bf215546Sopenharmony_ci p++; 893bf215546Sopenharmony_ci subpass->resolve_count++; 894bf215546Sopenharmony_ci uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; 895bf215546Sopenharmony_ci subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a; 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci uint32_t src_a = desc->pDepthStencilAttachment->attachment; 898bf215546Sopenharmony_ci if (src_a != VK_ATTACHMENT_UNUSED) { 899bf215546Sopenharmony_ci pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED; 900bf215546Sopenharmony_ci } 901bf215546Sopenharmony_ci } 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci uint32_t a = desc->pDepthStencilAttachment ? 904bf215546Sopenharmony_ci desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED; 905bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment = a; 906bf215546Sopenharmony_ci if (a != VK_ATTACHMENT_UNUSED) 907bf215546Sopenharmony_ci tu_subpass_use_attachment(pass, i, a, pCreateInfo); 908bf215546Sopenharmony_ci } 909bf215546Sopenharmony_ci 910bf215546Sopenharmony_ci tu_render_pass_patch_input_gmem(pass); 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci tu_render_pass_check_feedback_loop(pass); 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci /* disable unused attachments */ 915bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) { 916bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[i]; 917bf215546Sopenharmony_ci if (!att->gmem) { 918bf215546Sopenharmony_ci att->clear_mask = 0; 919bf215546Sopenharmony_ci att->load = false; 920bf215546Sopenharmony_ci } 921bf215546Sopenharmony_ci } 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci tu_render_pass_cond_config(pass); 924bf215546Sopenharmony_ci tu_render_pass_gmem_config(pass, device->physical_device); 925bf215546Sopenharmony_ci tu_render_pass_bandwidth_config(pass); 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { 928bf215546Sopenharmony_ci tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]); 929bf215546Sopenharmony_ci } 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_ci tu_render_pass_add_implicit_deps(pass, pCreateInfo); 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci *pRenderPass = tu_render_pass_to_handle(pass); 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci return VK_SUCCESS; 936bf215546Sopenharmony_ci} 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 939bf215546Sopenharmony_citu_DestroyRenderPass(VkDevice _device, 940bf215546Sopenharmony_ci VkRenderPass _pass, 941bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 942bf215546Sopenharmony_ci{ 943bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_device, device, _device); 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) { 946bf215546Sopenharmony_ci vk_common_DestroyRenderPass(_device, _pass, pAllocator); 947bf215546Sopenharmony_ci return; 948bf215546Sopenharmony_ci } 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_render_pass, pass, _pass); 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci if (!_pass) 953bf215546Sopenharmony_ci return; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments); 956bf215546Sopenharmony_ci vk_object_free(&device->vk, pAllocator, pass); 957bf215546Sopenharmony_ci} 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_cistatic void 960bf215546Sopenharmony_citu_setup_dynamic_attachment(struct tu_render_pass_attachment *att, 961bf215546Sopenharmony_ci struct tu_image_view *view) 962bf215546Sopenharmony_ci{ 963bf215546Sopenharmony_ci att->format = view->vk.format; 964bf215546Sopenharmony_ci att->samples = view->image->layout->nr_samples; 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci /* for d32s8, cpp is for the depth image, and 967bf215546Sopenharmony_ci * att->samples will be used as the cpp for the stencil image 968bf215546Sopenharmony_ci */ 969bf215546Sopenharmony_ci if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) 970bf215546Sopenharmony_ci att->cpp = 4 * att->samples; 971bf215546Sopenharmony_ci else 972bf215546Sopenharmony_ci att->cpp = vk_format_get_blocksize(att->format) * att->samples; 973bf215546Sopenharmony_ci} 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_civoid 976bf215546Sopenharmony_citu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, 977bf215546Sopenharmony_ci const VkRenderingInfo *info) 978bf215546Sopenharmony_ci{ 979bf215546Sopenharmony_ci struct tu_device *device = cmd_buffer->device; 980bf215546Sopenharmony_ci struct tu_render_pass *pass = &cmd_buffer->dynamic_pass; 981bf215546Sopenharmony_ci struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass; 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci pass->subpass_count = 1; 984bf215546Sopenharmony_ci pass->attachments = cmd_buffer->dynamic_rp_attachments; 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci subpass->color_count = subpass->resolve_count = info->colorAttachmentCount; 987bf215546Sopenharmony_ci subpass->resolve_depth_stencil = false; 988bf215546Sopenharmony_ci subpass->color_attachments = cmd_buffer->dynamic_color_attachments; 989bf215546Sopenharmony_ci subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments; 990bf215546Sopenharmony_ci subpass->feedback_invalidate = false; 991bf215546Sopenharmony_ci subpass->feedback_loop_ds = subpass->feedback_loop_color = false; 992bf215546Sopenharmony_ci subpass->input_count = 0; 993bf215546Sopenharmony_ci subpass->samples = 0; 994bf215546Sopenharmony_ci subpass->srgb_cntl = 0; 995bf215546Sopenharmony_ci subpass->raster_order_attachment_access = false; 996bf215546Sopenharmony_ci subpass->multiview_mask = info->viewMask; 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_ci uint32_t a = 0; 999bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->colorAttachmentCount; i++) { 1000bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[a]; 1001bf215546Sopenharmony_ci const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i]; 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci if (att_info->imageView == VK_NULL_HANDLE) { 1004bf215546Sopenharmony_ci subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED; 1005bf215546Sopenharmony_ci subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; 1006bf215546Sopenharmony_ci continue; 1007bf215546Sopenharmony_ci } 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image_view, view, att_info->imageView); 1010bf215546Sopenharmony_ci tu_setup_dynamic_attachment(att, view); 1011bf215546Sopenharmony_ci att->gmem = true; 1012bf215546Sopenharmony_ci att->clear_views = info->viewMask; 1013bf215546Sopenharmony_ci attachment_set_ops(device, att, att_info->loadOp, 0, 1014bf215546Sopenharmony_ci att_info->storeOp, 0); 1015bf215546Sopenharmony_ci subpass->color_attachments[i].attachment = a++; 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci subpass->samples = view->image->layout->nr_samples; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci if (vk_format_is_srgb(view->vk.format)) 1020bf215546Sopenharmony_ci subpass->srgb_cntl |= 1 << i; 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) { 1023bf215546Sopenharmony_ci struct tu_render_pass_attachment *resolve_att = &pass->attachments[a]; 1024bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView); 1025bf215546Sopenharmony_ci tu_setup_dynamic_attachment(resolve_att, resolve_view); 1026bf215546Sopenharmony_ci resolve_att->gmem = false; 1027bf215546Sopenharmony_ci attachment_set_ops(device, resolve_att, 1028bf215546Sopenharmony_ci VK_ATTACHMENT_LOAD_OP_DONT_CARE, 0, 1029bf215546Sopenharmony_ci VK_ATTACHMENT_STORE_OP_STORE, 0); 1030bf215546Sopenharmony_ci subpass->resolve_attachments[i].attachment = a++; 1031bf215546Sopenharmony_ci att->will_be_resolved = true; 1032bf215546Sopenharmony_ci } else { 1033bf215546Sopenharmony_ci subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; 1034bf215546Sopenharmony_ci att->will_be_resolved = false; 1035bf215546Sopenharmony_ci } 1036bf215546Sopenharmony_ci } 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci if (info->pDepthAttachment || info->pStencilAttachment) { 1039bf215546Sopenharmony_ci const struct VkRenderingAttachmentInfo *common_info = 1040bf215546Sopenharmony_ci (info->pDepthAttachment && 1041bf215546Sopenharmony_ci info->pDepthAttachment->imageView != VK_NULL_HANDLE) ? 1042bf215546Sopenharmony_ci info->pDepthAttachment : 1043bf215546Sopenharmony_ci info->pStencilAttachment; 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci if (common_info && common_info->imageView != VK_NULL_HANDLE) { 1046bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image_view, view, common_info->imageView); 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[a]; 1049bf215546Sopenharmony_ci tu_setup_dynamic_attachment(att, view); 1050bf215546Sopenharmony_ci att->gmem = true; 1051bf215546Sopenharmony_ci att->clear_views = info->viewMask; 1052bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment = a++; 1053bf215546Sopenharmony_ci 1054bf215546Sopenharmony_ci attachment_set_ops(device, att, 1055bf215546Sopenharmony_ci info->pDepthAttachment ? info->pDepthAttachment->loadOp : 0, 1056bf215546Sopenharmony_ci info->pStencilAttachment ? info->pStencilAttachment->loadOp : 0, 1057bf215546Sopenharmony_ci info->pDepthAttachment ? info->pDepthAttachment->storeOp : 0, 1058bf215546Sopenharmony_ci info->pStencilAttachment ? info->pStencilAttachment->storeOp : 0); 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci subpass->samples = view->image->layout->nr_samples; 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) { 1063bf215546Sopenharmony_ci unsigned i = subpass->resolve_count++; 1064bf215546Sopenharmony_ci struct tu_render_pass_attachment *resolve_att = &pass->attachments[a]; 1065bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image_view, resolve_view, 1066bf215546Sopenharmony_ci common_info->resolveImageView); 1067bf215546Sopenharmony_ci tu_setup_dynamic_attachment(resolve_att, resolve_view); 1068bf215546Sopenharmony_ci resolve_att->gmem = false; 1069bf215546Sopenharmony_ci attachment_set_ops(device, resolve_att, 1070bf215546Sopenharmony_ci VK_ATTACHMENT_LOAD_OP_DONT_CARE, 1071bf215546Sopenharmony_ci VK_ATTACHMENT_LOAD_OP_DONT_CARE, 1072bf215546Sopenharmony_ci VK_ATTACHMENT_STORE_OP_STORE, 1073bf215546Sopenharmony_ci VK_ATTACHMENT_STORE_OP_STORE); 1074bf215546Sopenharmony_ci subpass->resolve_attachments[i].attachment = a++; 1075bf215546Sopenharmony_ci att->will_be_resolved = true; 1076bf215546Sopenharmony_ci subpass->resolve_depth_stencil = true; 1077bf215546Sopenharmony_ci } else { 1078bf215546Sopenharmony_ci att->will_be_resolved = false; 1079bf215546Sopenharmony_ci } 1080bf215546Sopenharmony_ci } else { 1081bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; 1082bf215546Sopenharmony_ci } 1083bf215546Sopenharmony_ci } else { 1084bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; 1085bf215546Sopenharmony_ci } 1086bf215546Sopenharmony_ci 1087bf215546Sopenharmony_ci pass->attachment_count = a; 1088bf215546Sopenharmony_ci 1089bf215546Sopenharmony_ci tu_render_pass_cond_config(pass); 1090bf215546Sopenharmony_ci tu_render_pass_gmem_config(pass, device->physical_device); 1091bf215546Sopenharmony_ci tu_render_pass_bandwidth_config(pass); 1092bf215546Sopenharmony_ci} 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_civoid 1095bf215546Sopenharmony_citu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer, 1096bf215546Sopenharmony_ci const VkCommandBufferInheritanceRenderingInfo *info) 1097bf215546Sopenharmony_ci{ 1098bf215546Sopenharmony_ci struct tu_render_pass *pass = &cmd_buffer->dynamic_pass; 1099bf215546Sopenharmony_ci struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass; 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_ci pass->subpass_count = 1; 1102bf215546Sopenharmony_ci pass->attachments = cmd_buffer->dynamic_rp_attachments; 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_ci subpass->color_count = info->colorAttachmentCount; 1105bf215546Sopenharmony_ci subpass->resolve_count = 0; 1106bf215546Sopenharmony_ci subpass->resolve_depth_stencil = false; 1107bf215546Sopenharmony_ci subpass->color_attachments = cmd_buffer->dynamic_color_attachments; 1108bf215546Sopenharmony_ci subpass->resolve_attachments = NULL; 1109bf215546Sopenharmony_ci subpass->feedback_invalidate = false; 1110bf215546Sopenharmony_ci subpass->feedback_loop_ds = subpass->feedback_loop_color = false; 1111bf215546Sopenharmony_ci subpass->input_count = 0; 1112bf215546Sopenharmony_ci subpass->samples = 0; 1113bf215546Sopenharmony_ci subpass->srgb_cntl = 0; 1114bf215546Sopenharmony_ci subpass->raster_order_attachment_access = false; 1115bf215546Sopenharmony_ci subpass->multiview_mask = info->viewMask; 1116bf215546Sopenharmony_ci subpass->samples = info->rasterizationSamples; 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_ci unsigned a = 0; 1119bf215546Sopenharmony_ci for (unsigned i = 0; i < info->colorAttachmentCount; i++) { 1120bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[a]; 1121bf215546Sopenharmony_ci VkFormat format = info->pColorAttachmentFormats[i]; 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci if (format == VK_FORMAT_UNDEFINED) { 1124bf215546Sopenharmony_ci subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED; 1125bf215546Sopenharmony_ci continue; 1126bf215546Sopenharmony_ci } 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci att->format = format; 1129bf215546Sopenharmony_ci att->samples = info->rasterizationSamples; 1130bf215546Sopenharmony_ci subpass->samples = info->rasterizationSamples; 1131bf215546Sopenharmony_ci subpass->color_attachments[i].attachment = a++; 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci /* conservatively assume that the attachment may be conditionally 1134bf215546Sopenharmony_ci * loaded/stored. 1135bf215546Sopenharmony_ci */ 1136bf215546Sopenharmony_ci att->cond_load_allowed = att->cond_store_allowed = true; 1137bf215546Sopenharmony_ci } 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED || 1140bf215546Sopenharmony_ci info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) { 1141bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = &pass->attachments[a]; 1142bf215546Sopenharmony_ci att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ? 1143bf215546Sopenharmony_ci info->depthAttachmentFormat : info->stencilAttachmentFormat; 1144bf215546Sopenharmony_ci att->samples = info->rasterizationSamples; 1145bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment = a++; 1146bf215546Sopenharmony_ci att->cond_load_allowed = att->cond_store_allowed = true; 1147bf215546Sopenharmony_ci } else { 1148bf215546Sopenharmony_ci subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; 1149bf215546Sopenharmony_ci } 1150bf215546Sopenharmony_ci} 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1153bf215546Sopenharmony_citu_GetRenderAreaGranularity(VkDevice _device, 1154bf215546Sopenharmony_ci VkRenderPass renderPass, 1155bf215546Sopenharmony_ci VkExtent2D *pGranularity) 1156bf215546Sopenharmony_ci{ 1157bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_device, device, _device); 1158bf215546Sopenharmony_ci pGranularity->width = device->physical_device->info->gmem_align_w; 1159bf215546Sopenharmony_ci pGranularity->height = device->physical_device->info->gmem_align_h; 1160bf215546Sopenharmony_ci} 1161bf215546Sopenharmony_ci 1162bf215546Sopenharmony_ciuint32_t 1163bf215546Sopenharmony_citu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index) 1164bf215546Sopenharmony_ci{ 1165bf215546Sopenharmony_ci if (subpass->resolve_depth_stencil && 1166bf215546Sopenharmony_ci index == (subpass->resolve_count - 1)) 1167bf215546Sopenharmony_ci return subpass->depth_stencil_attachment.attachment; 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci return subpass->color_attachments[index].attachment; 1170bf215546Sopenharmony_ci} 1171