1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat.
3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen
4bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * based in part on anv driver which is:
7bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation
8bf215546Sopenharmony_ci */
9bf215546Sopenharmony_ci
10bf215546Sopenharmony_ci#include "tu_pass.h"
11bf215546Sopenharmony_ci
12bf215546Sopenharmony_ci#include "vk_util.h"
13bf215546Sopenharmony_ci
14bf215546Sopenharmony_ci#include "tu_cmd_buffer.h"
15bf215546Sopenharmony_ci#include "tu_device.h"
16bf215546Sopenharmony_ci#include "tu_image.h"
17bf215546Sopenharmony_ci
18bf215546Sopenharmony_ci/* Return true if we have to fallback to sysmem rendering because the
19bf215546Sopenharmony_ci * dependency can't be satisfied with tiled rendering.
20bf215546Sopenharmony_ci */
21bf215546Sopenharmony_ci
22bf215546Sopenharmony_cistatic bool
23bf215546Sopenharmony_cidep_invalid_for_gmem(const VkSubpassDependency2 *dep,
24bf215546Sopenharmony_ci                     VkPipelineStageFlags2 src_stage_mask,
25bf215546Sopenharmony_ci                     VkPipelineStageFlags2 dst_stage_mask)
26bf215546Sopenharmony_ci{
27bf215546Sopenharmony_ci   /* External dependencies don't matter here. */
28bf215546Sopenharmony_ci   if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
29bf215546Sopenharmony_ci       dep->dstSubpass == VK_SUBPASS_EXTERNAL)
30bf215546Sopenharmony_ci      return false;
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci   /* We can conceptually break down the process of rewriting a sysmem
33bf215546Sopenharmony_ci    * renderpass into a gmem one into two parts:
34bf215546Sopenharmony_ci    *
35bf215546Sopenharmony_ci    * 1. Split each draw and multisample resolve into N copies, one for each
36bf215546Sopenharmony_ci    * bin. (If hardware binning, add one more copy where the FS is disabled
37bf215546Sopenharmony_ci    * for the binning pass). This is always allowed because the vertex stage
38bf215546Sopenharmony_ci    * is allowed to run an arbitrary number of times and there are no extra
39bf215546Sopenharmony_ci    * ordering constraints within a draw.
40bf215546Sopenharmony_ci    * 2. Take the last copy of the second-to-last draw and slide it down to
41bf215546Sopenharmony_ci    * before the last copy of the last draw. Repeat for each earlier draw
42bf215546Sopenharmony_ci    * until the draw pass for the last bin is complete, then repeat for each
43bf215546Sopenharmony_ci    * earlier bin until we finish with the first bin.
44bf215546Sopenharmony_ci    *
45bf215546Sopenharmony_ci    * During this rearranging process, we can't slide draws past each other in
46bf215546Sopenharmony_ci    * a way that breaks the subpass dependencies. For each draw, we must slide
47bf215546Sopenharmony_ci    * it past (copies of) the rest of the draws in the renderpass. We can
48bf215546Sopenharmony_ci    * slide a draw past another if there isn't a dependency between them, or
49bf215546Sopenharmony_ci    * if the dependenc(ies) are dependencies between framebuffer-space stages
50bf215546Sopenharmony_ci    * only with the BY_REGION bit set. Note that this includes
51bf215546Sopenharmony_ci    * self-dependencies, since these may result in pipeline barriers that also
52bf215546Sopenharmony_ci    * break the rearranging process.
53bf215546Sopenharmony_ci    */
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
56bf215546Sopenharmony_ci    * Region Dependencies":
57bf215546Sopenharmony_ci    */
58bf215546Sopenharmony_ci   const VkPipelineStageFlags2 framebuffer_space_stages =
59bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
60bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
61bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
62bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   return
65bf215546Sopenharmony_ci      (src_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) ||
66bf215546Sopenharmony_ci      (dst_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)) ||
67bf215546Sopenharmony_ci      !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
68bf215546Sopenharmony_ci}
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_cistatic void
71bf215546Sopenharmony_citu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
72bf215546Sopenharmony_ci                               const VkSubpassDependency2 *dep)
73bf215546Sopenharmony_ci{
74bf215546Sopenharmony_ci   uint32_t src = dep->srcSubpass;
75bf215546Sopenharmony_ci   uint32_t dst = dep->dstSubpass;
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   /* Ignore subpass self-dependencies as they allow the app to call
78bf215546Sopenharmony_ci    * vkCmdPipelineBarrier() inside the render pass and the driver should only
79bf215546Sopenharmony_ci    * do the barrier when called, not when starting the render pass.
80bf215546Sopenharmony_ci    *
81bf215546Sopenharmony_ci    * We cannot decide whether to allow gmem rendering before a barrier
82bf215546Sopenharmony_ci    * is actually emitted, so we delay the decision until then.
83bf215546Sopenharmony_ci    */
84bf215546Sopenharmony_ci   if (src == dst)
85bf215546Sopenharmony_ci      return;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci   /* From the Vulkan 1.2.195 spec:
88bf215546Sopenharmony_ci    *
89bf215546Sopenharmony_ci    * "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask,
90bf215546Sopenharmony_ci    *  dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization
91bf215546Sopenharmony_ci    *  and access scopes instead are defined by the parameters of VkMemoryBarrier2."
92bf215546Sopenharmony_ci    */
93bf215546Sopenharmony_ci   const VkMemoryBarrier2 *barrier =
94bf215546Sopenharmony_ci      vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2);
95bf215546Sopenharmony_ci   VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask;
96bf215546Sopenharmony_ci   VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask;
97bf215546Sopenharmony_ci   VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask;
98bf215546Sopenharmony_ci   VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask;
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   if (dep_invalid_for_gmem(dep, src_stage_mask, dst_stage_mask)) {
101bf215546Sopenharmony_ci      perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency");
102bf215546Sopenharmony_ci      for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++)
103bf215546Sopenharmony_ci         pass->gmem_pixels[i] = 0;
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   struct tu_subpass_barrier *dst_barrier;
107bf215546Sopenharmony_ci   if (dst == VK_SUBPASS_EXTERNAL) {
108bf215546Sopenharmony_ci      dst_barrier = &pass->end_barrier;
109bf215546Sopenharmony_ci   } else {
110bf215546Sopenharmony_ci      dst_barrier = &pass->subpasses[dst].start_barrier;
111bf215546Sopenharmony_ci   }
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   dst_barrier->src_stage_mask |= src_stage_mask;
114bf215546Sopenharmony_ci   dst_barrier->dst_stage_mask |= dst_stage_mask;
115bf215546Sopenharmony_ci   dst_barrier->src_access_mask |= src_access_mask;
116bf215546Sopenharmony_ci   dst_barrier->dst_access_mask |= dst_access_mask;
117bf215546Sopenharmony_ci}
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci/* We currently only care about undefined layouts, because we have to
120bf215546Sopenharmony_ci * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
121bf215546Sopenharmony_ci * UNDEFINED for anything not linear tiled, but we don't know yet whether the
122bf215546Sopenharmony_ci * images used are tiled, so just assume they are.
123bf215546Sopenharmony_ci */
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_cistatic bool
126bf215546Sopenharmony_cilayout_undefined(VkImageLayout layout)
127bf215546Sopenharmony_ci{
128bf215546Sopenharmony_ci   return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
129bf215546Sopenharmony_ci          layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
130bf215546Sopenharmony_ci}
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci/* This implements the following bit of spec text:
133bf215546Sopenharmony_ci *
134bf215546Sopenharmony_ci *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
135bf215546Sopenharmony_ci *    first subpass that uses an attachment, then an implicit subpass
136bf215546Sopenharmony_ci *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
137bf215546Sopenharmony_ci *    used in. The implicit subpass dependency only exists if there
138bf215546Sopenharmony_ci *    exists an automatic layout transition away from initialLayout.
139bf215546Sopenharmony_ci *    The subpass dependency operates as if defined with the
140bf215546Sopenharmony_ci *    following parameters:
141bf215546Sopenharmony_ci *
142bf215546Sopenharmony_ci *    VkSubpassDependency implicitDependency = {
143bf215546Sopenharmony_ci *        .srcSubpass = VK_SUBPASS_EXTERNAL;
144bf215546Sopenharmony_ci *        .dstSubpass = firstSubpass; // First subpass attachment is used in
145bf215546Sopenharmony_ci *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
146bf215546Sopenharmony_ci *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
147bf215546Sopenharmony_ci *        .srcAccessMask = 0;
148bf215546Sopenharmony_ci *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
149bf215546Sopenharmony_ci *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
150bf215546Sopenharmony_ci *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
151bf215546Sopenharmony_ci *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
152bf215546Sopenharmony_ci *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
153bf215546Sopenharmony_ci *        .dependencyFlags = 0;
154bf215546Sopenharmony_ci *    };
155bf215546Sopenharmony_ci *
156bf215546Sopenharmony_ci *    Similarly, if there is no subpass dependency from the last subpass
157bf215546Sopenharmony_ci *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
158bf215546Sopenharmony_ci *    subpass dependency exists from the last subpass it is used in to
159bf215546Sopenharmony_ci *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
160bf215546Sopenharmony_ci *    if there exists an automatic layout transition into finalLayout.
161bf215546Sopenharmony_ci *    The subpass dependency operates as if defined with the following
162bf215546Sopenharmony_ci *    parameters:
163bf215546Sopenharmony_ci *
164bf215546Sopenharmony_ci *    VkSubpassDependency implicitDependency = {
165bf215546Sopenharmony_ci *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
166bf215546Sopenharmony_ci *        .dstSubpass = VK_SUBPASS_EXTERNAL;
167bf215546Sopenharmony_ci *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
168bf215546Sopenharmony_ci *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
169bf215546Sopenharmony_ci *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
170bf215546Sopenharmony_ci *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
171bf215546Sopenharmony_ci *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
172bf215546Sopenharmony_ci *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
173bf215546Sopenharmony_ci *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
174bf215546Sopenharmony_ci *        .dstAccessMask = 0;
175bf215546Sopenharmony_ci *        .dependencyFlags = 0;
176bf215546Sopenharmony_ci *    };
177bf215546Sopenharmony_ci *
178bf215546Sopenharmony_ci * Note: currently this is the only use we have for layout transitions,
179bf215546Sopenharmony_ci * besides needing to invalidate CCU at the beginning, so we also flag
180bf215546Sopenharmony_ci * transitions from UNDEFINED here.
181bf215546Sopenharmony_ci */
182bf215546Sopenharmony_cistatic void
183bf215546Sopenharmony_citu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
184bf215546Sopenharmony_ci                                 const VkRenderPassCreateInfo2 *info)
185bf215546Sopenharmony_ci{
186bf215546Sopenharmony_ci   const VkAttachmentDescription2* att = info->pAttachments;
187bf215546Sopenharmony_ci   bool has_external_src[info->subpassCount];
188bf215546Sopenharmony_ci   bool has_external_dst[info->subpassCount];
189bf215546Sopenharmony_ci   bool att_used[pass->attachment_count];
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   memset(has_external_src, 0, sizeof(has_external_src));
192bf215546Sopenharmony_ci   memset(has_external_dst, 0, sizeof(has_external_dst));
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci   for (uint32_t i = 0; i < info->dependencyCount; i++) {
195bf215546Sopenharmony_ci      uint32_t src = info->pDependencies[i].srcSubpass;
196bf215546Sopenharmony_ci      uint32_t dst = info->pDependencies[i].dstSubpass;
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci      if (src == dst)
199bf215546Sopenharmony_ci         continue;
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci      if (src == VK_SUBPASS_EXTERNAL)
202bf215546Sopenharmony_ci         has_external_src[dst] = true;
203bf215546Sopenharmony_ci      if (dst == VK_SUBPASS_EXTERNAL)
204bf215546Sopenharmony_ci         has_external_dst[src] = true;
205bf215546Sopenharmony_ci   }
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci   memset(att_used, 0, sizeof(att_used));
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->subpassCount; i++) {
210bf215546Sopenharmony_ci      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
211bf215546Sopenharmony_ci      bool src_implicit_dep = false;
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
214bf215546Sopenharmony_ci         uint32_t a = subpass->pInputAttachments[j].attachment;
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
217bf215546Sopenharmony_ci            continue;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci         uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
220bf215546Sopenharmony_ci               vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
221bf215546Sopenharmony_ci               VK_IMAGE_LAYOUT_UNDEFINED;
222bf215546Sopenharmony_ci         uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci         if ((att[a].initialLayout != subpass->pInputAttachments[j].layout ||
225bf215546Sopenharmony_ci             stencil_initial_layout != stencil_layout) &&
226bf215546Sopenharmony_ci             !att_used[a] && !has_external_src[i])
227bf215546Sopenharmony_ci            src_implicit_dep = true;
228bf215546Sopenharmony_ci         att_used[a] = true;
229bf215546Sopenharmony_ci      }
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
232bf215546Sopenharmony_ci         uint32_t a = subpass->pColorAttachments[j].attachment;
233bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
234bf215546Sopenharmony_ci            continue;
235bf215546Sopenharmony_ci         if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
236bf215546Sopenharmony_ci             !att_used[a] && !has_external_src[i])
237bf215546Sopenharmony_ci            src_implicit_dep = true;
238bf215546Sopenharmony_ci         att_used[a] = true;
239bf215546Sopenharmony_ci      }
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci      if (subpass->pDepthStencilAttachment &&
242bf215546Sopenharmony_ci          subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
243bf215546Sopenharmony_ci         uint32_t a = subpass->pDepthStencilAttachment->attachment;
244bf215546Sopenharmony_ci         uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
245bf215546Sopenharmony_ci         uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci         if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
248bf215546Sopenharmony_ci             stencil_initial_layout != stencil_layout) &&
249bf215546Sopenharmony_ci             !att_used[a] && !has_external_src[i]) {
250bf215546Sopenharmony_ci            src_implicit_dep = true;
251bf215546Sopenharmony_ci         }
252bf215546Sopenharmony_ci         att_used[a] = true;
253bf215546Sopenharmony_ci      }
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci      if (subpass->pResolveAttachments) {
256bf215546Sopenharmony_ci         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
257bf215546Sopenharmony_ci            uint32_t a = subpass->pResolveAttachments[j].attachment;
258bf215546Sopenharmony_ci            if (a == VK_ATTACHMENT_UNUSED)
259bf215546Sopenharmony_ci               continue;
260bf215546Sopenharmony_ci            if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
261bf215546Sopenharmony_ci               !att_used[a] && !has_external_src[i])
262bf215546Sopenharmony_ci               src_implicit_dep = true;
263bf215546Sopenharmony_ci            att_used[a] = true;
264bf215546Sopenharmony_ci         }
265bf215546Sopenharmony_ci      }
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
268bf215546Sopenharmony_ci         vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci      if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
271bf215546Sopenharmony_ci          ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
272bf215546Sopenharmony_ci            uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
273bf215546Sopenharmony_ci            uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
274bf215546Sopenharmony_ci            uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci            if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
277bf215546Sopenharmony_ci                stencil_initial_layout != stencil_layout) &&
278bf215546Sopenharmony_ci                !att_used[a] && !has_external_src[i])
279bf215546Sopenharmony_ci               src_implicit_dep = true;
280bf215546Sopenharmony_ci            att_used[a] = true;
281bf215546Sopenharmony_ci      }
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci      if (src_implicit_dep) {
284bf215546Sopenharmony_ci         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
285bf215546Sopenharmony_ci            .srcSubpass = VK_SUBPASS_EXTERNAL,
286bf215546Sopenharmony_ci            .dstSubpass = i,
287bf215546Sopenharmony_ci            .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
288bf215546Sopenharmony_ci            .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
289bf215546Sopenharmony_ci            .srcAccessMask = 0,
290bf215546Sopenharmony_ci            .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
291bf215546Sopenharmony_ci                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
292bf215546Sopenharmony_ci                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
293bf215546Sopenharmony_ci                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
294bf215546Sopenharmony_ci                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
295bf215546Sopenharmony_ci            .dependencyFlags = 0,
296bf215546Sopenharmony_ci         });
297bf215546Sopenharmony_ci      }
298bf215546Sopenharmony_ci   }
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   memset(att_used, 0, sizeof(att_used));
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   for (int i = info->subpassCount - 1; i >= 0; i--) {
303bf215546Sopenharmony_ci      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
304bf215546Sopenharmony_ci      bool dst_implicit_dep = false;
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
307bf215546Sopenharmony_ci         uint32_t a = subpass->pInputAttachments[j].attachment;
308bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
309bf215546Sopenharmony_ci            continue;
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci         uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
312bf215546Sopenharmony_ci               vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
313bf215546Sopenharmony_ci               VK_IMAGE_LAYOUT_UNDEFINED;
314bf215546Sopenharmony_ci         uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci         if ((att[a].finalLayout != subpass->pInputAttachments[j].layout ||
317bf215546Sopenharmony_ci             stencil_final_layout != stencil_layout) &&
318bf215546Sopenharmony_ci             !att_used[a] && !has_external_dst[i])
319bf215546Sopenharmony_ci            dst_implicit_dep = true;
320bf215546Sopenharmony_ci         att_used[a] = true;
321bf215546Sopenharmony_ci      }
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
324bf215546Sopenharmony_ci         uint32_t a = subpass->pColorAttachments[j].attachment;
325bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
326bf215546Sopenharmony_ci            continue;
327bf215546Sopenharmony_ci         if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
328bf215546Sopenharmony_ci             !att_used[a] && !has_external_dst[i])
329bf215546Sopenharmony_ci            dst_implicit_dep = true;
330bf215546Sopenharmony_ci         att_used[a] = true;
331bf215546Sopenharmony_ci      }
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci      if (subpass->pDepthStencilAttachment &&
334bf215546Sopenharmony_ci          subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
335bf215546Sopenharmony_ci         uint32_t a = subpass->pDepthStencilAttachment->attachment;
336bf215546Sopenharmony_ci         uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
337bf215546Sopenharmony_ci         uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci         if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
340bf215546Sopenharmony_ci             stencil_final_layout != stencil_layout) &&
341bf215546Sopenharmony_ci             !att_used[a] && !has_external_dst[i]) {
342bf215546Sopenharmony_ci            dst_implicit_dep = true;
343bf215546Sopenharmony_ci         }
344bf215546Sopenharmony_ci         att_used[a] = true;
345bf215546Sopenharmony_ci      }
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci      if (subpass->pResolveAttachments) {
348bf215546Sopenharmony_ci         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
349bf215546Sopenharmony_ci            uint32_t a = subpass->pResolveAttachments[j].attachment;
350bf215546Sopenharmony_ci            if (a == VK_ATTACHMENT_UNUSED)
351bf215546Sopenharmony_ci               continue;
352bf215546Sopenharmony_ci            if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
353bf215546Sopenharmony_ci                !att_used[a] && !has_external_dst[i])
354bf215546Sopenharmony_ci               dst_implicit_dep = true;
355bf215546Sopenharmony_ci            att_used[a] = true;
356bf215546Sopenharmony_ci         }
357bf215546Sopenharmony_ci      }
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
360bf215546Sopenharmony_ci         vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci      if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
363bf215546Sopenharmony_ci          ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
364bf215546Sopenharmony_ci            uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
365bf215546Sopenharmony_ci            uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
366bf215546Sopenharmony_ci            uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci            if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
369bf215546Sopenharmony_ci                stencil_final_layout != stencil_layout) &&
370bf215546Sopenharmony_ci                !att_used[a] && !has_external_src[i])
371bf215546Sopenharmony_ci               dst_implicit_dep = true;
372bf215546Sopenharmony_ci            att_used[a] = true;
373bf215546Sopenharmony_ci      }
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci      if (dst_implicit_dep) {
376bf215546Sopenharmony_ci         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
377bf215546Sopenharmony_ci            .srcSubpass = i,
378bf215546Sopenharmony_ci            .dstSubpass = VK_SUBPASS_EXTERNAL,
379bf215546Sopenharmony_ci            .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
380bf215546Sopenharmony_ci            .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
381bf215546Sopenharmony_ci            .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
382bf215546Sopenharmony_ci                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
383bf215546Sopenharmony_ci                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
384bf215546Sopenharmony_ci                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
385bf215546Sopenharmony_ci                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
386bf215546Sopenharmony_ci            .dstAccessMask = 0,
387bf215546Sopenharmony_ci            .dependencyFlags = 0,
388bf215546Sopenharmony_ci         });
389bf215546Sopenharmony_ci      }
390bf215546Sopenharmony_ci   }
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci   /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
393bf215546Sopenharmony_ci    * Assume that if an attachment has an initial layout of UNDEFINED, it gets
394bf215546Sopenharmony_ci    * transitioned eventually.
395bf215546Sopenharmony_ci    */
396bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->attachmentCount; i++) {
397bf215546Sopenharmony_ci      if (layout_undefined(att[i].initialLayout)) {
398bf215546Sopenharmony_ci         if (vk_format_is_depth_or_stencil(att[i].format)) {
399bf215546Sopenharmony_ci            pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
400bf215546Sopenharmony_ci         } else {
401bf215546Sopenharmony_ci            pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
402bf215546Sopenharmony_ci         }
403bf215546Sopenharmony_ci      }
404bf215546Sopenharmony_ci   }
405bf215546Sopenharmony_ci}
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci/* If an input attachment is used without an intervening write to the same
408bf215546Sopenharmony_ci * attachment, then we can just use the original image, even in GMEM mode.
409bf215546Sopenharmony_ci * This is an optimization, but it's also important because it allows us to
410bf215546Sopenharmony_ci * avoid having to invalidate UCHE at the beginning of each tile due to it
411bf215546Sopenharmony_ci * becoming invalid. The only reads of GMEM via UCHE should be after an
412bf215546Sopenharmony_ci * earlier subpass modified it, which only works if there's already an
413bf215546Sopenharmony_ci * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
414bf215546Sopenharmony_ci * don't consider this in the dependency code, so this is also required for
415bf215546Sopenharmony_ci * correctness.
416bf215546Sopenharmony_ci */
417bf215546Sopenharmony_cistatic void
418bf215546Sopenharmony_citu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
419bf215546Sopenharmony_ci{
420bf215546Sopenharmony_ci   bool written[pass->attachment_count];
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci   memset(written, 0, sizeof(written));
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci   for (unsigned i = 0; i < pass->subpass_count; i++) {
425bf215546Sopenharmony_ci      struct tu_subpass *subpass = &pass->subpasses[i];
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->input_count; j++) {
428bf215546Sopenharmony_ci         uint32_t a = subpass->input_attachments[j].attachment;
429bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
430bf215546Sopenharmony_ci            continue;
431bf215546Sopenharmony_ci         subpass->input_attachments[j].patch_input_gmem = written[a];
432bf215546Sopenharmony_ci      }
433bf215546Sopenharmony_ci
434bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->color_count; j++) {
435bf215546Sopenharmony_ci         uint32_t a = subpass->color_attachments[j].attachment;
436bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
437bf215546Sopenharmony_ci            continue;
438bf215546Sopenharmony_ci         written[a] = true;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci         for (unsigned k = 0; k < subpass->input_count; k++) {
441bf215546Sopenharmony_ci            if (subpass->input_attachments[k].attachment == a &&
442bf215546Sopenharmony_ci                !subpass->input_attachments[k].patch_input_gmem) {
443bf215546Sopenharmony_ci               /* For render feedback loops, we have no idea whether the use
444bf215546Sopenharmony_ci                * as a color attachment or input attachment will come first,
445bf215546Sopenharmony_ci                * so we have to always use GMEM in case the color attachment
446bf215546Sopenharmony_ci                * comes first and defensively invalidate UCHE in case the
447bf215546Sopenharmony_ci                * input attachment comes first.
448bf215546Sopenharmony_ci                */
449bf215546Sopenharmony_ci               subpass->feedback_invalidate = true;
450bf215546Sopenharmony_ci               subpass->input_attachments[k].patch_input_gmem = true;
451bf215546Sopenharmony_ci            }
452bf215546Sopenharmony_ci         }
453bf215546Sopenharmony_ci      }
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->resolve_count; j++) {
456bf215546Sopenharmony_ci         uint32_t a = subpass->resolve_attachments[j].attachment;
457bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
458bf215546Sopenharmony_ci            continue;
459bf215546Sopenharmony_ci         written[a] = true;
460bf215546Sopenharmony_ci      }
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
463bf215546Sopenharmony_ci         written[subpass->depth_stencil_attachment.attachment] = true;
464bf215546Sopenharmony_ci         for (unsigned k = 0; k < subpass->input_count; k++) {
465bf215546Sopenharmony_ci            if (subpass->input_attachments[k].attachment ==
466bf215546Sopenharmony_ci                subpass->depth_stencil_attachment.attachment &&
467bf215546Sopenharmony_ci                !subpass->input_attachments[k].patch_input_gmem) {
468bf215546Sopenharmony_ci               subpass->feedback_invalidate = true;
469bf215546Sopenharmony_ci               subpass->input_attachments[k].patch_input_gmem = true;
470bf215546Sopenharmony_ci            }
471bf215546Sopenharmony_ci         }
472bf215546Sopenharmony_ci      }
473bf215546Sopenharmony_ci   }
474bf215546Sopenharmony_ci}
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_cistatic void
477bf215546Sopenharmony_citu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
478bf215546Sopenharmony_ci{
479bf215546Sopenharmony_ci   for (unsigned i = 0; i < pass->subpass_count; i++) {
480bf215546Sopenharmony_ci      struct tu_subpass *subpass = &pass->subpasses[i];
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_ci      for (unsigned j = 0; j < subpass->color_count; j++) {
483bf215546Sopenharmony_ci         uint32_t a = subpass->color_attachments[j].attachment;
484bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
485bf215546Sopenharmony_ci            continue;
486bf215546Sopenharmony_ci         for (unsigned k = 0; k < subpass->input_count; k++) {
487bf215546Sopenharmony_ci            if (subpass->input_attachments[k].attachment == a) {
488bf215546Sopenharmony_ci               subpass->feedback_loop_color = true;
489bf215546Sopenharmony_ci               break;
490bf215546Sopenharmony_ci            }
491bf215546Sopenharmony_ci         }
492bf215546Sopenharmony_ci      }
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
495bf215546Sopenharmony_ci         for (unsigned k = 0; k < subpass->input_count; k++) {
496bf215546Sopenharmony_ci            if (subpass->input_attachments[k].attachment ==
497bf215546Sopenharmony_ci                subpass->depth_stencil_attachment.attachment) {
498bf215546Sopenharmony_ci               subpass->feedback_loop_ds = true;
499bf215546Sopenharmony_ci               break;
500bf215546Sopenharmony_ci            }
501bf215546Sopenharmony_ci         }
502bf215546Sopenharmony_ci      }
503bf215546Sopenharmony_ci   }
504bf215546Sopenharmony_ci}
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_cistatic void update_samples(struct tu_subpass *subpass,
507bf215546Sopenharmony_ci                           VkSampleCountFlagBits samples)
508bf215546Sopenharmony_ci{
509bf215546Sopenharmony_ci   assert(subpass->samples == 0 || subpass->samples == samples);
510bf215546Sopenharmony_ci   subpass->samples = samples;
511bf215546Sopenharmony_ci}
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_cistatic void
514bf215546Sopenharmony_citu_render_pass_cond_config(struct tu_render_pass *pass)
515bf215546Sopenharmony_ci{
516bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pass->attachment_count; i++) {
517bf215546Sopenharmony_ci      struct tu_render_pass_attachment *att = &pass->attachments[i];
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci      att->cond_load_allowed =
520bf215546Sopenharmony_ci         (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
521bf215546Sopenharmony_ci      att->cond_store_allowed =
522bf215546Sopenharmony_ci         (att->store || att->store_stencil) && !att->clear_mask;
523bf215546Sopenharmony_ci   }
524bf215546Sopenharmony_ci}
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_cistatic void
527bf215546Sopenharmony_citu_render_pass_gmem_config(struct tu_render_pass *pass,
528bf215546Sopenharmony_ci                           const struct tu_physical_device *phys_dev)
529bf215546Sopenharmony_ci{
530bf215546Sopenharmony_ci   for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT;
531bf215546Sopenharmony_ci        layout++) {
532bf215546Sopenharmony_ci      /* From the VK_KHR_multiview spec:
533bf215546Sopenharmony_ci       *
534bf215546Sopenharmony_ci       *    Multiview is all-or-nothing for a render pass - that is, either all
535bf215546Sopenharmony_ci       *    subpasses must have a non-zero view mask (though some subpasses may
536bf215546Sopenharmony_ci       *    have only one view) or all must be zero.
537bf215546Sopenharmony_ci       *
538bf215546Sopenharmony_ci       * This means we only have to check one of the view masks.
539bf215546Sopenharmony_ci       */
540bf215546Sopenharmony_ci      if (pass->subpasses[0].multiview_mask) {
541bf215546Sopenharmony_ci         /* It seems multiview must use sysmem rendering. */
542bf215546Sopenharmony_ci         pass->gmem_pixels[layout] = 0;
543bf215546Sopenharmony_ci         continue;
544bf215546Sopenharmony_ci      }
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_ci      /* log2(gmem_align/(tile_align_w*tile_align_h)) */
547bf215546Sopenharmony_ci      uint32_t block_align_shift = 3;
548bf215546Sopenharmony_ci      uint32_t tile_align_w = phys_dev->info->tile_align_w;
549bf215546Sopenharmony_ci      uint32_t gmem_align = (1 << block_align_shift) * tile_align_w *
550bf215546Sopenharmony_ci                            phys_dev->info->tile_align_h;
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci      /* calculate total bytes per pixel */
553bf215546Sopenharmony_ci      uint32_t cpp_total = 0;
554bf215546Sopenharmony_ci      for (uint32_t i = 0; i < pass->attachment_count; i++) {
555bf215546Sopenharmony_ci         struct tu_render_pass_attachment *att = &pass->attachments[i];
556bf215546Sopenharmony_ci         bool cpp1 = (att->cpp == 1);
557bf215546Sopenharmony_ci         if (att->gmem) {
558bf215546Sopenharmony_ci            cpp_total += att->cpp;
559bf215546Sopenharmony_ci
560bf215546Sopenharmony_ci            /* take into account the separate stencil: */
561bf215546Sopenharmony_ci            if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
562bf215546Sopenharmony_ci               cpp1 = (att->samples == 1);
563bf215546Sopenharmony_ci               cpp_total += att->samples;
564bf215546Sopenharmony_ci            }
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci            /* texture pitch must be aligned to 64, use a tile_align_w that is
567bf215546Sopenharmony_ci             * a multiple of 64 for cpp==1 attachment to work as input
568bf215546Sopenharmony_ci             * attachment
569bf215546Sopenharmony_ci             */
570bf215546Sopenharmony_ci            if (cpp1 && tile_align_w % 64 != 0) {
571bf215546Sopenharmony_ci               tile_align_w *= 2;
572bf215546Sopenharmony_ci               block_align_shift -= 1;
573bf215546Sopenharmony_ci            }
574bf215546Sopenharmony_ci         }
575bf215546Sopenharmony_ci      }
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci      pass->tile_align_w = tile_align_w;
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci      /* no gmem attachments */
580bf215546Sopenharmony_ci      if (cpp_total == 0) {
581bf215546Sopenharmony_ci         /* any value non-zero value so tiling config works with no
582bf215546Sopenharmony_ci          * attachments
583bf215546Sopenharmony_ci          */
584bf215546Sopenharmony_ci         pass->gmem_pixels[layout] = 1024 * 1024;
585bf215546Sopenharmony_ci         continue;
586bf215546Sopenharmony_ci      }
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci      /* TODO: this algorithm isn't optimal
589bf215546Sopenharmony_ci       * for example, two attachments with cpp = {1, 4}
590bf215546Sopenharmony_ci       * result:  nblocks = {12, 52}, pixels = 196608
591bf215546Sopenharmony_ci       * optimal: nblocks = {13, 51}, pixels = 208896
592bf215546Sopenharmony_ci       */
593bf215546Sopenharmony_ci      uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL
594bf215546Sopenharmony_ci                              ? phys_dev->gmem_size
595bf215546Sopenharmony_ci                              : phys_dev->ccu_offset_gmem;
596bf215546Sopenharmony_ci      uint32_t gmem_blocks = gmem_size / gmem_align;
597bf215546Sopenharmony_ci      uint32_t offset = 0, pixels = ~0u, i;
598bf215546Sopenharmony_ci      for (i = 0; i < pass->attachment_count; i++) {
599bf215546Sopenharmony_ci         struct tu_render_pass_attachment *att = &pass->attachments[i];
600bf215546Sopenharmony_ci         if (!att->gmem)
601bf215546Sopenharmony_ci            continue;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci         att->gmem_offset[layout] = offset;
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci         uint32_t align = MAX2(1, att->cpp >> block_align_shift);
606bf215546Sopenharmony_ci         uint32_t nblocks =
607bf215546Sopenharmony_ci            MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci         if (nblocks > gmem_blocks)
610bf215546Sopenharmony_ci            break;
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci         gmem_blocks -= nblocks;
613bf215546Sopenharmony_ci         cpp_total -= att->cpp;
614bf215546Sopenharmony_ci         offset += nblocks * gmem_align;
615bf215546Sopenharmony_ci         pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci         /* repeat the same for separate stencil */
618bf215546Sopenharmony_ci         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
619bf215546Sopenharmony_ci            att->gmem_offset_stencil[layout] = offset;
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci            /* note: for s8_uint, block align is always 1 */
622bf215546Sopenharmony_ci            uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
623bf215546Sopenharmony_ci            if (nblocks > gmem_blocks)
624bf215546Sopenharmony_ci               break;
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci            gmem_blocks -= nblocks;
627bf215546Sopenharmony_ci            cpp_total -= att->samples;
628bf215546Sopenharmony_ci            offset += nblocks * gmem_align;
629bf215546Sopenharmony_ci            pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
630bf215546Sopenharmony_ci         }
631bf215546Sopenharmony_ci      }
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci      /* if the loop didn't complete then the gmem config is impossible */
634bf215546Sopenharmony_ci      if (i == pass->attachment_count)
635bf215546Sopenharmony_ci         pass->gmem_pixels[layout] = pixels;
636bf215546Sopenharmony_ci   }
637bf215546Sopenharmony_ci}
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_cistatic void
640bf215546Sopenharmony_citu_render_pass_bandwidth_config(struct tu_render_pass *pass)
641bf215546Sopenharmony_ci{
642bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pass->attachment_count; i++) {
643bf215546Sopenharmony_ci      const struct tu_render_pass_attachment *att = &pass->attachments[i];
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci      /* approximate tu_load_gmem_attachment */
646bf215546Sopenharmony_ci      if (att->load)
647bf215546Sopenharmony_ci         pass->gmem_bandwidth_per_pixel += att->cpp;
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci      /* approximate tu_store_gmem_attachment */
650bf215546Sopenharmony_ci      if (att->store)
651bf215546Sopenharmony_ci         pass->gmem_bandwidth_per_pixel += att->cpp;
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_ci      /* approximate tu_clear_sysmem_attachment */
654bf215546Sopenharmony_ci      if (att->clear_mask)
655bf215546Sopenharmony_ci         pass->sysmem_bandwidth_per_pixel += att->cpp;
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci      /* approximate tu6_emit_sysmem_resolves */
658bf215546Sopenharmony_ci      if (att->will_be_resolved) {
659bf215546Sopenharmony_ci         pass->sysmem_bandwidth_per_pixel +=
660bf215546Sopenharmony_ci            att->cpp + att->cpp / att->samples;
661bf215546Sopenharmony_ci      }
662bf215546Sopenharmony_ci   }
663bf215546Sopenharmony_ci}
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_cistatic void
666bf215546Sopenharmony_ciattachment_set_ops(struct tu_device *device,
667bf215546Sopenharmony_ci                   struct tu_render_pass_attachment *att,
668bf215546Sopenharmony_ci                   VkAttachmentLoadOp load_op,
669bf215546Sopenharmony_ci                   VkAttachmentLoadOp stencil_load_op,
670bf215546Sopenharmony_ci                   VkAttachmentStoreOp store_op,
671bf215546Sopenharmony_ci                   VkAttachmentStoreOp stencil_store_op)
672bf215546Sopenharmony_ci{
673bf215546Sopenharmony_ci   if (device->instance->debug_flags & TU_DEBUG_DONT_CARE_AS_LOAD) {
674bf215546Sopenharmony_ci      if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
675bf215546Sopenharmony_ci         load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
676bf215546Sopenharmony_ci      if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
677bf215546Sopenharmony_ci         stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
678bf215546Sopenharmony_ci   }
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci   /* load/store ops */
681bf215546Sopenharmony_ci   att->clear_mask =
682bf215546Sopenharmony_ci      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
683bf215546Sopenharmony_ci   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
684bf215546Sopenharmony_ci   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
687bf215546Sopenharmony_ci   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
688bf215546Sopenharmony_ci   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci   switch (att->format) {
691bf215546Sopenharmony_ci   case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
692bf215546Sopenharmony_ci      if (att->clear_mask)
693bf215546Sopenharmony_ci         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
694bf215546Sopenharmony_ci      if (stencil_clear)
695bf215546Sopenharmony_ci         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
696bf215546Sopenharmony_ci      if (stencil_load)
697bf215546Sopenharmony_ci         att->load = true;
698bf215546Sopenharmony_ci      if (stencil_store)
699bf215546Sopenharmony_ci         att->store = true;
700bf215546Sopenharmony_ci      break;
701bf215546Sopenharmony_ci   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
702bf215546Sopenharmony_ci      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
703bf215546Sopenharmony_ci      att->load = stencil_load;
704bf215546Sopenharmony_ci      att->store = stencil_store;
705bf215546Sopenharmony_ci      break;
706bf215546Sopenharmony_ci   case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
707bf215546Sopenharmony_ci      if (att->clear_mask)
708bf215546Sopenharmony_ci         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
709bf215546Sopenharmony_ci      if (stencil_clear)
710bf215546Sopenharmony_ci         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
711bf215546Sopenharmony_ci      if (stencil_load)
712bf215546Sopenharmony_ci         att->load_stencil = true;
713bf215546Sopenharmony_ci      if (stencil_store)
714bf215546Sopenharmony_ci         att->store_stencil = true;
715bf215546Sopenharmony_ci      break;
716bf215546Sopenharmony_ci   default:
717bf215546Sopenharmony_ci      break;
718bf215546Sopenharmony_ci   }
719bf215546Sopenharmony_ci}
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_cistatic bool
722bf215546Sopenharmony_ciis_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
723bf215546Sopenharmony_ci{
724bf215546Sopenharmony_ci   if (depth_stencil_resolve &&
725bf215546Sopenharmony_ci       depth_stencil_resolve->pDepthStencilResolveAttachment &&
726bf215546Sopenharmony_ci       depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
727bf215546Sopenharmony_ci      return true;
728bf215546Sopenharmony_ci   }
729bf215546Sopenharmony_ci   return false;
730bf215546Sopenharmony_ci}
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_cistatic void
733bf215546Sopenharmony_citu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo)
734bf215546Sopenharmony_ci{
735bf215546Sopenharmony_ci   struct tu_subpass *subpass = &pass->subpasses[i];
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci   pass->attachments[a].gmem = true;
738bf215546Sopenharmony_ci   update_samples(subpass, pCreateInfo->pAttachments[a].samples);
739bf215546Sopenharmony_ci   pass->attachments[a].clear_views |= subpass->multiview_mask;
740bf215546Sopenharmony_ci}
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
743bf215546Sopenharmony_citu_CreateRenderPass2(VkDevice _device,
744bf215546Sopenharmony_ci                     const VkRenderPassCreateInfo2 *pCreateInfo,
745bf215546Sopenharmony_ci                     const VkAllocationCallbacks *pAllocator,
746bf215546Sopenharmony_ci                     VkRenderPass *pRenderPass)
747bf215546Sopenharmony_ci{
748bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_device, device, _device);
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
751bf215546Sopenharmony_ci      return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator,
752bf215546Sopenharmony_ci                                         pRenderPass);
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   struct tu_render_pass *pass;
755bf215546Sopenharmony_ci   size_t size;
756bf215546Sopenharmony_ci   size_t attachments_offset;
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci   size = sizeof(*pass);
761bf215546Sopenharmony_ci   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
762bf215546Sopenharmony_ci   attachments_offset = size;
763bf215546Sopenharmony_ci   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci   pass = vk_object_zalloc(&device->vk, pAllocator, size,
766bf215546Sopenharmony_ci                           VK_OBJECT_TYPE_RENDER_PASS);
767bf215546Sopenharmony_ci   if (pass == NULL)
768bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci   pass->attachment_count = pCreateInfo->attachmentCount;
771bf215546Sopenharmony_ci   pass->subpass_count = pCreateInfo->subpassCount;
772bf215546Sopenharmony_ci   pass->attachments = (void *) pass + attachments_offset;
773bf215546Sopenharmony_ci
774bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
775bf215546Sopenharmony_ci      struct tu_render_pass_attachment *att = &pass->attachments[i];
776bf215546Sopenharmony_ci
777bf215546Sopenharmony_ci      att->format = pCreateInfo->pAttachments[i].format;
778bf215546Sopenharmony_ci      att->samples = pCreateInfo->pAttachments[i].samples;
779bf215546Sopenharmony_ci      /* for d32s8, cpp is for the depth image, and
780bf215546Sopenharmony_ci       * att->samples will be used as the cpp for the stencil image
781bf215546Sopenharmony_ci       */
782bf215546Sopenharmony_ci      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
783bf215546Sopenharmony_ci         att->cpp = 4 * att->samples;
784bf215546Sopenharmony_ci      else
785bf215546Sopenharmony_ci         att->cpp = vk_format_get_blocksize(att->format) * att->samples;
786bf215546Sopenharmony_ci      /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */
787bf215546Sopenharmony_ci      att->gmem = false;
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci      VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp;
790bf215546Sopenharmony_ci      VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp;
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci      attachment_set_ops(device, att, loadOp, stencilLoadOp,
793bf215546Sopenharmony_ci                         pCreateInfo->pAttachments[i].storeOp,
794bf215546Sopenharmony_ci                         pCreateInfo->pAttachments[i].stencilStoreOp);
795bf215546Sopenharmony_ci   }
796bf215546Sopenharmony_ci   uint32_t subpass_attachment_count = 0;
797bf215546Sopenharmony_ci   struct tu_subpass_attachment *p;
798bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
799bf215546Sopenharmony_ci      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
800bf215546Sopenharmony_ci      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
801bf215546Sopenharmony_ci         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci      subpass_attachment_count +=
804bf215546Sopenharmony_ci         desc->inputAttachmentCount + desc->colorAttachmentCount +
805bf215546Sopenharmony_ci         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
806bf215546Sopenharmony_ci         (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
807bf215546Sopenharmony_ci   }
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci   if (subpass_attachment_count) {
810bf215546Sopenharmony_ci      pass->subpass_attachments = vk_alloc2(
811bf215546Sopenharmony_ci         &device->vk.alloc, pAllocator,
812bf215546Sopenharmony_ci         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
813bf215546Sopenharmony_ci         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
814bf215546Sopenharmony_ci      if (pass->subpass_attachments == NULL) {
815bf215546Sopenharmony_ci         vk_object_free(&device->vk, pAllocator, pass);
816bf215546Sopenharmony_ci         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
817bf215546Sopenharmony_ci      }
818bf215546Sopenharmony_ci   } else
819bf215546Sopenharmony_ci      pass->subpass_attachments = NULL;
820bf215546Sopenharmony_ci
821bf215546Sopenharmony_ci   p = pass->subpass_attachments;
822bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
823bf215546Sopenharmony_ci      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
824bf215546Sopenharmony_ci      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
825bf215546Sopenharmony_ci         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
826bf215546Sopenharmony_ci      struct tu_subpass *subpass = &pass->subpasses[i];
827bf215546Sopenharmony_ci
828bf215546Sopenharmony_ci      subpass->input_count = desc->inputAttachmentCount;
829bf215546Sopenharmony_ci      subpass->color_count = desc->colorAttachmentCount;
830bf215546Sopenharmony_ci      subpass->resolve_count = 0;
831bf215546Sopenharmony_ci      subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
832bf215546Sopenharmony_ci      subpass->samples = 0;
833bf215546Sopenharmony_ci      subpass->srgb_cntl = 0;
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci      const VkSubpassDescriptionFlagBits raster_order_access_bits =
836bf215546Sopenharmony_ci         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM |
837bf215546Sopenharmony_ci         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
838bf215546Sopenharmony_ci         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM;
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci      subpass->raster_order_attachment_access = desc->flags & raster_order_access_bits;
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci      subpass->multiview_mask = desc->viewMask;
843bf215546Sopenharmony_ci
844bf215546Sopenharmony_ci      if (desc->inputAttachmentCount > 0) {
845bf215546Sopenharmony_ci         subpass->input_attachments = p;
846bf215546Sopenharmony_ci         p += desc->inputAttachmentCount;
847bf215546Sopenharmony_ci
848bf215546Sopenharmony_ci         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
849bf215546Sopenharmony_ci            uint32_t a = desc->pInputAttachments[j].attachment;
850bf215546Sopenharmony_ci            subpass->input_attachments[j].attachment = a;
851bf215546Sopenharmony_ci            /* Note: attachments only used as input attachments will be read
852bf215546Sopenharmony_ci             * directly instead of through gmem, so we don't mark input
853bf215546Sopenharmony_ci             * attachments as needing gmem.
854bf215546Sopenharmony_ci             */
855bf215546Sopenharmony_ci         }
856bf215546Sopenharmony_ci      }
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci      if (desc->colorAttachmentCount > 0) {
859bf215546Sopenharmony_ci         subpass->color_attachments = p;
860bf215546Sopenharmony_ci         p += desc->colorAttachmentCount;
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
863bf215546Sopenharmony_ci            uint32_t a = desc->pColorAttachments[j].attachment;
864bf215546Sopenharmony_ci            subpass->color_attachments[j].attachment = a;
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ci            if (a != VK_ATTACHMENT_UNUSED) {
867bf215546Sopenharmony_ci               tu_subpass_use_attachment(pass, i, a, pCreateInfo);
868bf215546Sopenharmony_ci
869bf215546Sopenharmony_ci               if (vk_format_is_srgb(pass->attachments[a].format))
870bf215546Sopenharmony_ci                  subpass->srgb_cntl |= 1 << j;
871bf215546Sopenharmony_ci            }
872bf215546Sopenharmony_ci         }
873bf215546Sopenharmony_ci      }
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci      subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
876bf215546Sopenharmony_ci      if (desc->pResolveAttachments) {
877bf215546Sopenharmony_ci         p += desc->colorAttachmentCount;
878bf215546Sopenharmony_ci         subpass->resolve_count += desc->colorAttachmentCount;
879bf215546Sopenharmony_ci         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
880bf215546Sopenharmony_ci            subpass->resolve_attachments[j].attachment =
881bf215546Sopenharmony_ci                  desc->pResolveAttachments[j].attachment;
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci            uint32_t src_a = desc->pColorAttachments[j].attachment;
884bf215546Sopenharmony_ci            if (src_a != VK_ATTACHMENT_UNUSED) {
885bf215546Sopenharmony_ci               pass->attachments[src_a].will_be_resolved =
886bf215546Sopenharmony_ci                  desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED;
887bf215546Sopenharmony_ci            }
888bf215546Sopenharmony_ci         }
889bf215546Sopenharmony_ci      }
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci      if (subpass->resolve_depth_stencil) {
892bf215546Sopenharmony_ci         p++;
893bf215546Sopenharmony_ci         subpass->resolve_count++;
894bf215546Sopenharmony_ci         uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
895bf215546Sopenharmony_ci         subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci         uint32_t src_a = desc->pDepthStencilAttachment->attachment;
898bf215546Sopenharmony_ci         if (src_a != VK_ATTACHMENT_UNUSED) {
899bf215546Sopenharmony_ci            pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED;
900bf215546Sopenharmony_ci         }
901bf215546Sopenharmony_ci      }
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci      uint32_t a = desc->pDepthStencilAttachment ?
904bf215546Sopenharmony_ci         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
905bf215546Sopenharmony_ci      subpass->depth_stencil_attachment.attachment = a;
906bf215546Sopenharmony_ci      if (a != VK_ATTACHMENT_UNUSED)
907bf215546Sopenharmony_ci         tu_subpass_use_attachment(pass, i, a, pCreateInfo);
908bf215546Sopenharmony_ci   }
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_ci   tu_render_pass_patch_input_gmem(pass);
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci   tu_render_pass_check_feedback_loop(pass);
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   /* disable unused attachments */
915bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pass->attachment_count; i++) {
916bf215546Sopenharmony_ci      struct tu_render_pass_attachment *att = &pass->attachments[i];
917bf215546Sopenharmony_ci      if (!att->gmem) {
918bf215546Sopenharmony_ci         att->clear_mask = 0;
919bf215546Sopenharmony_ci         att->load = false;
920bf215546Sopenharmony_ci      }
921bf215546Sopenharmony_ci   }
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci   tu_render_pass_cond_config(pass);
924bf215546Sopenharmony_ci   tu_render_pass_gmem_config(pass, device->physical_device);
925bf215546Sopenharmony_ci   tu_render_pass_bandwidth_config(pass);
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
928bf215546Sopenharmony_ci      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
929bf215546Sopenharmony_ci   }
930bf215546Sopenharmony_ci
931bf215546Sopenharmony_ci   tu_render_pass_add_implicit_deps(pass, pCreateInfo);
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   *pRenderPass = tu_render_pass_to_handle(pass);
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_ci   return VK_SUCCESS;
936bf215546Sopenharmony_ci}
937bf215546Sopenharmony_ci
938bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
939bf215546Sopenharmony_citu_DestroyRenderPass(VkDevice _device,
940bf215546Sopenharmony_ci                     VkRenderPass _pass,
941bf215546Sopenharmony_ci                     const VkAllocationCallbacks *pAllocator)
942bf215546Sopenharmony_ci{
943bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_device, device, _device);
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
946bf215546Sopenharmony_ci      vk_common_DestroyRenderPass(_device, _pass, pAllocator);
947bf215546Sopenharmony_ci      return;
948bf215546Sopenharmony_ci   }
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_render_pass, pass, _pass);
951bf215546Sopenharmony_ci
952bf215546Sopenharmony_ci   if (!_pass)
953bf215546Sopenharmony_ci      return;
954bf215546Sopenharmony_ci
955bf215546Sopenharmony_ci   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
956bf215546Sopenharmony_ci   vk_object_free(&device->vk, pAllocator, pass);
957bf215546Sopenharmony_ci}
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_cistatic void
960bf215546Sopenharmony_citu_setup_dynamic_attachment(struct tu_render_pass_attachment *att,
961bf215546Sopenharmony_ci                            struct tu_image_view *view)
962bf215546Sopenharmony_ci{
963bf215546Sopenharmony_ci   att->format = view->vk.format;
964bf215546Sopenharmony_ci   att->samples = view->image->layout->nr_samples;
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci   /* for d32s8, cpp is for the depth image, and
967bf215546Sopenharmony_ci    * att->samples will be used as the cpp for the stencil image
968bf215546Sopenharmony_ci    */
969bf215546Sopenharmony_ci   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
970bf215546Sopenharmony_ci      att->cpp = 4 * att->samples;
971bf215546Sopenharmony_ci   else
972bf215546Sopenharmony_ci      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
973bf215546Sopenharmony_ci}
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_civoid
976bf215546Sopenharmony_citu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
977bf215546Sopenharmony_ci                             const VkRenderingInfo *info)
978bf215546Sopenharmony_ci{
979bf215546Sopenharmony_ci   struct tu_device *device = cmd_buffer->device;
980bf215546Sopenharmony_ci   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
981bf215546Sopenharmony_ci   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
982bf215546Sopenharmony_ci
983bf215546Sopenharmony_ci   pass->subpass_count = 1;
984bf215546Sopenharmony_ci   pass->attachments = cmd_buffer->dynamic_rp_attachments;
985bf215546Sopenharmony_ci
986bf215546Sopenharmony_ci   subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
987bf215546Sopenharmony_ci   subpass->resolve_depth_stencil = false;
988bf215546Sopenharmony_ci   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
989bf215546Sopenharmony_ci   subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
990bf215546Sopenharmony_ci   subpass->feedback_invalidate = false;
991bf215546Sopenharmony_ci   subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
992bf215546Sopenharmony_ci   subpass->input_count = 0;
993bf215546Sopenharmony_ci   subpass->samples = 0;
994bf215546Sopenharmony_ci   subpass->srgb_cntl = 0;
995bf215546Sopenharmony_ci   subpass->raster_order_attachment_access = false;
996bf215546Sopenharmony_ci   subpass->multiview_mask = info->viewMask;
997bf215546Sopenharmony_ci
998bf215546Sopenharmony_ci   uint32_t a = 0;
999bf215546Sopenharmony_ci   for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
1000bf215546Sopenharmony_ci      struct tu_render_pass_attachment *att = &pass->attachments[a];
1001bf215546Sopenharmony_ci      const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i];
1002bf215546Sopenharmony_ci
1003bf215546Sopenharmony_ci      if (att_info->imageView == VK_NULL_HANDLE) {
1004bf215546Sopenharmony_ci         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1005bf215546Sopenharmony_ci         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1006bf215546Sopenharmony_ci         continue;
1007bf215546Sopenharmony_ci      }
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci      TU_FROM_HANDLE(tu_image_view, view, att_info->imageView);
1010bf215546Sopenharmony_ci      tu_setup_dynamic_attachment(att, view);
1011bf215546Sopenharmony_ci      att->gmem = true;
1012bf215546Sopenharmony_ci      att->clear_views = info->viewMask;
1013bf215546Sopenharmony_ci      attachment_set_ops(device, att, att_info->loadOp, 0,
1014bf215546Sopenharmony_ci                         att_info->storeOp, 0);
1015bf215546Sopenharmony_ci      subpass->color_attachments[i].attachment = a++;
1016bf215546Sopenharmony_ci
1017bf215546Sopenharmony_ci      subpass->samples = view->image->layout->nr_samples;
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci      if (vk_format_is_srgb(view->vk.format))
1020bf215546Sopenharmony_ci         subpass->srgb_cntl |= 1 << i;
1021bf215546Sopenharmony_ci
1022bf215546Sopenharmony_ci      if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1023bf215546Sopenharmony_ci         struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1024bf215546Sopenharmony_ci         TU_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView);
1025bf215546Sopenharmony_ci         tu_setup_dynamic_attachment(resolve_att, resolve_view);
1026bf215546Sopenharmony_ci         resolve_att->gmem = false;
1027bf215546Sopenharmony_ci         attachment_set_ops(device, resolve_att,
1028bf215546Sopenharmony_ci                            VK_ATTACHMENT_LOAD_OP_DONT_CARE, 0,
1029bf215546Sopenharmony_ci                            VK_ATTACHMENT_STORE_OP_STORE, 0);
1030bf215546Sopenharmony_ci         subpass->resolve_attachments[i].attachment = a++;
1031bf215546Sopenharmony_ci         att->will_be_resolved = true;
1032bf215546Sopenharmony_ci      } else {
1033bf215546Sopenharmony_ci         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1034bf215546Sopenharmony_ci         att->will_be_resolved = false;
1035bf215546Sopenharmony_ci      }
1036bf215546Sopenharmony_ci   }
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci   if (info->pDepthAttachment || info->pStencilAttachment) {
1039bf215546Sopenharmony_ci      const struct VkRenderingAttachmentInfo *common_info =
1040bf215546Sopenharmony_ci         (info->pDepthAttachment &&
1041bf215546Sopenharmony_ci          info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
1042bf215546Sopenharmony_ci         info->pDepthAttachment :
1043bf215546Sopenharmony_ci         info->pStencilAttachment;
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_ci      if (common_info && common_info->imageView != VK_NULL_HANDLE) {
1046bf215546Sopenharmony_ci         TU_FROM_HANDLE(tu_image_view, view, common_info->imageView);
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_ci         struct tu_render_pass_attachment *att = &pass->attachments[a];
1049bf215546Sopenharmony_ci         tu_setup_dynamic_attachment(att, view);
1050bf215546Sopenharmony_ci         att->gmem = true;
1051bf215546Sopenharmony_ci         att->clear_views = info->viewMask;
1052bf215546Sopenharmony_ci         subpass->depth_stencil_attachment.attachment = a++;
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_ci         attachment_set_ops(device, att,
1055bf215546Sopenharmony_ci                            info->pDepthAttachment ? info->pDepthAttachment->loadOp : 0,
1056bf215546Sopenharmony_ci                            info->pStencilAttachment ? info->pStencilAttachment->loadOp : 0,
1057bf215546Sopenharmony_ci                            info->pDepthAttachment ? info->pDepthAttachment->storeOp : 0,
1058bf215546Sopenharmony_ci                            info->pStencilAttachment ? info->pStencilAttachment->storeOp : 0);
1059bf215546Sopenharmony_ci
1060bf215546Sopenharmony_ci         subpass->samples = view->image->layout->nr_samples;
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_ci         if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1063bf215546Sopenharmony_ci            unsigned i = subpass->resolve_count++;
1064bf215546Sopenharmony_ci            struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1065bf215546Sopenharmony_ci            TU_FROM_HANDLE(tu_image_view, resolve_view,
1066bf215546Sopenharmony_ci                           common_info->resolveImageView);
1067bf215546Sopenharmony_ci            tu_setup_dynamic_attachment(resolve_att, resolve_view);
1068bf215546Sopenharmony_ci            resolve_att->gmem = false;
1069bf215546Sopenharmony_ci            attachment_set_ops(device, resolve_att,
1070bf215546Sopenharmony_ci                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1071bf215546Sopenharmony_ci                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1072bf215546Sopenharmony_ci                               VK_ATTACHMENT_STORE_OP_STORE,
1073bf215546Sopenharmony_ci                               VK_ATTACHMENT_STORE_OP_STORE);
1074bf215546Sopenharmony_ci            subpass->resolve_attachments[i].attachment = a++;
1075bf215546Sopenharmony_ci            att->will_be_resolved = true;
1076bf215546Sopenharmony_ci            subpass->resolve_depth_stencil = true;
1077bf215546Sopenharmony_ci         } else {
1078bf215546Sopenharmony_ci            att->will_be_resolved = false;
1079bf215546Sopenharmony_ci         }
1080bf215546Sopenharmony_ci      } else {
1081bf215546Sopenharmony_ci         subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1082bf215546Sopenharmony_ci      }
1083bf215546Sopenharmony_ci   } else {
1084bf215546Sopenharmony_ci      subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1085bf215546Sopenharmony_ci   }
1086bf215546Sopenharmony_ci
1087bf215546Sopenharmony_ci   pass->attachment_count = a;
1088bf215546Sopenharmony_ci
1089bf215546Sopenharmony_ci   tu_render_pass_cond_config(pass);
1090bf215546Sopenharmony_ci   tu_render_pass_gmem_config(pass, device->physical_device);
1091bf215546Sopenharmony_ci   tu_render_pass_bandwidth_config(pass);
1092bf215546Sopenharmony_ci}
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_civoid
1095bf215546Sopenharmony_citu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
1096bf215546Sopenharmony_ci                             const VkCommandBufferInheritanceRenderingInfo *info)
1097bf215546Sopenharmony_ci{
1098bf215546Sopenharmony_ci   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
1099bf215546Sopenharmony_ci   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
1100bf215546Sopenharmony_ci
1101bf215546Sopenharmony_ci   pass->subpass_count = 1;
1102bf215546Sopenharmony_ci   pass->attachments = cmd_buffer->dynamic_rp_attachments;
1103bf215546Sopenharmony_ci
1104bf215546Sopenharmony_ci   subpass->color_count = info->colorAttachmentCount;
1105bf215546Sopenharmony_ci   subpass->resolve_count = 0;
1106bf215546Sopenharmony_ci   subpass->resolve_depth_stencil = false;
1107bf215546Sopenharmony_ci   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
1108bf215546Sopenharmony_ci   subpass->resolve_attachments = NULL;
1109bf215546Sopenharmony_ci   subpass->feedback_invalidate = false;
1110bf215546Sopenharmony_ci   subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
1111bf215546Sopenharmony_ci   subpass->input_count = 0;
1112bf215546Sopenharmony_ci   subpass->samples = 0;
1113bf215546Sopenharmony_ci   subpass->srgb_cntl = 0;
1114bf215546Sopenharmony_ci   subpass->raster_order_attachment_access = false;
1115bf215546Sopenharmony_ci   subpass->multiview_mask = info->viewMask;
1116bf215546Sopenharmony_ci   subpass->samples = info->rasterizationSamples;
1117bf215546Sopenharmony_ci
1118bf215546Sopenharmony_ci   unsigned a = 0;
1119bf215546Sopenharmony_ci   for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
1120bf215546Sopenharmony_ci      struct tu_render_pass_attachment *att = &pass->attachments[a];
1121bf215546Sopenharmony_ci      VkFormat format = info->pColorAttachmentFormats[i];
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci      if (format == VK_FORMAT_UNDEFINED) {
1124bf215546Sopenharmony_ci         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1125bf215546Sopenharmony_ci         continue;
1126bf215546Sopenharmony_ci      }
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci      att->format = format;
1129bf215546Sopenharmony_ci      att->samples = info->rasterizationSamples;
1130bf215546Sopenharmony_ci      subpass->samples = info->rasterizationSamples;
1131bf215546Sopenharmony_ci      subpass->color_attachments[i].attachment = a++;
1132bf215546Sopenharmony_ci
1133bf215546Sopenharmony_ci      /* conservatively assume that the attachment may be conditionally
1134bf215546Sopenharmony_ci       * loaded/stored.
1135bf215546Sopenharmony_ci       */
1136bf215546Sopenharmony_ci      att->cond_load_allowed = att->cond_store_allowed = true;
1137bf215546Sopenharmony_ci   }
1138bf215546Sopenharmony_ci
1139bf215546Sopenharmony_ci   if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
1140bf215546Sopenharmony_ci       info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
1141bf215546Sopenharmony_ci      struct tu_render_pass_attachment *att = &pass->attachments[a];
1142bf215546Sopenharmony_ci      att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ?
1143bf215546Sopenharmony_ci         info->depthAttachmentFormat : info->stencilAttachmentFormat;
1144bf215546Sopenharmony_ci      att->samples = info->rasterizationSamples;
1145bf215546Sopenharmony_ci      subpass->depth_stencil_attachment.attachment = a++;
1146bf215546Sopenharmony_ci      att->cond_load_allowed = att->cond_store_allowed = true;
1147bf215546Sopenharmony_ci   } else {
1148bf215546Sopenharmony_ci      subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1149bf215546Sopenharmony_ci   }
1150bf215546Sopenharmony_ci}
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
1153bf215546Sopenharmony_citu_GetRenderAreaGranularity(VkDevice _device,
1154bf215546Sopenharmony_ci                            VkRenderPass renderPass,
1155bf215546Sopenharmony_ci                            VkExtent2D *pGranularity)
1156bf215546Sopenharmony_ci{
1157bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_device, device, _device);
1158bf215546Sopenharmony_ci   pGranularity->width = device->physical_device->info->gmem_align_w;
1159bf215546Sopenharmony_ci   pGranularity->height = device->physical_device->info->gmem_align_h;
1160bf215546Sopenharmony_ci}
1161bf215546Sopenharmony_ci
1162bf215546Sopenharmony_ciuint32_t
1163bf215546Sopenharmony_citu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
1164bf215546Sopenharmony_ci{
1165bf215546Sopenharmony_ci   if (subpass->resolve_depth_stencil &&
1166bf215546Sopenharmony_ci       index == (subpass->resolve_count - 1))
1167bf215546Sopenharmony_ci      return subpass->depth_stencil_attachment.attachment;
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci   return subpass->color_attachments[index].attachment;
1170bf215546Sopenharmony_ci}
1171