1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 * SPDX-License-Identifier: MIT
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 */
9
10#include "tu_pass.h"
11
12#include "vk_util.h"
13
14#include "tu_cmd_buffer.h"
15#include "tu_device.h"
16#include "tu_image.h"
17
18/* Return true if we have to fallback to sysmem rendering because the
19 * dependency can't be satisfied with tiled rendering.
20 */
21
22static bool
23dep_invalid_for_gmem(const VkSubpassDependency2 *dep,
24                     VkPipelineStageFlags2 src_stage_mask,
25                     VkPipelineStageFlags2 dst_stage_mask)
26{
27   /* External dependencies don't matter here. */
28   if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
29       dep->dstSubpass == VK_SUBPASS_EXTERNAL)
30      return false;
31
32   /* We can conceptually break down the process of rewriting a sysmem
33    * renderpass into a gmem one into two parts:
34    *
35    * 1. Split each draw and multisample resolve into N copies, one for each
36    * bin. (If hardware binning, add one more copy where the FS is disabled
37    * for the binning pass). This is always allowed because the vertex stage
38    * is allowed to run an arbitrary number of times and there are no extra
39    * ordering constraints within a draw.
40    * 2. Take the last copy of the second-to-last draw and slide it down to
41    * before the last copy of the last draw. Repeat for each earlier draw
42    * until the draw pass for the last bin is complete, then repeat for each
43    * earlier bin until we finish with the first bin.
44    *
45    * During this rearranging process, we can't slide draws past each other in
46    * a way that breaks the subpass dependencies. For each draw, we must slide
47    * it past (copies of) the rest of the draws in the renderpass. We can
48    * slide a draw past another if there isn't a dependency between them, or
49    * if the dependenc(ies) are dependencies between framebuffer-space stages
50    * only with the BY_REGION bit set. Note that this includes
51    * self-dependencies, since these may result in pipeline barriers that also
52    * break the rearranging process.
53    */
54
55   /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
56    * Region Dependencies":
57    */
58   const VkPipelineStageFlags2 framebuffer_space_stages =
59      VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
60      VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
61      VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
62      VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
63
64   return
65      (src_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) ||
66      (dst_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)) ||
67      !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
68}
69
70static void
71tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
72                               const VkSubpassDependency2 *dep)
73{
74   uint32_t src = dep->srcSubpass;
75   uint32_t dst = dep->dstSubpass;
76
77   /* Ignore subpass self-dependencies as they allow the app to call
78    * vkCmdPipelineBarrier() inside the render pass and the driver should only
79    * do the barrier when called, not when starting the render pass.
80    *
81    * We cannot decide whether to allow gmem rendering before a barrier
82    * is actually emitted, so we delay the decision until then.
83    */
84   if (src == dst)
85      return;
86
87   /* From the Vulkan 1.2.195 spec:
88    *
89    * "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask,
90    *  dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization
91    *  and access scopes instead are defined by the parameters of VkMemoryBarrier2."
92    */
93   const VkMemoryBarrier2 *barrier =
94      vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2);
95   VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask;
96   VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask;
97   VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask;
98   VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask;
99
100   if (dep_invalid_for_gmem(dep, src_stage_mask, dst_stage_mask)) {
101      perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency");
102      for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++)
103         pass->gmem_pixels[i] = 0;
104   }
105
106   struct tu_subpass_barrier *dst_barrier;
107   if (dst == VK_SUBPASS_EXTERNAL) {
108      dst_barrier = &pass->end_barrier;
109   } else {
110      dst_barrier = &pass->subpasses[dst].start_barrier;
111   }
112
113   dst_barrier->src_stage_mask |= src_stage_mask;
114   dst_barrier->dst_stage_mask |= dst_stage_mask;
115   dst_barrier->src_access_mask |= src_access_mask;
116   dst_barrier->dst_access_mask |= dst_access_mask;
117}
118
119/* We currently only care about undefined layouts, because we have to
120 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
121 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
122 * images used are tiled, so just assume they are.
123 */
124
125static bool
126layout_undefined(VkImageLayout layout)
127{
128   return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
129          layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
130}
131
132/* This implements the following bit of spec text:
133 *
134 *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
135 *    first subpass that uses an attachment, then an implicit subpass
136 *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
137 *    used in. The implicit subpass dependency only exists if there
138 *    exists an automatic layout transition away from initialLayout.
139 *    The subpass dependency operates as if defined with the
140 *    following parameters:
141 *
142 *    VkSubpassDependency implicitDependency = {
143 *        .srcSubpass = VK_SUBPASS_EXTERNAL;
144 *        .dstSubpass = firstSubpass; // First subpass attachment is used in
145 *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
146 *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
147 *        .srcAccessMask = 0;
148 *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
149 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
150 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
151 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
152 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
153 *        .dependencyFlags = 0;
154 *    };
155 *
156 *    Similarly, if there is no subpass dependency from the last subpass
157 *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
158 *    subpass dependency exists from the last subpass it is used in to
159 *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
160 *    if there exists an automatic layout transition into finalLayout.
161 *    The subpass dependency operates as if defined with the following
162 *    parameters:
163 *
164 *    VkSubpassDependency implicitDependency = {
165 *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
166 *        .dstSubpass = VK_SUBPASS_EXTERNAL;
167 *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
168 *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
169 *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
170 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
171 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
172 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
173 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
174 *        .dstAccessMask = 0;
175 *        .dependencyFlags = 0;
176 *    };
177 *
178 * Note: currently this is the only use we have for layout transitions,
179 * besides needing to invalidate CCU at the beginning, so we also flag
180 * transitions from UNDEFINED here.
181 */
182static void
183tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
184                                 const VkRenderPassCreateInfo2 *info)
185{
186   const VkAttachmentDescription2* att = info->pAttachments;
187   bool has_external_src[info->subpassCount];
188   bool has_external_dst[info->subpassCount];
189   bool att_used[pass->attachment_count];
190
191   memset(has_external_src, 0, sizeof(has_external_src));
192   memset(has_external_dst, 0, sizeof(has_external_dst));
193
194   for (uint32_t i = 0; i < info->dependencyCount; i++) {
195      uint32_t src = info->pDependencies[i].srcSubpass;
196      uint32_t dst = info->pDependencies[i].dstSubpass;
197
198      if (src == dst)
199         continue;
200
201      if (src == VK_SUBPASS_EXTERNAL)
202         has_external_src[dst] = true;
203      if (dst == VK_SUBPASS_EXTERNAL)
204         has_external_dst[src] = true;
205   }
206
207   memset(att_used, 0, sizeof(att_used));
208
209   for (unsigned i = 0; i < info->subpassCount; i++) {
210      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
211      bool src_implicit_dep = false;
212
213      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
214         uint32_t a = subpass->pInputAttachments[j].attachment;
215
216         if (a == VK_ATTACHMENT_UNUSED)
217            continue;
218
219         uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
220               vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
221               VK_IMAGE_LAYOUT_UNDEFINED;
222         uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
223
224         if ((att[a].initialLayout != subpass->pInputAttachments[j].layout ||
225             stencil_initial_layout != stencil_layout) &&
226             !att_used[a] && !has_external_src[i])
227            src_implicit_dep = true;
228         att_used[a] = true;
229      }
230
231      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
232         uint32_t a = subpass->pColorAttachments[j].attachment;
233         if (a == VK_ATTACHMENT_UNUSED)
234            continue;
235         if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
236             !att_used[a] && !has_external_src[i])
237            src_implicit_dep = true;
238         att_used[a] = true;
239      }
240
241      if (subpass->pDepthStencilAttachment &&
242          subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
243         uint32_t a = subpass->pDepthStencilAttachment->attachment;
244         uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
245         uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
246
247         if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
248             stencil_initial_layout != stencil_layout) &&
249             !att_used[a] && !has_external_src[i]) {
250            src_implicit_dep = true;
251         }
252         att_used[a] = true;
253      }
254
255      if (subpass->pResolveAttachments) {
256         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
257            uint32_t a = subpass->pResolveAttachments[j].attachment;
258            if (a == VK_ATTACHMENT_UNUSED)
259               continue;
260            if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
261               !att_used[a] && !has_external_src[i])
262               src_implicit_dep = true;
263            att_used[a] = true;
264         }
265      }
266
267      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
268         vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
269
270      if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
271          ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
272            uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
273            uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
274            uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
275
276            if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
277                stencil_initial_layout != stencil_layout) &&
278                !att_used[a] && !has_external_src[i])
279               src_implicit_dep = true;
280            att_used[a] = true;
281      }
282
283      if (src_implicit_dep) {
284         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
285            .srcSubpass = VK_SUBPASS_EXTERNAL,
286            .dstSubpass = i,
287            .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
288            .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
289            .srcAccessMask = 0,
290            .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
291                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
292                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
293                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
294                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
295            .dependencyFlags = 0,
296         });
297      }
298   }
299
300   memset(att_used, 0, sizeof(att_used));
301
302   for (int i = info->subpassCount - 1; i >= 0; i--) {
303      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
304      bool dst_implicit_dep = false;
305
306      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
307         uint32_t a = subpass->pInputAttachments[j].attachment;
308         if (a == VK_ATTACHMENT_UNUSED)
309            continue;
310
311         uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
312               vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
313               VK_IMAGE_LAYOUT_UNDEFINED;
314         uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
315
316         if ((att[a].finalLayout != subpass->pInputAttachments[j].layout ||
317             stencil_final_layout != stencil_layout) &&
318             !att_used[a] && !has_external_dst[i])
319            dst_implicit_dep = true;
320         att_used[a] = true;
321      }
322
323      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
324         uint32_t a = subpass->pColorAttachments[j].attachment;
325         if (a == VK_ATTACHMENT_UNUSED)
326            continue;
327         if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
328             !att_used[a] && !has_external_dst[i])
329            dst_implicit_dep = true;
330         att_used[a] = true;
331      }
332
333      if (subpass->pDepthStencilAttachment &&
334          subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
335         uint32_t a = subpass->pDepthStencilAttachment->attachment;
336         uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
337         uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
338
339         if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
340             stencil_final_layout != stencil_layout) &&
341             !att_used[a] && !has_external_dst[i]) {
342            dst_implicit_dep = true;
343         }
344         att_used[a] = true;
345      }
346
347      if (subpass->pResolveAttachments) {
348         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
349            uint32_t a = subpass->pResolveAttachments[j].attachment;
350            if (a == VK_ATTACHMENT_UNUSED)
351               continue;
352            if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
353                !att_used[a] && !has_external_dst[i])
354               dst_implicit_dep = true;
355            att_used[a] = true;
356         }
357      }
358
359      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
360         vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
361
362      if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
363          ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
364            uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
365            uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
366            uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
367
368            if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
369                stencil_final_layout != stencil_layout) &&
370                !att_used[a] && !has_external_src[i])
371               dst_implicit_dep = true;
372            att_used[a] = true;
373      }
374
375      if (dst_implicit_dep) {
376         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
377            .srcSubpass = i,
378            .dstSubpass = VK_SUBPASS_EXTERNAL,
379            .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
380            .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
381            .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
382                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
383                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
384                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
385                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
386            .dstAccessMask = 0,
387            .dependencyFlags = 0,
388         });
389      }
390   }
391
392   /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
393    * Assume that if an attachment has an initial layout of UNDEFINED, it gets
394    * transitioned eventually.
395    */
396   for (unsigned i = 0; i < info->attachmentCount; i++) {
397      if (layout_undefined(att[i].initialLayout)) {
398         if (vk_format_is_depth_or_stencil(att[i].format)) {
399            pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
400         } else {
401            pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
402         }
403      }
404   }
405}
406
407/* If an input attachment is used without an intervening write to the same
408 * attachment, then we can just use the original image, even in GMEM mode.
409 * This is an optimization, but it's also important because it allows us to
410 * avoid having to invalidate UCHE at the beginning of each tile due to it
411 * becoming invalid. The only reads of GMEM via UCHE should be after an
412 * earlier subpass modified it, which only works if there's already an
413 * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
414 * don't consider this in the dependency code, so this is also required for
415 * correctness.
416 */
417static void
418tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
419{
420   bool written[pass->attachment_count];
421
422   memset(written, 0, sizeof(written));
423
424   for (unsigned i = 0; i < pass->subpass_count; i++) {
425      struct tu_subpass *subpass = &pass->subpasses[i];
426
427      for (unsigned j = 0; j < subpass->input_count; j++) {
428         uint32_t a = subpass->input_attachments[j].attachment;
429         if (a == VK_ATTACHMENT_UNUSED)
430            continue;
431         subpass->input_attachments[j].patch_input_gmem = written[a];
432      }
433
434      for (unsigned j = 0; j < subpass->color_count; j++) {
435         uint32_t a = subpass->color_attachments[j].attachment;
436         if (a == VK_ATTACHMENT_UNUSED)
437            continue;
438         written[a] = true;
439
440         for (unsigned k = 0; k < subpass->input_count; k++) {
441            if (subpass->input_attachments[k].attachment == a &&
442                !subpass->input_attachments[k].patch_input_gmem) {
443               /* For render feedback loops, we have no idea whether the use
444                * as a color attachment or input attachment will come first,
445                * so we have to always use GMEM in case the color attachment
446                * comes first and defensively invalidate UCHE in case the
447                * input attachment comes first.
448                */
449               subpass->feedback_invalidate = true;
450               subpass->input_attachments[k].patch_input_gmem = true;
451            }
452         }
453      }
454
455      for (unsigned j = 0; j < subpass->resolve_count; j++) {
456         uint32_t a = subpass->resolve_attachments[j].attachment;
457         if (a == VK_ATTACHMENT_UNUSED)
458            continue;
459         written[a] = true;
460      }
461
462      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
463         written[subpass->depth_stencil_attachment.attachment] = true;
464         for (unsigned k = 0; k < subpass->input_count; k++) {
465            if (subpass->input_attachments[k].attachment ==
466                subpass->depth_stencil_attachment.attachment &&
467                !subpass->input_attachments[k].patch_input_gmem) {
468               subpass->feedback_invalidate = true;
469               subpass->input_attachments[k].patch_input_gmem = true;
470            }
471         }
472      }
473   }
474}
475
476static void
477tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
478{
479   for (unsigned i = 0; i < pass->subpass_count; i++) {
480      struct tu_subpass *subpass = &pass->subpasses[i];
481
482      for (unsigned j = 0; j < subpass->color_count; j++) {
483         uint32_t a = subpass->color_attachments[j].attachment;
484         if (a == VK_ATTACHMENT_UNUSED)
485            continue;
486         for (unsigned k = 0; k < subpass->input_count; k++) {
487            if (subpass->input_attachments[k].attachment == a) {
488               subpass->feedback_loop_color = true;
489               break;
490            }
491         }
492      }
493
494      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
495         for (unsigned k = 0; k < subpass->input_count; k++) {
496            if (subpass->input_attachments[k].attachment ==
497                subpass->depth_stencil_attachment.attachment) {
498               subpass->feedback_loop_ds = true;
499               break;
500            }
501         }
502      }
503   }
504}
505
/* Record the sample count used by a subpass.
 *
 * subpass: subpass whose `samples` field is updated.
 * samples: sample count of an attachment used by the subpass.
 *
 * Asserts that the subpass either has no sample count yet (0) or already
 * has this same one.
 */
static void update_samples(struct tu_subpass *subpass,
                           VkSampleCountFlagBits samples)
{
   /* A value of 0 is treated as "not set yet". */
   assert(subpass->samples == 0 || subpass->samples == samples);
   subpass->samples = samples;
}
512
513static void
514tu_render_pass_cond_config(struct tu_render_pass *pass)
515{
516   for (uint32_t i = 0; i < pass->attachment_count; i++) {
517      struct tu_render_pass_attachment *att = &pass->attachments[i];
518
519      att->cond_load_allowed =
520         (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
521      att->cond_store_allowed =
522         (att->store || att->store_stencil) && !att->clear_mask;
523   }
524}
525
/* Lay out the attachments flagged `gmem` in GMEM, once per gmem layout.
 *
 * For every layout this assigns att->gmem_offset[layout] (and
 * att->gmem_offset_stencil[layout] for D32_SFLOAT_S8_UINT) and stores the
 * resulting pixel budget in pass->gmem_pixels[layout]. It also writes
 * pass->tile_align_w. When no valid configuration is found for a layout,
 * pass->gmem_pixels[layout] is left untouched.
 *
 * pass:     render pass to configure.
 * phys_dev: supplies the tile alignment, the GMEM size and the CCU offset.
 */
static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT;
        layout++) {
      /* From the VK_KHR_multiview spec:
       *
       *    Multiview is all-or-nothing for a render pass - that is, either all
       *    subpasses must have a non-zero view mask (though some subpasses may
       *    have only one view) or all must be zero.
       *
       * This means we only have to check one of the view masks.
       */
      if (pass->subpasses[0].multiview_mask) {
         /* It seems multiview must use sysmem rendering. */
         pass->gmem_pixels[layout] = 0;
         continue;
      }

      /* log2(gmem_align/(tile_align_w*tile_align_h)) */
      uint32_t block_align_shift = 3;
      uint32_t tile_align_w = phys_dev->info->tile_align_w;
      uint32_t gmem_align = (1 << block_align_shift) * tile_align_w *
                            phys_dev->info->tile_align_h;

      /* calculate total bytes per pixel */
      uint32_t cpp_total = 0;
      for (uint32_t i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         bool cpp1 = (att->cpp == 1);
         if (att->gmem) {
            cpp_total += att->cpp;

            /* take into account the separate stencil: */
            if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
               /* att->samples is used as the cpp of the stencil plane here */
               cpp1 = (att->samples == 1);
               cpp_total += att->samples;
            }

            /* texture pitch must be aligned to 64, use a tile_align_w that is
             * a multiple of 64 for cpp==1 attachment to work as input
             * attachment
             */
            if (cpp1 && tile_align_w % 64 != 0) {
               /* Doubling the width while decrementing the shift keeps the
                * log2 relation with the already-computed gmem_align intact.
                */
               tile_align_w *= 2;
               block_align_shift -= 1;
            }
         }
      }

      pass->tile_align_w = tile_align_w;

      /* no gmem attachments */
      if (cpp_total == 0) {
         /* any non-zero value so tiling config works with no
          * attachments
          */
         pass->gmem_pixels[layout] = 1024 * 1024;
         continue;
      }

      /* TODO: this algorithm isn't optimal
       * for example, two attachments with cpp = {1, 4}
       * result:  nblocks = {12, 52}, pixels = 196608
       * optimal: nblocks = {13, 51}, pixels = 208896
       */
      uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL
                              ? phys_dev->gmem_size
                              : phys_dev->ccu_offset_gmem;
      uint32_t gmem_blocks = gmem_size / gmem_align;
      /* `i` is declared outside the loop so that an early break can be
       * detected afterwards.
       */
      uint32_t offset = 0, pixels = ~0u, i;
      for (i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         if (!att->gmem)
            continue;

         att->gmem_offset[layout] = offset;

         /* Give this attachment a share of the remaining blocks proportional
          * to its cpp, rounded down to `align` but never below `align`.
          */
         uint32_t align = MAX2(1, att->cpp >> block_align_shift);
         uint32_t nblocks =
            MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->cpp;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

         /* repeat the same for separate stencil */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            att->gmem_offset_stencil[layout] = offset;

            /* note: for s8_uint, block align is always 1 */
            uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
            /* this break leaves the attachment loop, not just this block */
            if (nblocks > gmem_blocks)
               break;

            gmem_blocks -= nblocks;
            cpp_total -= att->samples;
            offset += nblocks * gmem_align;
            pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
         }
      }

      /* if the loop didn't complete then the gmem config is impossible */
      if (i == pass->attachment_count)
         pass->gmem_pixels[layout] = pixels;
   }
}
638
639static void
640tu_render_pass_bandwidth_config(struct tu_render_pass *pass)
641{
642   for (uint32_t i = 0; i < pass->attachment_count; i++) {
643      const struct tu_render_pass_attachment *att = &pass->attachments[i];
644
645      /* approximate tu_load_gmem_attachment */
646      if (att->load)
647         pass->gmem_bandwidth_per_pixel += att->cpp;
648
649      /* approximate tu_store_gmem_attachment */
650      if (att->store)
651         pass->gmem_bandwidth_per_pixel += att->cpp;
652
653      /* approximate tu_clear_sysmem_attachment */
654      if (att->clear_mask)
655         pass->sysmem_bandwidth_per_pixel += att->cpp;
656
657      /* approximate tu6_emit_sysmem_resolves */
658      if (att->will_be_resolved) {
659         pass->sysmem_bandwidth_per_pixel +=
660            att->cpp + att->cpp / att->samples;
661      }
662   }
663}
664
665static void
666attachment_set_ops(struct tu_device *device,
667                   struct tu_render_pass_attachment *att,
668                   VkAttachmentLoadOp load_op,
669                   VkAttachmentLoadOp stencil_load_op,
670                   VkAttachmentStoreOp store_op,
671                   VkAttachmentStoreOp stencil_store_op)
672{
673   if (device->instance->debug_flags & TU_DEBUG_DONT_CARE_AS_LOAD) {
674      if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
675         load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
676      if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
677         stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
678   }
679
680   /* load/store ops */
681   att->clear_mask =
682      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
683   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
684   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
685
686   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
687   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
688   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
689
690   switch (att->format) {
691   case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
692      if (att->clear_mask)
693         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
694      if (stencil_clear)
695         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
696      if (stencil_load)
697         att->load = true;
698      if (stencil_store)
699         att->store = true;
700      break;
701   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
702      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
703      att->load = stencil_load;
704      att->store = stencil_store;
705      break;
706   case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
707      if (att->clear_mask)
708         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
709      if (stencil_clear)
710         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
711      if (stencil_load)
712         att->load_stencil = true;
713      if (stencil_store)
714         att->store_stencil = true;
715      break;
716   default:
717      break;
718   }
719}
720
721static bool
722is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
723{
724   if (depth_stencil_resolve &&
725       depth_stencil_resolve->pDepthStencilResolveAttachment &&
726       depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
727      return true;
728   }
729   return false;
730}
731
732static void
733tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo)
734{
735   struct tu_subpass *subpass = &pass->subpasses[i];
736
737   pass->attachments[a].gmem = true;
738   update_samples(subpass, pCreateInfo->pAttachments[a].samples);
739   pass->attachments[a].clear_views |= subpass->multiview_mask;
740}
741
742VKAPI_ATTR VkResult VKAPI_CALL
743tu_CreateRenderPass2(VkDevice _device,
744                     const VkRenderPassCreateInfo2 *pCreateInfo,
745                     const VkAllocationCallbacks *pAllocator,
746                     VkRenderPass *pRenderPass)
747{
748   TU_FROM_HANDLE(tu_device, device, _device);
749
750   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
751      return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator,
752                                         pRenderPass);
753
754   struct tu_render_pass *pass;
755   size_t size;
756   size_t attachments_offset;
757
758   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
759
760   size = sizeof(*pass);
761   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
762   attachments_offset = size;
763   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
764
765   pass = vk_object_zalloc(&device->vk, pAllocator, size,
766                           VK_OBJECT_TYPE_RENDER_PASS);
767   if (pass == NULL)
768      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
769
770   pass->attachment_count = pCreateInfo->attachmentCount;
771   pass->subpass_count = pCreateInfo->subpassCount;
772   pass->attachments = (void *) pass + attachments_offset;
773
774   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
775      struct tu_render_pass_attachment *att = &pass->attachments[i];
776
777      att->format = pCreateInfo->pAttachments[i].format;
778      att->samples = pCreateInfo->pAttachments[i].samples;
779      /* for d32s8, cpp is for the depth image, and
780       * att->samples will be used as the cpp for the stencil image
781       */
782      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
783         att->cpp = 4 * att->samples;
784      else
785         att->cpp = vk_format_get_blocksize(att->format) * att->samples;
786      /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */
787      att->gmem = false;
788
789      VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp;
790      VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp;
791
792      attachment_set_ops(device, att, loadOp, stencilLoadOp,
793                         pCreateInfo->pAttachments[i].storeOp,
794                         pCreateInfo->pAttachments[i].stencilStoreOp);
795   }
796   uint32_t subpass_attachment_count = 0;
797   struct tu_subpass_attachment *p;
798   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
799      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
800      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
801         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
802
803      subpass_attachment_count +=
804         desc->inputAttachmentCount + desc->colorAttachmentCount +
805         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
806         (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
807   }
808
809   if (subpass_attachment_count) {
810      pass->subpass_attachments = vk_alloc2(
811         &device->vk.alloc, pAllocator,
812         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
813         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
814      if (pass->subpass_attachments == NULL) {
815         vk_object_free(&device->vk, pAllocator, pass);
816         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
817      }
818   } else
819      pass->subpass_attachments = NULL;
820
821   p = pass->subpass_attachments;
822   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
823      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
824      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
825         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
826      struct tu_subpass *subpass = &pass->subpasses[i];
827
828      subpass->input_count = desc->inputAttachmentCount;
829      subpass->color_count = desc->colorAttachmentCount;
830      subpass->resolve_count = 0;
831      subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
832      subpass->samples = 0;
833      subpass->srgb_cntl = 0;
834
835      const VkSubpassDescriptionFlagBits raster_order_access_bits =
836         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM |
837         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
838         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM;
839
840      subpass->raster_order_attachment_access = desc->flags & raster_order_access_bits;
841
842      subpass->multiview_mask = desc->viewMask;
843
844      if (desc->inputAttachmentCount > 0) {
845         subpass->input_attachments = p;
846         p += desc->inputAttachmentCount;
847
848         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
849            uint32_t a = desc->pInputAttachments[j].attachment;
850            subpass->input_attachments[j].attachment = a;
851            /* Note: attachments only used as input attachments will be read
852             * directly instead of through gmem, so we don't mark input
853             * attachments as needing gmem.
854             */
855         }
856      }
857
858      if (desc->colorAttachmentCount > 0) {
859         subpass->color_attachments = p;
860         p += desc->colorAttachmentCount;
861
862         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
863            uint32_t a = desc->pColorAttachments[j].attachment;
864            subpass->color_attachments[j].attachment = a;
865
866            if (a != VK_ATTACHMENT_UNUSED) {
867               tu_subpass_use_attachment(pass, i, a, pCreateInfo);
868
869               if (vk_format_is_srgb(pass->attachments[a].format))
870                  subpass->srgb_cntl |= 1 << j;
871            }
872         }
873      }
874
875      subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
876      if (desc->pResolveAttachments) {
877         p += desc->colorAttachmentCount;
878         subpass->resolve_count += desc->colorAttachmentCount;
879         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
880            subpass->resolve_attachments[j].attachment =
881                  desc->pResolveAttachments[j].attachment;
882
883            uint32_t src_a = desc->pColorAttachments[j].attachment;
884            if (src_a != VK_ATTACHMENT_UNUSED) {
885               pass->attachments[src_a].will_be_resolved =
886                  desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED;
887            }
888         }
889      }
890
891      if (subpass->resolve_depth_stencil) {
892         p++;
893         subpass->resolve_count++;
894         uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
895         subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
896
897         uint32_t src_a = desc->pDepthStencilAttachment->attachment;
898         if (src_a != VK_ATTACHMENT_UNUSED) {
899            pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED;
900         }
901      }
902
903      uint32_t a = desc->pDepthStencilAttachment ?
904         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
905      subpass->depth_stencil_attachment.attachment = a;
906      if (a != VK_ATTACHMENT_UNUSED)
907         tu_subpass_use_attachment(pass, i, a, pCreateInfo);
908   }
909
910   tu_render_pass_patch_input_gmem(pass);
911
912   tu_render_pass_check_feedback_loop(pass);
913
914   /* disable unused attachments */
915   for (uint32_t i = 0; i < pass->attachment_count; i++) {
916      struct tu_render_pass_attachment *att = &pass->attachments[i];
917      if (!att->gmem) {
918         att->clear_mask = 0;
919         att->load = false;
920      }
921   }
922
923   tu_render_pass_cond_config(pass);
924   tu_render_pass_gmem_config(pass, device->physical_device);
925   tu_render_pass_bandwidth_config(pass);
926
927   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
928      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
929   }
930
931   tu_render_pass_add_implicit_deps(pass, pCreateInfo);
932
933   *pRenderPass = tu_render_pass_to_handle(pass);
934
935   return VK_SUCCESS;
936}
937
938VKAPI_ATTR void VKAPI_CALL
939tu_DestroyRenderPass(VkDevice _device,
940                     VkRenderPass _pass,
941                     const VkAllocationCallbacks *pAllocator)
942{
943   TU_FROM_HANDLE(tu_device, device, _device);
944
945   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
946      vk_common_DestroyRenderPass(_device, _pass, pAllocator);
947      return;
948   }
949
950   TU_FROM_HANDLE(tu_render_pass, pass, _pass);
951
952   if (!_pass)
953      return;
954
955   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
956   vk_object_free(&device->vk, pAllocator, pass);
957}
958
959static void
960tu_setup_dynamic_attachment(struct tu_render_pass_attachment *att,
961                            struct tu_image_view *view)
962{
963   att->format = view->vk.format;
964   att->samples = view->image->layout->nr_samples;
965
966   /* for d32s8, cpp is for the depth image, and
967    * att->samples will be used as the cpp for the stencil image
968    */
969   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
970      att->cpp = 4 * att->samples;
971   else
972      att->cpp = vk_format_get_blocksize(att->format) * att->samples;
973}
974
975void
976tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
977                             const VkRenderingInfo *info)
978{
979   struct tu_device *device = cmd_buffer->device;
980   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
981   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
982
983   pass->subpass_count = 1;
984   pass->attachments = cmd_buffer->dynamic_rp_attachments;
985
986   subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
987   subpass->resolve_depth_stencil = false;
988   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
989   subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
990   subpass->feedback_invalidate = false;
991   subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
992   subpass->input_count = 0;
993   subpass->samples = 0;
994   subpass->srgb_cntl = 0;
995   subpass->raster_order_attachment_access = false;
996   subpass->multiview_mask = info->viewMask;
997
998   uint32_t a = 0;
999   for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
1000      struct tu_render_pass_attachment *att = &pass->attachments[a];
1001      const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i];
1002
1003      if (att_info->imageView == VK_NULL_HANDLE) {
1004         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1005         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1006         continue;
1007      }
1008
1009      TU_FROM_HANDLE(tu_image_view, view, att_info->imageView);
1010      tu_setup_dynamic_attachment(att, view);
1011      att->gmem = true;
1012      att->clear_views = info->viewMask;
1013      attachment_set_ops(device, att, att_info->loadOp, 0,
1014                         att_info->storeOp, 0);
1015      subpass->color_attachments[i].attachment = a++;
1016
1017      subpass->samples = view->image->layout->nr_samples;
1018
1019      if (vk_format_is_srgb(view->vk.format))
1020         subpass->srgb_cntl |= 1 << i;
1021
1022      if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1023         struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1024         TU_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView);
1025         tu_setup_dynamic_attachment(resolve_att, resolve_view);
1026         resolve_att->gmem = false;
1027         attachment_set_ops(device, resolve_att,
1028                            VK_ATTACHMENT_LOAD_OP_DONT_CARE, 0,
1029                            VK_ATTACHMENT_STORE_OP_STORE, 0);
1030         subpass->resolve_attachments[i].attachment = a++;
1031         att->will_be_resolved = true;
1032      } else {
1033         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1034         att->will_be_resolved = false;
1035      }
1036   }
1037
1038   if (info->pDepthAttachment || info->pStencilAttachment) {
1039      const struct VkRenderingAttachmentInfo *common_info =
1040         (info->pDepthAttachment &&
1041          info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
1042         info->pDepthAttachment :
1043         info->pStencilAttachment;
1044
1045      if (common_info && common_info->imageView != VK_NULL_HANDLE) {
1046         TU_FROM_HANDLE(tu_image_view, view, common_info->imageView);
1047
1048         struct tu_render_pass_attachment *att = &pass->attachments[a];
1049         tu_setup_dynamic_attachment(att, view);
1050         att->gmem = true;
1051         att->clear_views = info->viewMask;
1052         subpass->depth_stencil_attachment.attachment = a++;
1053
1054         attachment_set_ops(device, att,
1055                            info->pDepthAttachment ? info->pDepthAttachment->loadOp : 0,
1056                            info->pStencilAttachment ? info->pStencilAttachment->loadOp : 0,
1057                            info->pDepthAttachment ? info->pDepthAttachment->storeOp : 0,
1058                            info->pStencilAttachment ? info->pStencilAttachment->storeOp : 0);
1059
1060         subpass->samples = view->image->layout->nr_samples;
1061
1062         if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1063            unsigned i = subpass->resolve_count++;
1064            struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1065            TU_FROM_HANDLE(tu_image_view, resolve_view,
1066                           common_info->resolveImageView);
1067            tu_setup_dynamic_attachment(resolve_att, resolve_view);
1068            resolve_att->gmem = false;
1069            attachment_set_ops(device, resolve_att,
1070                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1071                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1072                               VK_ATTACHMENT_STORE_OP_STORE,
1073                               VK_ATTACHMENT_STORE_OP_STORE);
1074            subpass->resolve_attachments[i].attachment = a++;
1075            att->will_be_resolved = true;
1076            subpass->resolve_depth_stencil = true;
1077         } else {
1078            att->will_be_resolved = false;
1079         }
1080      } else {
1081         subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1082      }
1083   } else {
1084      subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1085   }
1086
1087   pass->attachment_count = a;
1088
1089   tu_render_pass_cond_config(pass);
1090   tu_render_pass_gmem_config(pass, device->physical_device);
1091   tu_render_pass_bandwidth_config(pass);
1092}
1093
1094void
1095tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
1096                             const VkCommandBufferInheritanceRenderingInfo *info)
1097{
1098   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
1099   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
1100
1101   pass->subpass_count = 1;
1102   pass->attachments = cmd_buffer->dynamic_rp_attachments;
1103
1104   subpass->color_count = info->colorAttachmentCount;
1105   subpass->resolve_count = 0;
1106   subpass->resolve_depth_stencil = false;
1107   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
1108   subpass->resolve_attachments = NULL;
1109   subpass->feedback_invalidate = false;
1110   subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
1111   subpass->input_count = 0;
1112   subpass->samples = 0;
1113   subpass->srgb_cntl = 0;
1114   subpass->raster_order_attachment_access = false;
1115   subpass->multiview_mask = info->viewMask;
1116   subpass->samples = info->rasterizationSamples;
1117
1118   unsigned a = 0;
1119   for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
1120      struct tu_render_pass_attachment *att = &pass->attachments[a];
1121      VkFormat format = info->pColorAttachmentFormats[i];
1122
1123      if (format == VK_FORMAT_UNDEFINED) {
1124         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1125         continue;
1126      }
1127
1128      att->format = format;
1129      att->samples = info->rasterizationSamples;
1130      subpass->samples = info->rasterizationSamples;
1131      subpass->color_attachments[i].attachment = a++;
1132
1133      /* conservatively assume that the attachment may be conditionally
1134       * loaded/stored.
1135       */
1136      att->cond_load_allowed = att->cond_store_allowed = true;
1137   }
1138
1139   if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
1140       info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
1141      struct tu_render_pass_attachment *att = &pass->attachments[a];
1142      att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ?
1143         info->depthAttachmentFormat : info->stencilAttachmentFormat;
1144      att->samples = info->rasterizationSamples;
1145      subpass->depth_stencil_attachment.attachment = a++;
1146      att->cond_load_allowed = att->cond_store_allowed = true;
1147   } else {
1148      subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1149   }
1150}
1151
1152VKAPI_ATTR void VKAPI_CALL
1153tu_GetRenderAreaGranularity(VkDevice _device,
1154                            VkRenderPass renderPass,
1155                            VkExtent2D *pGranularity)
1156{
1157   TU_FROM_HANDLE(tu_device, device, _device);
1158   pGranularity->width = device->physical_device->info->gmem_align_w;
1159   pGranularity->height = device->physical_device->info->gmem_align_h;
1160}
1161
1162uint32_t
1163tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
1164{
1165   if (subpass->resolve_depth_stencil &&
1166       index == (subpass->resolve_count - 1))
1167      return subpass->depth_stencil_attachment.attachment;
1168
1169   return subpass->color_attachments[index].attachment;
1170}
1171