1/*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "v3dv_private.h"
25
26static uint32_t
27num_subpass_attachments(const VkSubpassDescription2 *desc)
28{
29   return desc->inputAttachmentCount +
30          desc->colorAttachmentCount +
31          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
32          (desc->pDepthStencilAttachment != NULL);
33}
34
35static void
36set_try_tlb_resolve(struct v3dv_device *device,
37                    struct v3dv_render_pass_attachment *att)
38{
39   const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
40   att->try_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
41}
42
43static void
44pass_find_subpass_range_for_attachments(struct v3dv_device *device,
45                                        struct v3dv_render_pass *pass)
46{
47   for (uint32_t i = 0; i < pass->attachment_count; i++) {
48      pass->attachments[i].first_subpass = pass->subpass_count - 1;
49      pass->attachments[i].last_subpass = 0;
50      if (pass->multiview_enabled) {
51         for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
52            pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
53            pass->attachments[i].views[j].last_subpass = 0;
54         }
55      }
56   }
57
58   for (uint32_t i = 0; i < pass->subpass_count; i++) {
59      const struct v3dv_subpass *subpass = &pass->subpasses[i];
60
61      for (uint32_t j = 0; j < subpass->color_count; j++) {
62         uint32_t attachment_idx = subpass->color_attachments[j].attachment;
63         if (attachment_idx == VK_ATTACHMENT_UNUSED)
64            continue;
65
66         struct v3dv_render_pass_attachment *att =
67            &pass->attachments[attachment_idx];
68
69         if (i < att->first_subpass)
70            att->first_subpass = i;
71         if (i > att->last_subpass)
72            att->last_subpass = i;
73
74         uint32_t view_mask = subpass->view_mask;
75         while (view_mask) {
76            uint32_t view_index = u_bit_scan(&view_mask);
77            if (i < att->views[view_index].first_subpass)
78               att->views[view_index].first_subpass = i;
79            if (i > att->views[view_index].last_subpass)
80               att->views[view_index].last_subpass = i;
81         }
82
83         if (subpass->resolve_attachments &&
84             subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
85            set_try_tlb_resolve(device, att);
86         }
87      }
88
89      uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
90      if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
91         if (i < pass->attachments[ds_attachment_idx].first_subpass)
92            pass->attachments[ds_attachment_idx].first_subpass = i;
93         if (i > pass->attachments[ds_attachment_idx].last_subpass)
94            pass->attachments[ds_attachment_idx].last_subpass = i;
95
96         if (subpass->ds_resolve_attachment.attachment != VK_ATTACHMENT_UNUSED)
97            set_try_tlb_resolve(device, &pass->attachments[ds_attachment_idx]);
98      }
99
100      for (uint32_t j = 0; j < subpass->input_count; j++) {
101         uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
102         if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
103            continue;
104         if (i < pass->attachments[input_attachment_idx].first_subpass)
105            pass->attachments[input_attachment_idx].first_subpass = i;
106         if (i > pass->attachments[input_attachment_idx].last_subpass)
107            pass->attachments[input_attachment_idx].last_subpass = i;
108      }
109
110      if (subpass->resolve_attachments) {
111         for (uint32_t j = 0; j < subpass->color_count; j++) {
112            uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
113            if (attachment_idx == VK_ATTACHMENT_UNUSED)
114               continue;
115            if (i < pass->attachments[attachment_idx].first_subpass)
116               pass->attachments[attachment_idx].first_subpass = i;
117            if (i > pass->attachments[attachment_idx].last_subpass)
118               pass->attachments[attachment_idx].last_subpass = i;
119         }
120      }
121   }
122}
123
124
125VKAPI_ATTR VkResult VKAPI_CALL
126v3dv_CreateRenderPass2(VkDevice _device,
127                       const VkRenderPassCreateInfo2 *pCreateInfo,
128                       const VkAllocationCallbacks *pAllocator,
129                       VkRenderPass *pRenderPass)
130{
131   V3DV_FROM_HANDLE(v3dv_device, device, _device);
132   struct v3dv_render_pass *pass;
133
134   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
135
136   /* From the VK_KHR_multiview spec:
137    *
138    *   When a subpass uses a non-zero view mask, multiview functionality is
139    *   considered to be enabled. Multiview is all-or-nothing for a render
140    *   pass - that is, either all subpasses must have a non-zero view mask
141    *   (though some subpasses may have only one view) or all must be zero.
142    */
143   bool multiview_enabled = pCreateInfo->subpassCount &&
144      pCreateInfo->pSubpasses[0].viewMask;
145
146   size_t size = sizeof(*pass);
147   size_t subpasses_offset = size;
148   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
149   size_t attachments_offset = size;
150   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
151
152   pass = vk_object_zalloc(&device->vk, pAllocator, size,
153                           VK_OBJECT_TYPE_RENDER_PASS);
154   if (pass == NULL)
155      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
156
157   pass->multiview_enabled = multiview_enabled;
158   pass->attachment_count = pCreateInfo->attachmentCount;
159   pass->attachments = (void *) pass + attachments_offset;
160   pass->subpass_count = pCreateInfo->subpassCount;
161   pass->subpasses = (void *) pass + subpasses_offset;
162
163   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
164      pass->attachments[i].desc = pCreateInfo->pAttachments[i];
165
166   uint32_t subpass_attachment_count = 0;
167   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
168      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
169      subpass_attachment_count += num_subpass_attachments(desc);
170   }
171
172   if (subpass_attachment_count) {
173      const size_t subpass_attachment_bytes =
174         subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
175      pass->subpass_attachments =
176         vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
177                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
178      if (pass->subpass_attachments == NULL) {
179         vk_object_free(&device->vk, pAllocator, pass);
180         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
181      }
182   } else {
183      pass->subpass_attachments = NULL;
184   }
185
186   struct v3dv_subpass_attachment *p = pass->subpass_attachments;
187   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
188      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
189      struct v3dv_subpass *subpass = &pass->subpasses[i];
190
191      subpass->input_count = desc->inputAttachmentCount;
192      subpass->color_count = desc->colorAttachmentCount;
193      subpass->view_mask = desc->viewMask;
194
195      if (desc->inputAttachmentCount > 0) {
196         subpass->input_attachments = p;
197         p += desc->inputAttachmentCount;
198
199         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
200            subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
201               .attachment = desc->pInputAttachments[j].attachment,
202               .layout = desc->pInputAttachments[j].layout,
203            };
204         }
205      }
206
207      if (desc->colorAttachmentCount > 0) {
208         subpass->color_attachments = p;
209         p += desc->colorAttachmentCount;
210
211         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
212            subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
213               .attachment = desc->pColorAttachments[j].attachment,
214               .layout = desc->pColorAttachments[j].layout,
215            };
216         }
217      }
218
219      if (desc->pResolveAttachments) {
220         subpass->resolve_attachments = p;
221         p += desc->colorAttachmentCount;
222
223         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
224            subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
225               .attachment = desc->pResolveAttachments[j].attachment,
226               .layout = desc->pResolveAttachments[j].layout,
227            };
228         }
229      }
230
231      if (desc->pDepthStencilAttachment) {
232         subpass->ds_attachment = (struct v3dv_subpass_attachment) {
233            .attachment = desc->pDepthStencilAttachment->attachment,
234            .layout = desc->pDepthStencilAttachment->layout,
235         };
236
237         /* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
238          * the clear might get lost. If a subpass has this then we can't emit
239          * the clear using the TLB and we have to do it as a draw call.
240          *
241          * FIXME: separate stencil.
242          */
243         if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
244            struct v3dv_render_pass_attachment *att =
245               &pass->attachments[subpass->ds_attachment.attachment];
246            if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
247               if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
248                   att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
249                  subpass->do_depth_clear_with_draw = true;
250               } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
251                          att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
252                  subpass->do_stencil_clear_with_draw = true;
253               }
254            }
255         }
256
257         /* VK_KHR_depth_stencil_resolve */
258         const VkSubpassDescriptionDepthStencilResolve *resolve_desc =
259            vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
260         const VkAttachmentReference2 *resolve_att =
261            resolve_desc && resolve_desc->pDepthStencilResolveAttachment &&
262            resolve_desc->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED ?
263               resolve_desc->pDepthStencilResolveAttachment : NULL;
264         if (resolve_att) {
265            subpass->ds_resolve_attachment = (struct v3dv_subpass_attachment) {
266               .attachment = resolve_att->attachment,
267               .layout = resolve_att->layout,
268            };
269            assert(resolve_desc->depthResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT ||
270                   resolve_desc->stencilResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
271            subpass->resolve_depth =
272               resolve_desc->depthResolveMode != VK_RESOLVE_MODE_NONE &&
273               resolve_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
274            subpass->resolve_stencil =
275               resolve_desc->stencilResolveMode != VK_RESOLVE_MODE_NONE &&
276               resolve_att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
277         } else {
278            subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
279            subpass->resolve_depth = false;
280            subpass->resolve_stencil = false;
281         }
282      } else {
283         subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
284         subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
285         subpass->resolve_depth = false;
286         subpass->resolve_stencil = false;
287      }
288   }
289
290   pass_find_subpass_range_for_attachments(device, pass);
291
292   /* FIXME: handle subpass dependencies */
293
294   *pRenderPass = v3dv_render_pass_to_handle(pass);
295
296   return VK_SUCCESS;
297}
298
299VKAPI_ATTR void VKAPI_CALL
300v3dv_DestroyRenderPass(VkDevice _device,
301                       VkRenderPass _pass,
302                       const VkAllocationCallbacks *pAllocator)
303{
304   V3DV_FROM_HANDLE(v3dv_device, device, _device);
305   V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);
306
307   if (!_pass)
308      return;
309
310   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
311   vk_object_free(&device->vk, pAllocator, pass);
312}
313
314static void
315subpass_get_granularity(struct v3dv_device *device,
316                        struct v3dv_render_pass *pass,
317                        uint32_t subpass_idx,
318                        VkExtent2D *granularity)
319{
320   /* Granularity is defined by the tile size */
321   assert(subpass_idx < pass->subpass_count);
322   struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
323   const uint32_t color_attachment_count = subpass->color_count;
324
325   bool msaa = false;
326   uint32_t max_bpp = 0;
327   for (uint32_t i = 0; i < color_attachment_count; i++) {
328      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
329      if (attachment_idx == VK_ATTACHMENT_UNUSED)
330         continue;
331      const VkAttachmentDescription2 *desc =
332         &pass->attachments[attachment_idx].desc;
333      const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
334      uint32_t internal_type, internal_bpp;
335      v3dv_X(device, get_internal_type_bpp_for_output_format)
336         (format->rt_type, &internal_type, &internal_bpp);
337
338      max_bpp = MAX2(max_bpp, internal_bpp);
339
340      if (desc->samples > VK_SAMPLE_COUNT_1_BIT)
341         msaa = true;
342   }
343
344   uint32_t width, height;
345   bool double_buffer = (V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
346   v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
347                        double_buffer, &width, &height);
348   *granularity = (VkExtent2D) {
349      .width = width,
350      .height = height
351   };
352}
353
354VKAPI_ATTR void VKAPI_CALL
355v3dv_GetRenderAreaGranularity(VkDevice _device,
356                              VkRenderPass renderPass,
357                              VkExtent2D *pGranularity)
358{
359   V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
360   V3DV_FROM_HANDLE(v3dv_device, device, _device);
361
362   *pGranularity = (VkExtent2D) {
363      .width = 64,
364      .height = 64,
365   };
366
367   for (uint32_t i = 0; i < pass->subpass_count; i++) {
368      VkExtent2D sg;
369      subpass_get_granularity(device, pass, i, &sg);
370      pGranularity->width = MIN2(pGranularity->width, sg.width);
371      pGranularity->height = MIN2(pGranularity->height, sg.height);
372   }
373}
374
375/* Checks whether the render area rectangle covers a region that is aligned to
376 * tile boundaries. This means that we are writing to all pixels covered by
377 * all tiles in that area (except for pixels on edge tiles that are outside
378 * the framebuffer dimensions).
379 *
380 * When our framebuffer is aligned to tile boundaries we know we are writing
 * valid data to all pixels in each tile and we can apply certain
382 * optimizations, like avoiding tile loads, since we know that none of the
383 * original pixel values in each tile for that area need to be preserved.
384 * We also use this to decide if we can use TLB clears, as these clear whole
385 * tiles so we can't use them if the render area is not aligned.
386 *
387 * Note that when an image is created it will possibly include padding blocks
388 * depending on its tiling layout. When the framebuffer dimensions are not
389 * aligned to tile boundaries then edge tiles are only partially covered by the
390 * framebuffer pixels, but tile stores still seem to store full tiles
391 * writing to the padded sections. This is important when the framebuffer
392 * is aliasing a smaller section of a larger image, as in that case the edge
393 * tiles of the framebuffer would overwrite valid pixels in the larger image.
394 * In that case, we can't flag the area as being aligned.
395 */
396bool
397v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
398                                  const VkRect2D *area,
399                                  struct v3dv_framebuffer *fb,
400                                  struct v3dv_render_pass *pass,
401                                  uint32_t subpass_idx)
402{
403   assert(subpass_idx < pass->subpass_count);
404
405   VkExtent2D granularity;
406   subpass_get_granularity(device, pass, subpass_idx, &granularity);
407
408   return area->offset.x % granularity.width == 0 &&
409          area->offset.y % granularity.height == 0 &&
410         (area->extent.width % granularity.width == 0 ||
411          (fb->has_edge_padding &&
412           area->offset.x + area->extent.width >= fb->width)) &&
413         (area->extent.height % granularity.height == 0 ||
414          (fb->has_edge_padding &&
415           area->offset.y + area->extent.height >= fb->height));
416}
417