/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

static uint32_t
num_subpass_attachments(const VkSubpassDescription2 *desc)
{
   return desc->inputAttachmentCount +
          desc->colorAttachmentCount +
          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
          (desc->pDepthStencilAttachment != NULL);
}

static void
set_try_tlb_resolve(struct v3dv_device *device,
                    struct v3dv_render_pass_attachment *att)
{
   const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
   att->try_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
}

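/* Computes, for every attachment, the range of subpasses [first_subpass,
 * last_subpass] in which it is used. The ranges start out inverted
 * (first_subpass = subpass_count - 1, last_subpass = 0) so the first use
 * found below narrows them to the correct values; an attachment that is
 * never referenced keeps the inverted range. When multiview is enabled the
 * same tracking is done per view. This pass also flags attachments that are
 * resolved in some subpass so we can later try to resolve them directly
 * from the TLB.
 */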
static void
pass_find_subpass_range_for_attachments(struct v3dv_device *device,
                                        struct v3dv_render_pass *pass)
{
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      pass->attachments[i].first_subpass = pass->subpass_count - 1;
      pass->attachments[i].last_subpass = 0;
      if (pass->multiview_enabled) {
         for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
            pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
            pass->attachments[i].views[j].last_subpass = 0;
         }
      }
   }

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const struct v3dv_subpass *subpass = &pass->subpasses[i];

      for (uint32_t j = 0; j < subpass->color_count; j++) {
         uint32_t attachment_idx = subpass->color_attachments[j].attachment;
         if (attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;

         struct v3dv_render_pass_attachment *att =
            &pass->attachments[attachment_idx];

         if (i < att->first_subpass)
            att->first_subpass = i;
         if (i > att->last_subpass)
            att->last_subpass = i;

         uint32_t view_mask = subpass->view_mask;
         while (view_mask) {
            uint32_t view_index = u_bit_scan(&view_mask);
            if (i < att->views[view_index].first_subpass)
               att->views[view_index].first_subpass = i;
            if (i > att->views[view_index].last_subpass)
               att->views[view_index].last_subpass = i;
         }

         if (subpass->resolve_attachments &&
             subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
            set_try_tlb_resolve(device, att);
         }
      }

      uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
      if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
         if (i < pass->attachments[ds_attachment_idx].first_subpass)
            pass->attachments[ds_attachment_idx].first_subpass = i;
         if (i > pass->attachments[ds_attachment_idx].last_subpass)
            pass->attachments[ds_attachment_idx].last_subpass = i;

         if (subpass->ds_resolve_attachment.attachment != VK_ATTACHMENT_UNUSED)
            set_try_tlb_resolve(device, &pass->attachments[ds_attachment_idx]);
      }

      for (uint32_t j = 0; j < subpass->input_count; j++) {
         uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
         if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;
         if (i < pass->attachments[input_attachment_idx].first_subpass)
            pass->attachments[input_attachment_idx].first_subpass = i;
         if (i > pass->attachments[input_attachment_idx].last_subpass)
            pass->attachments[input_attachment_idx].last_subpass = i;
      }

      if (subpass->resolve_attachments) {
         for (uint32_t j = 0; j < subpass->color_count; j++) {
            uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
            if (attachment_idx == VK_ATTACHMENT_UNUSED)
               continue;
            if (i < pass->attachments[attachment_idx].first_subpass)
               pass->attachments[attachment_idx].first_subpass = i;
            if (i > pass->attachments[attachment_idx].last_subpass)
               pass->attachments[attachment_idx].last_subpass = i;
         }
      }
   }
}

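/* Creates the render pass object as a single host allocation that packs the
 * pass itself followed by its subpass and attachment arrays, with the
 * per-subpass attachment references kept in a second allocation
 * (pass->subpass_attachments).
 */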
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateRenderPass2(VkDevice _device,
                       const VkRenderPassCreateInfo2 *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkRenderPass *pRenderPass)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_render_pass *pass;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);

   /* From the VK_KHR_multiview spec:
    *
    *    When a subpass uses a non-zero view mask, multiview functionality is
    *    considered to be enabled. Multiview is all-or-nothing for a render
    *    pass - that is, either all subpasses must have a non-zero view mask
    *    (though some subpasses may have only one view) or all must be zero.
    */
   bool multiview_enabled = pCreateInfo->subpassCount &&
      pCreateInfo->pSubpasses[0].viewMask;

   size_t size = sizeof(*pass);
   size_t subpasses_offset = size;
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   size_t attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->multiview_enabled = multiview_enabled;
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = (void *) pass + attachments_offset;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = (void *) pass + subpasses_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
      pass->attachments[i].desc = pCreateInfo->pAttachments[i];

   uint32_t subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count += num_subpass_attachments(desc);
   }

   if (subpass_attachment_count) {
      const size_t subpass_attachment_bytes =
         subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
      pass->subpass_attachments =
         vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      pass->subpass_attachments = NULL;
   }

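   /* p walks the flat subpass_attachments array: each subpass claims
    * consecutive slices of it for its input, color and resolve attachment
    * references. The depth/stencil reference is stored inline in the subpass
    * instead, so the count from num_subpass_attachments() over-allocates by
    * one slot for each subpass that has a depth/stencil attachment.
    */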
   struct v3dv_subpass_attachment *p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct v3dv_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->view_mask = desc->viewMask;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pInputAttachments[j].attachment,
               .layout = desc->pInputAttachments[j].layout,
            };
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pColorAttachments[j].attachment,
               .layout = desc->pColorAttachments[j].layout,
            };
         }
      }

      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
               .attachment = desc->pResolveAttachments[j].attachment,
               .layout = desc->pResolveAttachments[j].layout,
            };
         }
      }

      if (desc->pDepthStencilAttachment) {
         subpass->ds_attachment = (struct v3dv_subpass_attachment) {
            .attachment = desc->pDepthStencilAttachment->attachment,
            .layout = desc->pDepthStencilAttachment->layout,
         };

         /* GFXH-1461: if depth is cleared but stencil is loaded (or vice
          * versa), the clear might get lost. If a subpass has this then we
          * can't emit the clear using the TLB and we have to do it as a
          * draw call.
          *
          * FIXME: separate stencil.
          */
         if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
            struct v3dv_render_pass_attachment *att =
               &pass->attachments[subpass->ds_attachment.attachment];
            if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
               if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
                   att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
                  subpass->do_depth_clear_with_draw = true;
               } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
                          att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
                  subpass->do_stencil_clear_with_draw = true;
               }
            }
         }

         /* VK_KHR_depth_stencil_resolve */
         const VkSubpassDescriptionDepthStencilResolve *resolve_desc =
            vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
         const VkAttachmentReference2 *resolve_att =
            resolve_desc && resolve_desc->pDepthStencilResolveAttachment &&
            resolve_desc->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED ?
            resolve_desc->pDepthStencilResolveAttachment : NULL;
         if (resolve_att) {
            subpass->ds_resolve_attachment = (struct v3dv_subpass_attachment) {
               .attachment = resolve_att->attachment,
               .layout = resolve_att->layout,
            };
            assert(resolve_desc->depthResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT ||
                   resolve_desc->stencilResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
            subpass->resolve_depth =
               resolve_desc->depthResolveMode != VK_RESOLVE_MODE_NONE &&
               resolve_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
            subpass->resolve_stencil =
               resolve_desc->stencilResolveMode != VK_RESOLVE_MODE_NONE &&
               resolve_att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
         } else {
            subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
            subpass->resolve_depth = false;
            subpass->resolve_stencil = false;
         }
      } else {
         subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
         subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
         subpass->resolve_depth = false;
         subpass->resolve_stencil = false;
      }
   }

   pass_find_subpass_range_for_attachments(device, pass);

   /* FIXME: handle subpass dependencies */

   *pRenderPass = v3dv_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyRenderPass(VkDevice _device,
                       VkRenderPass _pass,
                       const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);

   if (!_pass)
      return;

   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
   vk_object_free(&device->vk, pAllocator, pass);
}

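/* Returns the render area granularity for a subpass, which on this hardware
 * is the tile size. The tile size depends on the number of color
 * attachments, the largest internal bpp among them, whether any of them is
 * multisampled, and whether double-buffer mode is in effect, so it has to
 * be computed per subpass.
 */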
static void
subpass_get_granularity(struct v3dv_device *device,
                        struct v3dv_render_pass *pass,
                        uint32_t subpass_idx,
                        VkExtent2D *granularity)
{
   /* Granularity is defined by the tile size */
   assert(subpass_idx < pass->subpass_count);
   struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
   const uint32_t color_attachment_count = subpass->color_count;

   bool msaa = false;
   uint32_t max_bpp = 0;
   for (uint32_t i = 0; i < color_attachment_count; i++) {
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;
      const VkAttachmentDescription2 *desc =
         &pass->attachments[attachment_idx].desc;
      const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
      uint32_t internal_type, internal_bpp;
      v3dv_X(device, get_internal_type_bpp_for_output_format)
         (format->rt_type, &internal_type, &internal_bpp);

      max_bpp = MAX2(max_bpp, internal_bpp);

      if (desc->samples > VK_SAMPLE_COUNT_1_BIT)
         msaa = true;
   }

   uint32_t width, height;
   bool double_buffer = (V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
   v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
                        double_buffer, &width, &height);
   *granularity = (VkExtent2D) {
      .width = width,
      .height = height
   };
}

VKAPI_ATTR void VKAPI_CALL
v3dv_GetRenderAreaGranularity(VkDevice _device,
                              VkRenderPass renderPass,
                              VkExtent2D *pGranularity)
{
   V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   *pGranularity = (VkExtent2D) {
      .width = 64,
      .height = 64,
   };

   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      VkExtent2D sg;
      subpass_get_granularity(device, pass, i, &sg);
      pGranularity->width = MIN2(pGranularity->width, sg.width);
      pGranularity->height = MIN2(pGranularity->height, sg.height);
   }
}

/* Checks whether the render area rectangle covers a region that is aligned to
 * tile boundaries. This means that we are writing to all pixels covered by
 * all tiles in that area (except for pixels on edge tiles that are outside
 * the framebuffer dimensions).
 *
 * When our framebuffer is aligned to tile boundaries we know we are writing
 * valid data to all pixels in each tile and we can apply certain
 * optimizations, like avoiding tile loads, since we know that none of the
 * original pixel values in each tile for that area need to be preserved.
 * We also use this to decide if we can use TLB clears, as these clear whole
 * tiles, so we can't use them if the render area is not aligned.
 *
 * Note that when an image is created it will possibly include padding blocks
 * depending on its tiling layout. When the framebuffer dimensions are not
 * aligned to tile boundaries, edge tiles are only partially covered by the
 * framebuffer pixels, but tile stores still seem to store full tiles,
 * writing to the padded sections. This is important when the framebuffer
 * is aliasing a smaller section of a larger image, as in that case the edge
 * tiles of the framebuffer would overwrite valid pixels in the larger image.
 * In that case, we can't flag the area as being aligned.
 */
bool
v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                  const VkRect2D *area,
                                  struct v3dv_framebuffer *fb,
                                  struct v3dv_render_pass *pass,
                                  uint32_t subpass_idx)
{
   assert(subpass_idx < pass->subpass_count);

   VkExtent2D granularity;
   subpass_get_granularity(device, pass, subpass_idx, &granularity);

   return area->offset.x % granularity.width == 0 &&
          area->offset.y % granularity.height == 0 &&
         (area->extent.width % granularity.width == 0 ||
          (fb->has_edge_padding &&
           area->offset.x + area->extent.width >= fb->width)) &&
         (area->extent.height % granularity.height == 0 ||
          (fb->has_edge_padding &&
           area->offset.y + area->extent.height >= fb->height));
}