1/* 2 * Copyright © 2022 Imagination Technologies Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <stdbool.h> 25#include <stdint.h> 26 27#include "hwdef/rogue_hw_utils.h" 28#include "pvr_bo.h" 29#include "pvr_device_info.h" 30#include "pvr_formats.h" 31#include "pvr_hw_pass.h" 32#include "pvr_pds.h" 33#include "pvr_private.h" 34#include "pvr_usc_fragment_shader.h" 35#include "rogue/rogue.h" 36#include "vk_alloc.h" 37#include "vk_format.h" 38#include "vk_log.h" 39 40/***************************************************************************** 41 PDS pre-baked program generation parameters and variables. 42*****************************************************************************/ 43/* These would normally be produced by the compiler or other code. We're using 44 * them for now just to speed up things. All of these should eventually be 45 * removed. 46 */ 47 48static const struct { 49 /* Indicates the amount of temporaries for the shader. */ 50 uint32_t temp_count; 51 enum rogue_msaa_mode msaa_mode; 52 /* Indicates the presence of PHAS instruction. */ 53 bool has_phase_rate_change; 54} pvr_pds_fragment_program_params = { 55 .temp_count = 0, 56 .msaa_mode = ROGUE_MSAA_MODE_PIXEL, 57 .has_phase_rate_change = false, 58}; 59 60static inline bool pvr_subpass_has_msaa_input_attachment( 61 struct pvr_render_subpass *subpass, 62 const VkRenderPassCreateInfo2 *pCreateInfo) 63{ 64 for (uint32_t i = 0; i < subpass->input_count; i++) { 65 const uint32_t attachment = subpass->input_attachments[i]; 66 67 if (pCreateInfo->pAttachments[attachment].samples > 1) 68 return true; 69 } 70 71 return false; 72} 73 74static inline size_t 75pvr_num_subpass_attachments(const VkSubpassDescription2 *desc) 76{ 77 return desc->inputAttachmentCount + desc->colorAttachmentCount + 78 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) + 79 (desc->pDepthStencilAttachment != NULL); 80} 81 82static bool pvr_is_subpass_initops_flush_needed( 83 const struct pvr_render_pass *pass, 84 const struct pvr_renderpass_hwsetup_render *hw_render) 85{ 86 struct pvr_render_subpass *subpass = &pass->subpasses[0]; 87 uint32_t render_loadop_mask = 0; 88 uint32_t color_attachment_mask; 89 90 for (uint32_t i = 0; i < hw_render->color_init_count; i++) { 91 if (hw_render->color_init[i].op != RENDERPASS_SURFACE_INITOP_NOP) 92 render_loadop_mask |= (1 << hw_render->color_init[i].driver_id); 93 } 94 95 /* If there are no load ops then there's nothing to flush. */ 96 if (render_loadop_mask == 0) 97 return false; 98 99 /* If the first subpass has any input attachments, they need to be 100 * initialized with the result of the load op. Since the input attachment 101 * may be read from fragments with an opaque pass type, the load ops must be 102 * flushed or else they would be obscured and eliminated by HSR. 103 */ 104 if (subpass->input_count != 0) 105 return true; 106 107 color_attachment_mask = 0; 108 109 for (uint32_t i = 0; i < subpass->color_count; i++) { 110 const int32_t color_idx = subpass->color_attachments[i]; 111 112 if (color_idx != -1) 113 color_attachment_mask |= (1 << pass->attachments[color_idx].index); 114 } 115 116 /* If the first subpass does not write to all attachments which have a load 117 * op then the load ops need to be flushed to ensure they don't get obscured 118 * and removed by HSR. 119 */ 120 return (render_loadop_mask & color_attachment_mask) != render_loadop_mask; 121} 122 123static void 124pvr_init_subpass_userpass_spawn(struct pvr_renderpass_hwsetup *hw_setup, 125 struct pvr_render_pass *pass, 126 struct pvr_render_subpass *subpasses) 127{ 128 uint32_t subpass_idx = 0; 129 130 for (uint32_t i = 0; i < hw_setup->render_count; i++) { 131 struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i]; 132 uint32_t initial_userpass_spawn = 133 (uint32_t)pvr_is_subpass_initops_flush_needed(pass, hw_render); 134 135 for (uint32_t j = 0; j < hw_render->subpass_count; j++) { 136 subpasses[subpass_idx].userpass_spawn = (j + initial_userpass_spawn); 137 subpass_idx++; 138 } 139 } 140 141 assert(subpass_idx == pass->subpass_count); 142} 143 144static inline bool pvr_has_output_register_writes( 145 const struct pvr_renderpass_hwsetup_render *hw_render) 146{ 147 for (uint32_t i = 0; i < hw_render->init_setup.render_targets_count; i++) { 148 struct usc_mrt_resource *mrt_resource = 149 &hw_render->init_setup.mrt_resources[i]; 150 151 if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER) 152 return true; 153 } 154 155 return false; 156} 157 158VkResult pvr_pds_unitex_state_program_create_and_upload( 159 struct pvr_device *device, 160 const VkAllocationCallbacks *allocator, 161 uint32_t texture_kicks, 162 uint32_t uniform_kicks, 163 struct pvr_pds_upload *const pds_upload_out) 164{ 165 struct pvr_pds_pixel_shader_sa_program program = { 166 .num_texture_dma_kicks = texture_kicks, 167 .num_uniform_dma_kicks = uniform_kicks, 168 }; 169 uint32_t staging_buffer_size; 170 uint32_t *staging_buffer; 171 VkResult result; 172 173 pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program); 174 175 staging_buffer_size = program.code_size * sizeof(*staging_buffer); 176 177 staging_buffer = vk_alloc2(&device->vk.alloc, 178 allocator, 179 staging_buffer_size, 180 8U, 181 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 182 if (!staging_buffer) 183 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 184 185 pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer); 186 187 /* FIXME: Figure out the define for alignment of 16. */ 188 result = pvr_gpu_upload_pds(device, 189 NULL, 190 0U, 191 0U, 192 staging_buffer, 193 program.code_size, 194 16U, 195 16U, 196 pds_upload_out); 197 if (result != VK_SUCCESS) { 198 vk_free2(&device->vk.alloc, allocator, staging_buffer); 199 return result; 200 } 201 202 vk_free2(&device->vk.alloc, allocator, staging_buffer); 203 204 return VK_SUCCESS; 205} 206 207static VkResult 208pvr_load_op_create(struct pvr_device *device, 209 const VkAllocationCallbacks *allocator, 210 struct pvr_renderpass_hwsetup_render *hw_render, 211 struct pvr_load_op **const load_op_out) 212{ 213 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 214 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); 215 struct pvr_load_op *load_op; 216 VkResult result; 217 218 load_op = vk_zalloc2(&device->vk.alloc, 219 allocator, 220 sizeof(*load_op), 221 8, 222 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 223 if (!load_op) 224 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 225 226 for (uint32_t i = 0; i < hw_render->color_init_count; i++) { 227 struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i]; 228 229 if (color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR) 230 load_op->clear_mask |= 1U << i; 231 else if (color_init->op == RENDERPASS_SURFACE_INITOP_LOAD) 232 pvr_finishme("Missing 'load' load op"); 233 } 234 235 result = pvr_gpu_upload_usc(device, 236 pvr_usc_fragment_shader, 237 sizeof(pvr_usc_fragment_shader), 238 cache_line_size, 239 &load_op->usc_frag_prog_bo); 240 if (result != VK_SUCCESS) 241 goto err_free_load_op; 242 243 result = pvr_pds_fragment_program_create_and_upload( 244 device, 245 allocator, 246 load_op->usc_frag_prog_bo, 247 pvr_pds_fragment_program_params.temp_count, 248 pvr_pds_fragment_program_params.msaa_mode, 249 pvr_pds_fragment_program_params.has_phase_rate_change, 250 &load_op->pds_frag_prog); 251 if (result != VK_SUCCESS) 252 goto err_free_usc_frag_prog_bo; 253 254 result = pvr_pds_unitex_state_program_create_and_upload( 255 device, 256 allocator, 257 1U, 258 0U, 259 &load_op->pds_tex_state_prog); 260 if (result != VK_SUCCESS) 261 goto err_free_pds_frag_prog; 262 263 load_op->is_hw_object = true; 264 /* FIXME: These should be based on the USC and PDS programs, but are hard 265 * coded for now. 266 */ 267 load_op->const_shareds_count = 1; 268 load_op->shareds_dest_offset = 0; 269 load_op->shareds_count = 1; 270 load_op->temps_count = 1; 271 272 *load_op_out = load_op; 273 274 return VK_SUCCESS; 275 276err_free_pds_frag_prog: 277 pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo); 278 279err_free_usc_frag_prog_bo: 280 pvr_bo_free(device, load_op->usc_frag_prog_bo); 281 282err_free_load_op: 283 vk_free2(&device->vk.alloc, allocator, load_op); 284 285 return result; 286} 287 288static void pvr_load_op_destroy(struct pvr_device *device, 289 const VkAllocationCallbacks *allocator, 290 struct pvr_load_op *load_op) 291{ 292 pvr_bo_free(device, load_op->pds_tex_state_prog.pvr_bo); 293 pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo); 294 pvr_bo_free(device, load_op->usc_frag_prog_bo); 295 vk_free2(&device->vk.alloc, allocator, load_op); 296} 297 298#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info) \ 299 ({ \ 300 int __ret = 7U; \ 301 if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) \ 302 __ret = 3U; \ 303 __ret; \ 304 }) 305 306VkResult pvr_CreateRenderPass2(VkDevice _device, 307 const VkRenderPassCreateInfo2 *pCreateInfo, 308 const VkAllocationCallbacks *pAllocator, 309 VkRenderPass *pRenderPass) 310{ 311 struct pvr_render_pass_attachment *attachments; 312 PVR_FROM_HANDLE(pvr_device, device, _device); 313 struct pvr_render_subpass *subpasses; 314 size_t subpass_attachment_count; 315 uint32_t *subpass_attachments; 316 struct pvr_render_pass *pass; 317 uint32_t *dep_list; 318 bool *flush_on_dep; 319 VkResult result; 320 321 VK_MULTIALLOC(ma); 322 vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1); 323 vk_multialloc_add(&ma, 324 &attachments, 325 __typeof__(*attachments), 326 pCreateInfo->attachmentCount); 327 vk_multialloc_add(&ma, 328 &subpasses, 329 __typeof__(*subpasses), 330 pCreateInfo->subpassCount); 331 332 subpass_attachment_count = 0; 333 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { 334 subpass_attachment_count += 335 pvr_num_subpass_attachments(&pCreateInfo->pSubpasses[i]); 336 } 337 338 vk_multialloc_add(&ma, 339 &subpass_attachments, 340 __typeof__(*subpass_attachments), 341 subpass_attachment_count); 342 vk_multialloc_add(&ma, 343 &dep_list, 344 __typeof__(*dep_list), 345 pCreateInfo->dependencyCount); 346 vk_multialloc_add(&ma, 347 &flush_on_dep, 348 __typeof__(*flush_on_dep), 349 pCreateInfo->dependencyCount); 350 351 if (!vk_multialloc_zalloc2(&ma, 352 &device->vk.alloc, 353 pAllocator, 354 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) { 355 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 356 } 357 358 vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS); 359 pass->attachment_count = pCreateInfo->attachmentCount; 360 pass->attachments = attachments; 361 pass->subpass_count = pCreateInfo->subpassCount; 362 pass->subpasses = subpasses; 363 pass->max_sample_count = 1; 364 365 /* Copy attachment descriptions. */ 366 for (uint32_t i = 0; i < pass->attachment_count; i++) { 367 const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i]; 368 struct pvr_render_pass_attachment *attachment = &pass->attachments[i]; 369 370 pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT)); 371 372 attachment->load_op = desc->loadOp; 373 attachment->store_op = desc->storeOp; 374 375 attachment->has_stencil = vk_format_has_stencil(attachment->vk_format); 376 if (attachment->has_stencil) { 377 attachment->stencil_load_op = desc->stencilLoadOp; 378 attachment->stencil_store_op = desc->stencilStoreOp; 379 } 380 381 attachment->vk_format = desc->format; 382 attachment->sample_count = desc->samples; 383 attachment->initial_layout = desc->initialLayout; 384 attachment->is_pbe_downscalable = 385 pvr_format_is_pbe_downscalable(attachment->vk_format); 386 attachment->index = i; 387 388 if (attachment->sample_count > pass->max_sample_count) 389 pass->max_sample_count = attachment->sample_count; 390 } 391 392 /* Count how many dependencies each subpass has. */ 393 for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { 394 const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i]; 395 396 if (dep->srcSubpass != VK_SUBPASS_EXTERNAL && 397 dep->dstSubpass != VK_SUBPASS_EXTERNAL && 398 dep->srcSubpass != dep->dstSubpass) { 399 pass->subpasses[dep->dstSubpass].dep_count++; 400 } 401 } 402 403 /* Assign reference pointers to lists, and fill in the attachments list, we 404 * need to re-walk the dependencies array later to fill the per-subpass 405 * dependencies lists in. 406 */ 407 for (uint32_t i = 0; i < pass->subpass_count; i++) { 408 const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; 409 struct pvr_render_subpass *subpass = &pass->subpasses[i]; 410 411 subpass->pipeline_bind_point = desc->pipelineBindPoint; 412 subpass->sample_count = 1; 413 414 subpass->color_count = desc->colorAttachmentCount; 415 if (subpass->color_count > 0) { 416 bool has_used_color_attachment = false; 417 uint32_t index; 418 419 subpass->color_attachments = subpass_attachments; 420 subpass_attachments += subpass->color_count; 421 422 for (uint32_t j = 0; j < subpass->color_count; j++) { 423 subpass->color_attachments[j] = 424 desc->pColorAttachments[j].attachment; 425 426 if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED) 427 continue; 428 429 index = subpass->color_attachments[j]; 430 subpass->sample_count = pass->attachments[index].sample_count; 431 has_used_color_attachment = true; 432 } 433 434 if (!has_used_color_attachment && desc->pDepthStencilAttachment && 435 desc->pDepthStencilAttachment->attachment != 436 VK_ATTACHMENT_UNUSED) { 437 index = desc->pDepthStencilAttachment->attachment; 438 subpass->sample_count = pass->attachments[index].sample_count; 439 } 440 } 441 442 if (desc->pResolveAttachments) { 443 subpass->resolve_attachments = subpass_attachments; 444 subpass_attachments += subpass->color_count; 445 446 for (uint32_t j = 0; j < subpass->color_count; j++) { 447 subpass->resolve_attachments[j] = 448 desc->pResolveAttachments[j].attachment; 449 } 450 } 451 452 subpass->input_count = desc->inputAttachmentCount; 453 if (subpass->input_count > 0) { 454 subpass->input_attachments = subpass_attachments; 455 subpass_attachments += subpass->input_count; 456 457 for (uint32_t j = 0; j < subpass->input_count; j++) { 458 subpass->input_attachments[j] = 459 desc->pInputAttachments[j].attachment; 460 } 461 } 462 463 if (desc->pDepthStencilAttachment) { 464 subpass->depth_stencil_attachment = subpass_attachments++; 465 *subpass->depth_stencil_attachment = 466 desc->pDepthStencilAttachment->attachment; 467 } 468 469 /* Give the dependencies a slice of the subpass_attachments array. */ 470 subpass->dep_list = dep_list; 471 dep_list += subpass->dep_count; 472 subpass->flush_on_dep = flush_on_dep; 473 flush_on_dep += subpass->dep_count; 474 475 /* Reset the dependencies count so we can start from 0 and index into 476 * the dependencies array. 477 */ 478 subpass->dep_count = 0; 479 subpass->index = i; 480 } 481 482 /* Compute dependencies and populate dep_list and flush_on_dep. */ 483 for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { 484 const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i]; 485 486 if (dep->srcSubpass != VK_SUBPASS_EXTERNAL && 487 dep->dstSubpass != VK_SUBPASS_EXTERNAL && 488 dep->srcSubpass != dep->dstSubpass) { 489 struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass]; 490 491 subpass->dep_list[subpass->dep_count] = dep->srcSubpass; 492 if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo)) 493 subpass->flush_on_dep[subpass->dep_count] = true; 494 495 subpass->dep_count++; 496 } 497 } 498 499 pass->max_tilebuffer_count = 500 PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info); 501 502 pass->hw_setup = pvr_create_renderpass_hwsetup(device, pass, false); 503 if (!pass->hw_setup) { 504 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 505 goto err_free_pass; 506 } 507 508 pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses); 509 510 for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { 511 struct pvr_renderpass_hwsetup_render *hw_render = 512 &pass->hw_setup->renders[i]; 513 struct pvr_load_op *load_op = NULL; 514 515 if (hw_render->tile_buffers_count) 516 pvr_finishme("Set up tile buffer table"); 517 518 if (!hw_render->color_init_count) { 519 assert(!hw_render->client_data); 520 continue; 521 } 522 523 if (!pvr_has_output_register_writes(hw_render)) 524 pvr_finishme("Add output register write"); 525 526 result = pvr_load_op_create(device, pAllocator, hw_render, &load_op); 527 if (result != VK_SUCCESS) 528 goto err_load_op_destroy; 529 530 hw_render->client_data = load_op; 531 } 532 533 *pRenderPass = pvr_render_pass_to_handle(pass); 534 535 return VK_SUCCESS; 536 537err_load_op_destroy: 538 for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { 539 struct pvr_renderpass_hwsetup_render *hw_render = 540 &pass->hw_setup->renders[i]; 541 542 if (hw_render->client_data) 543 pvr_load_op_destroy(device, pAllocator, hw_render->client_data); 544 } 545 546 pvr_destroy_renderpass_hwsetup(device, pass->hw_setup); 547 548err_free_pass: 549 vk_object_base_finish(&pass->base); 550 vk_free2(&device->vk.alloc, pAllocator, pass); 551 552 return result; 553} 554 555void pvr_DestroyRenderPass(VkDevice _device, 556 VkRenderPass _pass, 557 const VkAllocationCallbacks *pAllocator) 558{ 559 PVR_FROM_HANDLE(pvr_device, device, _device); 560 PVR_FROM_HANDLE(pvr_render_pass, pass, _pass); 561 562 if (!pass) 563 return; 564 565 for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) { 566 struct pvr_renderpass_hwsetup_render *hw_render = 567 &pass->hw_setup->renders[i]; 568 569 pvr_load_op_destroy(device, pAllocator, hw_render->client_data); 570 } 571 572 pvr_destroy_renderpass_hwsetup(device, pass->hw_setup); 573 vk_object_base_finish(&pass->base); 574 vk_free2(&device->vk.alloc, pAllocator, pass); 575} 576 577void pvr_GetRenderAreaGranularity(VkDevice _device, 578 VkRenderPass renderPass, 579 VkExtent2D *pGranularity) 580{ 581 PVR_FROM_HANDLE(pvr_device, device, _device); 582 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 583 584 /* Granularity does not depend on any settings in the render pass, so return 585 * the tile granularity. 586 * 587 * The default value is based on the minimum value found in all existing 588 * cores. 589 */ 590 pGranularity->width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16); 591 pGranularity->height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16); 592} 593