1/* 2 * Copyright © 2022 Imagination Technologies Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <stdbool.h> 26#include <stddef.h> 27#include <stdint.h> 28#include <vulkan/vulkan.h> 29 30#include "hwdef/rogue_hw_utils.h" 31#include "pvr_bo.h" 32#include "pvr_cdm_load_sr.h" 33#include "pvr_csb.h" 34#include "pvr_job_context.h" 35#include "pvr_pds.h" 36#include "pvr_private.h" 37#include "pvr_transfer_eot.h" 38#include "pvr_types.h" 39#include "pvr_vdm_load_sr.h" 40#include "pvr_vdm_store_sr.h" 41#include "pvr_winsys.h" 42#include "util/macros.h" 43#include "vk_alloc.h" 44#include "vk_log.h" 45 46/* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this 47 * value when constructing the control stream? 48 */ 49/* The VDM callstack is used by the hardware to implement control stream links 50 * with a return, i.e. 
sub-control streams/subroutines. This value specifies the 51 * maximum callstack depth. 52 */ 53#define PVR_VDM_CALLSTACK_MAX_DEPTH 1U 54 55#define ROGUE_PDS_TASK_PROGRAM_SIZE 256U 56 57static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device, 58 struct pvr_reset_cmd *const reset_cmd) 59{ 60 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 61 62 /* The reset framework depends on compute support in the hw. */ 63 assert(PVR_HAS_FEATURE(dev_info, compute)); 64 65 if (PVR_HAS_QUIRK(dev_info, 51764)) 66 pvr_finishme("Missing reset support for brn51764"); 67 68 if (PVR_HAS_QUIRK(dev_info, 58839)) 69 pvr_finishme("Missing reset support for brn58839"); 70 71 return VK_SUCCESS; 72} 73 74static void pvr_ctx_reset_cmd_fini(struct pvr_device *device, 75 struct pvr_reset_cmd *reset_cmd) 76 77{ 78 /* TODO: reset command cleanup. */ 79} 80 81static VkResult pvr_pds_pt_store_program_create_and_upload( 82 struct pvr_device *device, 83 struct pvr_bo *pt_bo, 84 uint32_t pt_bo_size, 85 struct pvr_pds_upload *const pds_upload_out) 86{ 87 struct pvr_pds_stream_out_terminate_program program = { 0 }; 88 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 89 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); 90 size_t staging_buffer_size; 91 uint32_t *staging_buffer; 92 uint32_t *data_buffer; 93 uint32_t *code_buffer; 94 VkResult result; 95 96 /* Check the bo size can be converted to dwords without any rounding. 
*/ 97 assert(pt_bo_size % 4 == 0); 98 99 program.pds_persistent_temp_size_to_store = pt_bo_size / 4; 100 program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr; 101 102 pvr_pds_generate_stream_out_terminate_program(&program, 103 NULL, 104 PDS_GENERATE_SIZES, 105 dev_info); 106 107 staging_buffer_size = (program.stream_out_terminate_pds_data_size + 108 program.stream_out_terminate_pds_code_size) * 109 sizeof(*staging_buffer); 110 111 staging_buffer = vk_zalloc(&device->vk.alloc, 112 staging_buffer_size, 113 8, 114 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 115 if (!staging_buffer) 116 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 117 118 data_buffer = staging_buffer; 119 code_buffer = 120 pvr_pds_generate_stream_out_terminate_program(&program, 121 data_buffer, 122 PDS_GENERATE_DATA_SEGMENT, 123 dev_info); 124 pvr_pds_generate_stream_out_terminate_program(&program, 125 code_buffer, 126 PDS_GENERATE_CODE_SEGMENT, 127 dev_info); 128 129 /* This PDS program is passed to the HW via the PPP state words. These only 130 * allow the data segment address to be specified and expect the code 131 * segment to immediately follow. Assume the code alignment is the same as 132 * the data. 
133 */ 134 result = 135 pvr_gpu_upload_pds(device, 136 data_buffer, 137 program.stream_out_terminate_pds_data_size, 138 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), 139 code_buffer, 140 program.stream_out_terminate_pds_code_size, 141 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), 142 cache_line_size, 143 pds_upload_out); 144 145 vk_free(&device->vk.alloc, staging_buffer); 146 147 return result; 148} 149 150static VkResult pvr_pds_pt_resume_program_create_and_upload( 151 struct pvr_device *device, 152 struct pvr_bo *pt_bo, 153 uint32_t pt_bo_size, 154 struct pvr_pds_upload *const pds_upload_out) 155{ 156 struct pvr_pds_stream_out_init_program program = { 0 }; 157 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 158 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); 159 size_t staging_buffer_size; 160 uint32_t *staging_buffer; 161 uint32_t *data_buffer; 162 uint32_t *code_buffer; 163 VkResult result; 164 165 /* Check the bo size can be converted to dwords without any rounding. 
*/ 166 assert(pt_bo_size % 4 == 0); 167 168 program.num_buffers = 1; 169 program.pds_buffer_data_size[0] = pt_bo_size / 4; 170 program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr; 171 172 pvr_pds_generate_stream_out_init_program(&program, 173 NULL, 174 false, 175 PDS_GENERATE_SIZES, 176 dev_info); 177 178 staging_buffer_size = (program.stream_out_init_pds_data_size + 179 program.stream_out_init_pds_code_size) * 180 sizeof(*staging_buffer); 181 182 staging_buffer = vk_zalloc(&device->vk.alloc, 183 staging_buffer_size, 184 8, 185 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 186 if (!staging_buffer) 187 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 188 189 data_buffer = staging_buffer; 190 code_buffer = 191 pvr_pds_generate_stream_out_init_program(&program, 192 data_buffer, 193 false, 194 PDS_GENERATE_DATA_SEGMENT, 195 dev_info); 196 pvr_pds_generate_stream_out_init_program(&program, 197 code_buffer, 198 false, 199 PDS_GENERATE_CODE_SEGMENT, 200 dev_info); 201 202 /* This PDS program is passed to the HW via the PPP state words. These only 203 * allow the data segment address to be specified and expect the code 204 * segment to immediately follow. Assume the code alignment is the same as 205 * the data. 
206 */ 207 result = 208 pvr_gpu_upload_pds(device, 209 data_buffer, 210 program.stream_out_init_pds_data_size, 211 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), 212 code_buffer, 213 program.stream_out_init_pds_code_size, 214 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE), 215 cache_line_size, 216 pds_upload_out); 217 218 vk_free(&device->vk.alloc, staging_buffer); 219 220 return result; 221} 222 223static VkResult 224pvr_render_job_pt_programs_setup(struct pvr_device *device, 225 struct rogue_pt_programs *pt_programs) 226{ 227 VkResult result; 228 229 result = pvr_bo_alloc(device, 230 device->heaps.pds_heap, 231 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, 232 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT, 233 PVR_BO_ALLOC_FLAG_CPU_ACCESS, 234 &pt_programs->store_resume_state_bo); 235 if (result != VK_SUCCESS) 236 return result; 237 238 result = pvr_pds_pt_store_program_create_and_upload( 239 device, 240 pt_programs->store_resume_state_bo, 241 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, 242 &pt_programs->pds_store_program); 243 if (result != VK_SUCCESS) 244 goto err_free_store_resume_state_bo; 245 246 result = pvr_pds_pt_resume_program_create_and_upload( 247 device, 248 pt_programs->store_resume_state_bo, 249 ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE, 250 &pt_programs->pds_resume_program); 251 if (result != VK_SUCCESS) 252 goto err_free_pds_store_program; 253 254 return VK_SUCCESS; 255 256err_free_pds_store_program: 257 pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo); 258 259err_free_store_resume_state_bo: 260 pvr_bo_free(device, pt_programs->store_resume_state_bo); 261 262 return result; 263} 264 265static void 266pvr_render_job_pt_programs_cleanup(struct pvr_device *device, 267 struct rogue_pt_programs *pt_programs) 268{ 269 pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo); 270 pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo); 271 pvr_bo_free(device, pt_programs->store_resume_state_bo); 272} 273 274static void 
pvr_pds_ctx_sr_program_setup( 275 bool cc_enable, 276 uint64_t usc_program_upload_offset, 277 uint8_t usc_temps, 278 pvr_dev_addr_t sr_addr, 279 struct pvr_pds_shared_storing_program *const program_out) 280{ 281 /* The PDS task is the same for stores and loads. */ 282 *program_out = (struct pvr_pds_shared_storing_program){ 283 .cc_enable = cc_enable, 284 .doutw_control = { 285 .dest_store = PDS_UNIFIED_STORE, 286 .num_const64 = 2, 287 .doutw_data = { 288 [0] = sr_addr.addr, 289 [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE, 290 }, 291 .last_instruction = false, 292 }, 293 }; 294 295 pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control, 296 usc_program_upload_offset, 297 usc_temps, 298 PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), 299 false); 300} 301 302/* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to 303 * this. If there is a problem here it's likely that the same problem exists 304 * there so don't forget to update the compute function. 305 */ 306static VkResult pvr_pds_render_ctx_sr_program_create_and_upload( 307 struct pvr_device *device, 308 uint64_t usc_program_upload_offset, 309 uint8_t usc_temps, 310 pvr_dev_addr_t sr_addr, 311 struct pvr_pds_upload *const pds_upload_out) 312{ 313 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 314 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); 315 const uint32_t pds_data_alignment = 316 PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; 317 318 /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data 319 * and code size when using the PDS_GENERATE_SIZES mode. 
320 */ 321 STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0); 322 uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 }; 323 struct pvr_pds_shared_storing_program program; 324 ASSERTED uint32_t *buffer_end; 325 uint32_t code_offset; 326 327 pvr_pds_ctx_sr_program_setup(false, 328 usc_program_upload_offset, 329 usc_temps, 330 sr_addr, 331 &program); 332 333 pvr_pds_generate_shared_storing_program(&program, 334 &staging_buffer[0], 335 PDS_GENERATE_DATA_SEGMENT, 336 dev_info); 337 338 code_offset = ALIGN_POT(program.data_size, pds_data_alignment); 339 340 buffer_end = 341 pvr_pds_generate_shared_storing_program(&program, 342 &staging_buffer[code_offset], 343 PDS_GENERATE_CODE_SEGMENT, 344 dev_info); 345 346 assert((uint32_t)(buffer_end - staging_buffer) * 4 < 347 ROGUE_PDS_TASK_PROGRAM_SIZE); 348 349 return pvr_gpu_upload_pds(device, 350 &staging_buffer[0], 351 program.data_size, 352 PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT), 353 &staging_buffer[code_offset], 354 program.code_size, 355 PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT), 356 cache_line_size, 357 pds_upload_out); 358} 359 360/* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to 361 * this. If there is a problem here it's likely that the same problem exists 362 * there so don't forget to update the render_ctx function. 
363 */ 364static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload( 365 struct pvr_device *device, 366 bool is_loading_program, 367 uint64_t usc_program_upload_offset, 368 uint8_t usc_temps, 369 pvr_dev_addr_t sr_addr, 370 struct pvr_pds_upload *const pds_upload_out) 371{ 372 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 373 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); 374 const uint32_t pds_data_alignment = 375 PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; 376 377 /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data 378 * and code size when using the PDS_GENERATE_SIZES mode. 379 */ 380 STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0); 381 uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 }; 382 struct pvr_pds_shared_storing_program program; 383 uint32_t *buffer_ptr; 384 uint32_t code_offset; 385 386 pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421), 387 usc_program_upload_offset, 388 usc_temps, 389 sr_addr, 390 &program); 391 392 if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { 393 pvr_pds_generate_compute_shared_loading_program(&program, 394 &staging_buffer[0], 395 PDS_GENERATE_DATA_SEGMENT, 396 dev_info); 397 } else { 398 pvr_pds_generate_shared_storing_program(&program, 399 &staging_buffer[0], 400 PDS_GENERATE_DATA_SEGMENT, 401 dev_info); 402 } 403 404 code_offset = ALIGN_POT(program.data_size, pds_data_alignment); 405 406 buffer_ptr = 407 pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset], 408 PDS_GENERATE_CODE_SEGMENT); 409 410 if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { 411 buffer_ptr = pvr_pds_generate_compute_shared_loading_program( 412 &program, 413 buffer_ptr, 414 PDS_GENERATE_CODE_SEGMENT, 415 dev_info); 416 } else { 417 buffer_ptr = 418 pvr_pds_generate_shared_storing_program(&program, 419 buffer_ptr, 420 PDS_GENERATE_CODE_SEGMENT, 421 dev_info); 422 } 423 424 
assert((uint32_t)(buffer_ptr - staging_buffer) * 4 < 425 ROGUE_PDS_TASK_PROGRAM_SIZE); 426 427 STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) == 428 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT)); 429 430 STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) == 431 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT)); 432 433 return pvr_gpu_upload_pds( 434 device, 435 &staging_buffer[0], 436 program.data_size, 437 PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT), 438 &staging_buffer[code_offset], 439 (uint32_t)(buffer_ptr - &staging_buffer[code_offset]), 440 PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT), 441 cache_line_size, 442 pds_upload_out); 443} 444 445enum pvr_ctx_sr_program_target { 446 PVR_CTX_SR_RENDER_TARGET, 447 PVR_CTX_SR_COMPUTE_TARGET, 448}; 449 450static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device, 451 enum pvr_ctx_sr_program_target target, 452 struct rogue_sr_programs *sr_programs) 453{ 454 const uint64_t store_load_state_bo_size = 455 PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) + 456 ROGUE_LLS_SHARED_REGS_RESERVE_SIZE; 457 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 458 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); 459 uint64_t usc_store_program_upload_offset; 460 uint64_t usc_load_program_upload_offset; 461 const uint8_t *usc_load_sr_code; 462 uint32_t usc_load_sr_code_size; 463 VkResult result; 464 465 /* Note that this is being used for both compute and render ctx. There is no 466 * compute equivalent define for the VDMCTRL unit size. 467 */ 468 /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. 
*/ 469 sr_programs->usc.unified_size = 470 DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE)); 471 472 result = pvr_bo_alloc(device, 473 device->heaps.pds_heap, 474 store_load_state_bo_size, 475 cache_line_size, 476 PVR_WINSYS_BO_FLAG_CPU_ACCESS, 477 &sr_programs->store_load_state_bo); 478 if (result != VK_SUCCESS) 479 return result; 480 481 /* USC state update: SR state store. */ 482 483 assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); 484 485 result = pvr_gpu_upload_usc(device, 486 pvr_vdm_store_sr_code, 487 sizeof(pvr_vdm_store_sr_code), 488 cache_line_size, 489 &sr_programs->usc.store_program_bo); 490 if (result != VK_SUCCESS) 491 goto err_free_store_load_state_bo; 492 493 usc_store_program_upload_offset = 494 sr_programs->usc.store_program_bo->vma->dev_addr.addr - 495 device->heaps.usc_heap->base_addr.addr; 496 497 /* USC state update: SR state load. */ 498 499 if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) { 500 STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); 501 502 usc_load_sr_code = pvr_cdm_load_sr_code; 503 usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code); 504 } else { 505 STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE); 506 507 usc_load_sr_code = pvr_vdm_load_sr_code; 508 usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code); 509 } 510 511 result = pvr_gpu_upload_usc(device, 512 usc_load_sr_code, 513 usc_load_sr_code_size, 514 cache_line_size, 515 &sr_programs->usc.load_program_bo); 516 if (result != VK_SUCCESS) 517 goto err_free_usc_store_program_bo; 518 519 usc_load_program_upload_offset = 520 sr_programs->usc.load_program_bo->vma->dev_addr.addr - 521 device->heaps.usc_heap->base_addr.addr; 522 523 /* FIXME: The number of USC temps should be output alongside 524 * pvr_vdm_store_sr_code rather than hard coded. 525 */ 526 /* Create and upload the PDS load and store programs. 
Point them to the 527 * appropriate USC load and store programs. 528 */ 529 switch (target) { 530 case PVR_CTX_SR_RENDER_TARGET: 531 /* PDS state update: SR state store. */ 532 result = pvr_pds_render_ctx_sr_program_create_and_upload( 533 device, 534 usc_store_program_upload_offset, 535 8, 536 sr_programs->store_load_state_bo->vma->dev_addr, 537 &sr_programs->pds.store_program); 538 if (result != VK_SUCCESS) 539 goto err_free_usc_load_program_bo; 540 541 /* PDS state update: SR state load. */ 542 result = pvr_pds_render_ctx_sr_program_create_and_upload( 543 device, 544 usc_load_program_upload_offset, 545 20, 546 sr_programs->store_load_state_bo->vma->dev_addr, 547 &sr_programs->pds.load_program); 548 if (result != VK_SUCCESS) 549 goto err_free_pds_store_program_bo; 550 551 break; 552 553 case PVR_CTX_SR_COMPUTE_TARGET: 554 /* PDS state update: SR state store. */ 555 result = pvr_pds_compute_ctx_sr_program_create_and_upload( 556 device, 557 false, 558 usc_store_program_upload_offset, 559 8, 560 sr_programs->store_load_state_bo->vma->dev_addr, 561 &sr_programs->pds.store_program); 562 if (result != VK_SUCCESS) 563 goto err_free_usc_load_program_bo; 564 565 /* PDS state update: SR state load. 
*/ 566 result = pvr_pds_compute_ctx_sr_program_create_and_upload( 567 device, 568 true, 569 usc_load_program_upload_offset, 570 20, 571 sr_programs->store_load_state_bo->vma->dev_addr, 572 &sr_programs->pds.load_program); 573 if (result != VK_SUCCESS) 574 goto err_free_pds_store_program_bo; 575 576 break; 577 578 default: 579 unreachable("Invalid target."); 580 break; 581 } 582 583 return VK_SUCCESS; 584 585err_free_pds_store_program_bo: 586 pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo); 587 588err_free_usc_load_program_bo: 589 pvr_bo_free(device, sr_programs->usc.load_program_bo); 590 591err_free_usc_store_program_bo: 592 pvr_bo_free(device, sr_programs->usc.store_program_bo); 593 594err_free_store_load_state_bo: 595 pvr_bo_free(device, sr_programs->store_load_state_bo); 596 597 return VK_SUCCESS; 598} 599 600static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device, 601 struct rogue_sr_programs *sr_programs) 602{ 603 pvr_bo_free(device, sr_programs->pds.load_program.pvr_bo); 604 pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo); 605 pvr_bo_free(device, sr_programs->usc.load_program_bo); 606 pvr_bo_free(device, sr_programs->usc.store_program_bo); 607 pvr_bo_free(device, sr_programs->store_load_state_bo); 608} 609 610static VkResult 611pvr_render_ctx_switch_programs_setup(struct pvr_device *device, 612 struct pvr_render_ctx_programs *programs) 613{ 614 VkResult result; 615 616 result = pvr_render_job_pt_programs_setup(device, &programs->pt); 617 if (result != VK_SUCCESS) 618 return result; 619 620 result = pvr_ctx_sr_programs_setup(device, 621 PVR_CTX_SR_RENDER_TARGET, 622 &programs->sr); 623 if (result != VK_SUCCESS) 624 goto err_pt_programs_cleanup; 625 626 return VK_SUCCESS; 627 628err_pt_programs_cleanup: 629 pvr_render_job_pt_programs_cleanup(device, &programs->pt); 630 631 return result; 632} 633 634static void 635pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device, 636 struct pvr_render_ctx_programs *programs) 637{ 
638 pvr_ctx_sr_programs_cleanup(device, &programs->sr); 639 pvr_render_job_pt_programs_cleanup(device, &programs->pt); 640} 641 642static VkResult pvr_render_ctx_switch_init(struct pvr_device *device, 643 struct pvr_render_ctx *ctx) 644{ 645 struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch; 646 const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED | 647 PVR_BO_ALLOC_FLAG_CPU_ACCESS; 648 const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED | 649 PVR_BO_ALLOC_FLAG_CPU_ACCESS; 650 VkResult result; 651 652 result = pvr_bo_alloc(device, 653 device->heaps.general_heap, 654 ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE, 655 ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT, 656 vdm_state_bo_flags, 657 &ctx_switch->vdm_state_bo); 658 if (result != VK_SUCCESS) 659 return result; 660 661 result = pvr_bo_alloc(device, 662 device->heaps.general_heap, 663 ROGUE_LLS_TA_STATE_BUFFER_SIZE, 664 ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT, 665 geom_state_bo_flags, 666 &ctx_switch->geom_state_bo); 667 if (result != VK_SUCCESS) 668 goto err_pvr_bo_free_vdm_state_bo; 669 670 for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) { 671 result = 672 pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]); 673 if (result) 674 goto err_programs_cleanup; 675 } 676 677 return result; 678 679err_programs_cleanup: 680 for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) { 681 pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]); 682 } 683 684 pvr_bo_free(device, ctx_switch->geom_state_bo); 685 686err_pvr_bo_free_vdm_state_bo: 687 pvr_bo_free(device, ctx_switch->vdm_state_bo); 688 689 return result; 690} 691 692static void pvr_render_ctx_switch_fini(struct pvr_device *device, 693 struct pvr_render_ctx *ctx) 694{ 695 struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch; 696 697 for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) { 698 pvr_render_ctx_switch_programs_cleanup(device, 
&ctx_switch->programs[i]); 699 } 700 701 pvr_bo_free(device, ctx_switch->geom_state_bo); 702 pvr_bo_free(device, ctx_switch->vdm_state_bo); 703} 704 705static void 706pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program, 707 enum PVRX(VDMCTRL_USC_TARGET) usc_target, 708 uint8_t usc_unified_size, 709 uint32_t *const state0_out, 710 uint32_t *const state1_out) 711{ 712 pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) { 713 /* Convert the data size from dwords to bytes. */ 714 const uint32_t pds_data_size = pds_program->data_size * 4; 715 716 state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM); 717 state.usc_target = usc_target; 718 state.usc_common_size = 0; 719 state.usc_unified_size = usc_unified_size; 720 state.pds_temp_size = 0; 721 722 assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) == 723 0); 724 state.pds_data_size = 725 pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE); 726 }; 727 728 pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) { 729 state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset); 730 state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS); 731 state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS); 732 } 733} 734 735static void 736pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program, 737 uint32_t *const stream_out1_out, 738 uint32_t *const stream_out2_out) 739{ 740 pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) { 741 /* Convert the data size from dwords to bytes. 
*/ 742 const uint32_t pds_data_size = pds_program->data_size * 4; 743 744 state.sync = true; 745 746 assert(pds_data_size % 747 PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) == 748 0); 749 state.pds_data_size = 750 pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE); 751 752 state.pds_temp_size = 0; 753 } 754 755 pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) { 756 state.pds_data_addr = PVR_DEV_ADDR(pds_program->data_offset); 757 } 758} 759 760static void pvr_render_ctx_ws_static_state_init( 761 struct pvr_render_ctx *ctx, 762 struct pvr_winsys_render_ctx_static_state *static_state) 763{ 764 uint64_t *q_dst; 765 uint32_t *d_dst; 766 767 q_dst = &static_state->vdm_ctx_state_base_addr; 768 pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STATE_BASE, base) { 769 base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr; 770 } 771 772 q_dst = &static_state->geom_ctx_state_base_addr; 773 pvr_csb_pack (q_dst, CR_TA_CONTEXT_STATE_BASE, base) { 774 base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr; 775 } 776 777 for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) { 778 struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt; 779 struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr; 780 781 /* Context store state. 
*/ 782 q_dst = &static_state->geom_state[i].vdm_ctx_store_task0; 783 pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK0, task0) { 784 pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program, 785 PVRX(VDMCTRL_USC_TARGET_ANY), 786 sr_prog->usc.unified_size, 787 &task0.pds_state0, 788 &task0.pds_state1); 789 } 790 791 d_dst = &static_state->geom_state[i].vdm_ctx_store_task1; 792 pvr_csb_pack (d_dst, CR_VDM_CONTEXT_STORE_TASK1, task1) { 793 pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) { 794 state.pds_code_addr = 795 PVR_DEV_ADDR(sr_prog->pds.store_program.code_offset); 796 } 797 } 798 799 q_dst = &static_state->geom_state[i].vdm_ctx_store_task2; 800 pvr_csb_pack (q_dst, CR_VDM_CONTEXT_STORE_TASK2, task2) { 801 pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program, 802 &task2.stream_out1, 803 &task2.stream_out2); 804 } 805 806 /* Context resume state. */ 807 q_dst = &static_state->geom_state[i].vdm_ctx_resume_task0; 808 pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) { 809 pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program, 810 PVRX(VDMCTRL_USC_TARGET_ALL), 811 sr_prog->usc.unified_size, 812 &task0.pds_state0, 813 &task0.pds_state1); 814 } 815 816 d_dst = &static_state->geom_state[i].vdm_ctx_resume_task1; 817 pvr_csb_pack (d_dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) { 818 pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) { 819 state.pds_code_addr = 820 PVR_DEV_ADDR(sr_prog->pds.load_program.code_offset); 821 } 822 } 823 824 q_dst = &static_state->geom_state[i].vdm_ctx_resume_task2; 825 pvr_csb_pack (q_dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) { 826 pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program, 827 &task2.stream_out1, 828 &task2.stream_out2); 829 } 830 } 831} 832 833static void pvr_render_ctx_ws_create_info_init( 834 struct pvr_render_ctx *ctx, 835 enum pvr_winsys_ctx_priority priority, 836 struct pvr_winsys_render_ctx_create_info *create_info) 837{ 838 create_info->priority = 
priority; 839 create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr; 840 841 pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state); 842} 843 844VkResult pvr_render_ctx_create(struct pvr_device *device, 845 enum pvr_winsys_ctx_priority priority, 846 struct pvr_render_ctx **const ctx_out) 847{ 848 const uint64_t vdm_callstack_size = 849 sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH; 850 struct pvr_winsys_render_ctx_create_info create_info; 851 struct pvr_render_ctx *ctx; 852 VkResult result; 853 854 ctx = vk_alloc(&device->vk.alloc, 855 sizeof(*ctx), 856 8, 857 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 858 if (!ctx) 859 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 860 861 ctx->device = device; 862 863 result = pvr_bo_alloc(device, 864 device->heaps.general_heap, 865 vdm_callstack_size, 866 PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT), 867 0, 868 &ctx->vdm_callstack_bo); 869 if (result != VK_SUCCESS) 870 goto err_vk_free_ctx; 871 872 result = pvr_render_ctx_switch_init(device, ctx); 873 if (result != VK_SUCCESS) 874 goto err_free_vdm_callstack_bo; 875 876 result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd); 877 if (result != VK_SUCCESS) 878 goto err_render_ctx_switch_fini; 879 880 /* ctx must be fully initialized by this point since 881 * pvr_render_ctx_ws_create_info_init() depends on this. 
 */
   pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = device->ws->ops->render_ctx_create(device->ws,
                                               &create_info,
                                               &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_render_ctx_reset_cmd_fini;

   *ctx_out = ctx;

   return VK_SUCCESS;

   /* Error unwind: each label releases the resources acquired before the
    * corresponding failure point, in reverse order of acquisition.
    */
err_render_ctx_reset_cmd_fini:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_render_ctx_switch_fini:
   pvr_render_ctx_switch_fini(device, ctx);

err_free_vdm_callstack_bo:
   pvr_bo_free(device, ctx->vdm_callstack_bo);

err_vk_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}

/* Destroy a render context: tear down the winsys context first, then release
 * the per-context resources in reverse order of their creation in
 * pvr_render_ctx_create().
 */
void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->render_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   pvr_render_ctx_switch_fini(device, ctx);
   pvr_bo_free(device, ctx->vdm_callstack_bo);
   vk_free(&device->vk.alloc, ctx);
}

/* Generate and upload the PDS fence + terminate program referenced by the
 * CR_CDM_TERMINATE_... static state of a compute context. On success the
 * upload info is returned in pds_upload_out; the caller owns the uploaded
 * buffer (freed via its pvr_bo in pvr_compute_ctx_destroy()).
 */
static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_upload *const pds_upload_out)
{
   /* PDS data segment size unit converted from bytes to dwords. */
   const uint32_t pds_data_alignment =
      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
   const struct pvr_device_runtime_info *dev_runtime_info =
      &device->pdevice->dev_runtime_info;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   /* Staging buffer in dwords (">> 2U" converts bytes to dwords).
    * NOTE(review): the buffer is sized with PVRX(PDS_TASK_PROGRAM_SIZE) but
    * the overflow assert below checks against ROGUE_PDS_TASK_PROGRAM_SIZE
    * (256U at the top of this file) — confirm the two constants agree.
    */
   uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
   struct pvr_pds_fence_program program = { 0 };
   ASSERTED uint32_t *buffer_end;
   uint32_t code_offset;
   uint32_t data_size;

   /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
   assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
            dev_runtime_info->num_phantoms >= 2));

   /* First pass: emit only the data segment at the start of the staging
    * buffer.
    */
   pvr_pds_generate_fence_terminate_program(&program,
                                            staging_buffer,
                                            PDS_GENERATE_DATA_SEGMENT,
                                            &device->pdevice->dev_info);

   /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size
    * when we generate the code segment. Implement
    * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change?
    * This behavior doesn't seem consistent with the rest of the api. For now
    * we store the size in a variable.
    */
   data_size = program.data_size;
   /* The code segment starts after the data segment, aligned to the PDS data
    * size unit. code_offset is an index in dwords.
    */
   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);

   /* Second pass: emit the code segment immediately after the data segment. */
   buffer_end =
      pvr_pds_generate_fence_terminate_program(&program,
                                               &staging_buffer[code_offset],
                                               PDS_GENERATE_CODE_SEGMENT,
                                               &device->pdevice->dev_info);

   /* Check the generated program fits in the staging buffer (dword count
    * scaled back to bytes).
    */
   assert((uint64_t)(buffer_end - staging_buffer) * 4U <
          ROGUE_PDS_TASK_PROGRAM_SIZE);

   return pvr_gpu_upload_pds(device,
                             staging_buffer,
                             data_size,
                             PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT),
                             &staging_buffer[code_offset],
                             program.code_size,
                             PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT),
                             0,
                             pds_upload_out);
}

/* Pack the static (set-once) CDM register state the winsys programs for
 * compute context store / terminate / resume during a context switch. All
 * addresses come from the PDS programs uploaded by
 * pvr_ctx_sr_programs_setup() and
 * pvr_pds_sr_fence_terminate_program_create_and_upload().
 */
static void pvr_compute_ctx_ws_static_state_init(
   const struct pvr_device_info *const dev_info,
   const struct pvr_compute_ctx *const ctx,
   struct pvr_winsys_compute_ctx_static_state *const static_state)
{
   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;

   /* CR_CDM_CONTEXT_... use state store program info. */

   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.store_program.code_offset);
   }

   /* Secondary ("B") store program, from the second sr[] entry set up in
    * pvr_compute_ctx_create().
    */
   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
                 CR_CDM_CONTEXT_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.store_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
                 CR_CDM_CONTEXT_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t store_program_data_size =
         ctx_switch->sr[0].pds.store_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = true;
      state.unified_size = ctx_switch->sr[0].usc.unified_size;
      state.common_shared = false;
      state.common_size = 0;
      state.temp_size = 0;

      /* The data_size field is in units of
       * VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE bytes, so the byte size
       * must divide evenly.
       */
      assert(store_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = store_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);

      state.fence = true;
   }

   /* CR_CDM_TERMINATE_... use fence terminate info. */

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
                 CR_CDM_TERMINATE_PDS,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr_fence_terminate_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
                 CR_CDM_TERMINATE_PDS1,
                 state) {
      /* Convert the data size from dwords to bytes. */
      const uint32_t fence_terminate_program_data_size =
         ctx_switch->sr_fence_terminate_program.data_size * 4U;

      state.pds_seq_dep = true;
      state.usc_seq_dep = false;
      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
      state.unified_size = 0;
      /* Common store is for shareds -- this will free the partitions. */
      state.common_shared = true;
      state.common_size = 0;
      state.temp_size = 0;

      /* Same unit-size constraint as the store program's data_size above. */
      assert(fence_terminate_program_data_size %
                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
             0);
      state.data_size = fence_terminate_program_data_size /
                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
      state.fence = true;
   }

   /* CR_CDM_RESUME_... use state load program info. */

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[0].pds.load_program.code_offset);
   }

   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
                 CR_CDM_CONTEXT_LOAD_PDS0,
                 state) {
      state.data_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.data_offset);
      state.code_addr =
         PVR_DEV_ADDR(ctx_switch->sr[1].pds.load_program.code_offset);
   }
}

/* Fill in the winsys compute context create info: priority plus the packed
 * static register state.
 */
static void pvr_compute_ctx_ws_create_info_init(
   const struct pvr_compute_ctx *const ctx,
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_compute_ctx_create_info *const create_info)
{
   create_info->priority = priority;

   pvr_compute_ctx_ws_static_state_init(&ctx->device->pdevice->dev_info,
                                        ctx,
                                        &create_info->static_state);
}

/* Create a compute context: allocate the CDM context-resume buffer, set up
 * the shared-register store/load programs and the fence terminate program,
 * then create the winsys context. On failure all partially-created resources
 * are released via the goto unwind chain and the failing VkResult is
 * returned.
 */
VkResult pvr_compute_ctx_create(struct pvr_device *const device,
                                enum pvr_winsys_ctx_priority priority,
                                struct pvr_compute_ctx **const ctx_out)
{
   struct pvr_winsys_compute_ctx_create_info create_info;
   struct pvr_compute_ctx *ctx;
   VkResult result;

   ctx = vk_alloc(&device->vk.alloc,
                  sizeof(*ctx),
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   /* CPU-accessible, GPU-uncached buffer the CDM uses to resume a switched-
    * out compute context.
    */
   result = pvr_bo_alloc(
      device,
      device->heaps.general_heap,
      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
      &ctx->ctx_switch.compute_state_bo);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   /* TODO: Change this so that enabling storage to B doesn't change the array
    * size. Instead of looping we could unroll this and have the second
    * programs setup depending on the B enable. Doing it that way would make
    * things more obvious.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
      result = pvr_ctx_sr_programs_setup(device,
                                         PVR_CTX_SR_COMPUTE_TARGET,
                                         &ctx->ctx_switch.sr[i]);
      if (result != VK_SUCCESS) {
         /* Only entries [0, i) were set up; clean those, then fall through
          * to the common unwind (err_free_sr_programs would free all of
          * them, including ones never set up).
          */
         for (uint32_t j = 0; j < i; j++)
            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);

         goto err_free_state_buffer;
      }
   }

   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
      device,
      &ctx->ctx_switch.sr_fence_terminate_program);
   if (result != VK_SUCCESS)
      goto err_free_sr_programs;

   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_pds_fence_terminate_program;

   result = device->ws->ops->compute_ctx_create(device->ws,
                                                &create_info,
                                                &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   *ctx_out = ctx;

   return VK_SUCCESS;

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_pds_fence_terminate_program:
   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);

err_free_sr_programs:
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

err_free_state_buffer:
   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}

/* Destroy a compute context, releasing resources in reverse order of their
 * creation in pvr_compute_ctx_create().
 */
void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   device->ws->ops->compute_ctx_destroy(ctx->ws_ctx);

   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);

   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);

   vk_free(&device->vk.alloc, ctx);
}

/* Transfer contexts have no static register state; only the priority is
 * passed through to the winsys.
 */
static void pvr_transfer_ctx_ws_create_info_init(
   enum pvr_winsys_ctx_priority priority,
   struct pvr_winsys_transfer_ctx_create_info *const create_info)
{
   create_info->priority = priority;
}

/* Upload the USC shaders used by the transfer context and record the device
 * address of each end-of-tile (EOT) variant in ctx->transfer_mrts.
 */
static VkResult pvr_transfer_ctx_setup_shaders(struct pvr_device *device,
                                               struct pvr_transfer_ctx *ctx)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result;

   /* TODO: Setup USC fragments. */

   /* Setup EOT program. */
   result = pvr_gpu_upload_usc(device,
                               pvr_transfer_eot_usc_code,
                               sizeof(pvr_transfer_eot_usc_code),
                               cache_line_size,
                               &ctx->usc_eot_bo);
   if (result != VK_SUCCESS)
      return result;

   /* One MRT entry per EOT variant: each is an offset into the single
    * uploaded EOT code blob.
    */
   STATIC_ASSERT(ARRAY_SIZE(pvr_transfer_eot_usc_offsets) ==
                 ARRAY_SIZE(ctx->transfer_mrts));
   for (uint32_t i = 0U; i < ARRAY_SIZE(pvr_transfer_eot_usc_offsets); i++) {
      ctx->transfer_mrts[i] =
         PVR_DEV_ADDR_OFFSET(ctx->usc_eot_bo->vma->dev_addr,
                             pvr_transfer_eot_usc_offsets[i]);
   }

   return VK_SUCCESS;
}

/* Release the USC shaders uploaded by pvr_transfer_ctx_setup_shaders(). */
static void pvr_transfer_ctx_fini_shaders(struct pvr_device *device,
                                          struct pvr_transfer_ctx *ctx)
{
   pvr_bo_free(device, ctx->usc_eot_bo);
}

/* Create a transfer context: winsys context, EOT shaders, and the PDS
 * uniform/texture-state code segment array. The context is zero-allocated so
 * the unwind path can rely on NULL pvr_bo pointers for programs that were
 * never uploaded.
 */
VkResult pvr_transfer_ctx_create(struct pvr_device *const device,
                                 enum pvr_winsys_ctx_priority priority,
                                 struct pvr_transfer_ctx **const ctx_out)
{
   struct pvr_winsys_transfer_ctx_create_info create_info;
   struct pvr_transfer_ctx *ctx;
   VkResult result;

   ctx = vk_zalloc(&device->vk.alloc,
                   sizeof(*ctx),
                   8U,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!ctx)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   ctx->device = device;

   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
   if (result != VK_SUCCESS)
      goto err_free_ctx;

   pvr_transfer_ctx_ws_create_info_init(priority, &create_info);

   result = device->ws->ops->transfer_ctx_create(device->ws,
                                                 &create_info,
                                                 &ctx->ws_ctx);
   if (result != VK_SUCCESS)
      goto err_fini_reset_cmd;

   result = pvr_transfer_ctx_setup_shaders(device, ctx);
   if (result != VK_SUCCESS)
      goto err_destroy_transfer_ctx;

   /* Create the PDS Uniform/Tex state code segment array. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         /* The [0][0] entry is deliberately skipped and left zeroed;
          * NOTE(review): presumably no program is needed when both counts
          * are zero — confirm against the users of pds_unitex_code.
          */
         if (i == 0U && j == 0U)
            continue;

         result = pvr_pds_unitex_state_program_create_and_upload(
            device,
            NULL,
            i,
            j,
            &ctx->pds_unitex_code[i][j]);
         if (result != VK_SUCCESS) {
            goto err_free_pds_unitex_bos;
         }
      }
   }

   *ctx_out = ctx;

   return VK_SUCCESS;

err_free_pds_unitex_bos:
   /* vk_zalloc() guarantees un-uploaded entries have a NULL pvr_bo. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_fini_shaders(device, ctx);

err_destroy_transfer_ctx:
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);

err_fini_reset_cmd:
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);

err_free_ctx:
   vk_free(&device->vk.alloc, ctx);

   return result;
}

/* Destroy a transfer context, releasing resources in reverse order of their
 * creation in pvr_transfer_ctx_create().
 */
void pvr_transfer_ctx_destroy(struct pvr_transfer_ctx *const ctx)
{
   struct pvr_device *device = ctx->device;

   for (uint32_t i = 0U; i < ARRAY_SIZE(ctx->pds_unitex_code); i++) {
      for (uint32_t j = 0U; j < ARRAY_SIZE(ctx->pds_unitex_code[0U]); j++) {
         /* Skip entries with no uploaded program (e.g. [0][0]). */
         if (!ctx->pds_unitex_code[i][j].pvr_bo)
            continue;

         pvr_bo_free(device, ctx->pds_unitex_code[i][j].pvr_bo);
      }
   }

   pvr_transfer_ctx_fini_shaders(device, ctx);
   device->ws->ops->transfer_ctx_destroy(ctx->ws_ctx);
   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
   vk_free(&device->vk.alloc, ctx);
}