1/* 2 * Copyright © 2022 Imagination Technologies Ltd. 3 * 4 * based in part on anv driver which is: 5 * Copyright © 2015 Intel Corporation 6 * 7 * based in part on v3dv driver which is: 8 * Copyright © 2019 Raspberry Pi 9 * 10 * Permission is hereby granted, free of charge, to any person obtaining a copy 11 * of this software and associated documentation files (the "Software"), to deal 12 * in the Software without restriction, including without limitation the rights 13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 * copies of the Software, and to permit persons to whom the Software is 15 * furnished to do so, subject to the following conditions: 16 * 17 * The above copyright notice and this permission notice (including the next 18 * paragraph) shall be included in all copies or substantial portions of the 19 * Software. 20 * 21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 * SOFTWARE. 28 */ 29 30#include <assert.h> 31#include <fcntl.h> 32#include <inttypes.h> 33#include <stdbool.h> 34#include <stddef.h> 35#include <stdint.h> 36#include <stdlib.h> 37#include <string.h> 38#include <vulkan/vulkan.h> 39#include <xf86drm.h> 40 41#include "hwdef/rogue_hw_utils.h" 42#include "pipe/p_defines.h" 43#include "pvr_bo.h" 44#include "pvr_csb.h" 45#include "pvr_csb_enum_helpers.h" 46#include "pvr_debug.h" 47#include "pvr_device_info.h" 48#include "pvr_hardcode.h" 49#include "pvr_job_render.h" 50#include "pvr_limits.h" 51#include "pvr_nop_usc.h" 52#include "pvr_pds.h" 53#include "pvr_private.h" 54#include "pvr_tex_state.h" 55#include "pvr_types.h" 56#include "pvr_winsys.h" 57#include "rogue/rogue_compiler.h" 58#include "util/build_id.h" 59#include "util/log.h" 60#include "util/mesa-sha1.h" 61#include "util/os_misc.h" 62#include "util/u_math.h" 63#include "vk_alloc.h" 64#include "vk_log.h" 65#include "vk_object.h" 66#include "vk_util.h" 67 68#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U) 69#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U) 70#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U) 71 72/* The grow threshold is a percentage. This is intended to be 12.5%, but has 73 * been rounded up since the percentage is treated as an integer. 74 */ 75#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U 76 77#if defined(VK_USE_PLATFORM_DISPLAY_KHR) 78# define PVR_USE_WSI_PLATFORM 79#endif 80 81#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION) 82 83#define DEF_DRIVER(str_name) \ 84 { \ 85 .name = str_name, .len = sizeof(str_name) - 1 \ 86 } 87 88struct pvr_drm_device_info { 89 const char *name; 90 size_t len; 91}; 92 93/* This is the list of supported DRM display drivers. */ 94static const struct pvr_drm_device_info pvr_display_devices[] = { 95 DEF_DRIVER("mediatek-drm"), 96 DEF_DRIVER("ti,am65x-dss"), 97}; 98 99/* This is the list of supported DRM render drivers. */ 100static const struct pvr_drm_device_info pvr_render_devices[] = { 101 DEF_DRIVER("mediatek,mt8173-gpu"), 102 DEF_DRIVER("ti,am62-gpu"), 103}; 104 105#undef DEF_DRIVER 106 107static const struct vk_instance_extension_table pvr_instance_extensions = { 108#if defined(VK_USE_PLATFORM_DISPLAY_KHR) 109 .KHR_display = true, 110#endif 111 .KHR_external_memory_capabilities = true, 112 .KHR_get_physical_device_properties2 = true, 113#if defined(PVR_USE_WSI_PLATFORM) 114 .KHR_surface = true, 115#endif 116 .EXT_debug_report = true, 117 .EXT_debug_utils = true, 118}; 119 120static void pvr_physical_device_get_supported_extensions( 121 const struct pvr_physical_device *pdevice, 122 struct vk_device_extension_table *extensions) 123{ 124 /* clang-format off */ 125 *extensions = (struct vk_device_extension_table){ 126 .KHR_external_memory = true, 127 .KHR_external_memory_fd = true, 128#if defined(PVR_USE_WSI_PLATFORM) 129 .KHR_swapchain = true, 130#endif 131 .EXT_external_memory_dma_buf = true, 132 .EXT_private_data = true, 133 }; 134 /* clang-format on */ 135} 136 137VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion) 138{ 139 *pApiVersion = PVR_API_VERSION; 140 return VK_SUCCESS; 141} 142 143VkResult 144pvr_EnumerateInstanceExtensionProperties(const char *pLayerName, 145 uint32_t *pPropertyCount, 146 VkExtensionProperties *pProperties) 147{ 148 if (pLayerName) 149 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); 150 151 return vk_enumerate_instance_extension_properties(&pvr_instance_extensions, 152 pPropertyCount, 153 pProperties); 154} 155 156VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, 157 const VkAllocationCallbacks *pAllocator, 158 VkInstance *pInstance) 159{ 160 struct vk_instance_dispatch_table dispatch_table; 161 struct pvr_instance *instance; 162 VkResult result; 163 164 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); 165 166 if (!pAllocator) 167 pAllocator = vk_default_allocator(); 168 169 instance = vk_alloc(pAllocator, 170 sizeof(*instance), 171 8, 172 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 173 if (!instance) 174 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 175 176 vk_instance_dispatch_table_from_entrypoints(&dispatch_table, 177 &pvr_instance_entrypoints, 178 true); 179 180 vk_instance_dispatch_table_from_entrypoints(&dispatch_table, 181 &wsi_instance_entrypoints, 182 false); 183 184 result = vk_instance_init(&instance->vk, 185 &pvr_instance_extensions, 186 &dispatch_table, 187 pCreateInfo, 188 pAllocator); 189 if (result != VK_SUCCESS) { 190 vk_free(pAllocator, instance); 191 return vk_error(NULL, result); 192 } 193 194 pvr_process_debug_variable(); 195 196 instance->physical_devices_count = -1; 197 198 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); 199 200 *pInstance = pvr_instance_to_handle(instance); 201 202 return VK_SUCCESS; 203} 204 205static void pvr_physical_device_finish(struct pvr_physical_device *pdevice) 206{ 207 /* Be careful here. The device might not have been initialized. This can 208 * happen since initialization is done in vkEnumeratePhysicalDevices() but 209 * finish is done in vkDestroyInstance(). Make sure that you check for NULL 210 * before freeing or that the freeing functions accept NULL pointers. 211 */ 212 213 if (pdevice->compiler) 214 rogue_compiler_destroy(pdevice->compiler); 215 216 pvr_wsi_finish(pdevice); 217 218 free(pdevice->name); 219 220 if (pdevice->ws) 221 pvr_winsys_destroy(pdevice->ws); 222 223 if (pdevice->master_fd >= 0) { 224 vk_free(&pdevice->vk.instance->alloc, pdevice->master_path); 225 close(pdevice->master_fd); 226 } 227 228 if (pdevice->render_fd >= 0) { 229 vk_free(&pdevice->vk.instance->alloc, pdevice->render_path); 230 close(pdevice->render_fd); 231 } 232 vk_physical_device_finish(&pdevice->vk); 233} 234 235void pvr_DestroyInstance(VkInstance _instance, 236 const VkAllocationCallbacks *pAllocator) 237{ 238 PVR_FROM_HANDLE(pvr_instance, instance, _instance); 239 240 if (!instance) 241 return; 242 243 pvr_physical_device_finish(&instance->physical_device); 244 245 VG(VALGRIND_DESTROY_MEMPOOL(instance)); 246 247 vk_instance_finish(&instance->vk); 248 vk_free(&instance->vk.alloc, instance); 249} 250 251static VkResult 252pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice) 253{ 254 struct mesa_sha1 sha1_ctx; 255 unsigned build_id_len; 256 uint8_t sha1[20]; 257 uint64_t bvnc; 258 259 const struct build_id_note *note = 260 build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids); 261 if (!note) { 262 return vk_errorf(pdevice, 263 VK_ERROR_INITIALIZATION_FAILED, 264 "Failed to find build-id"); 265 } 266 267 build_id_len = build_id_length(note); 268 if (build_id_len < 20) { 269 return vk_errorf(pdevice, 270 VK_ERROR_INITIALIZATION_FAILED, 271 "Build-id too short. It needs to be a SHA"); 272 } 273 274 bvnc = pvr_get_packed_bvnc(&pdevice->dev_info); 275 276 _mesa_sha1_init(&sha1_ctx); 277 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); 278 _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc)); 279 _mesa_sha1_final(&sha1_ctx, sha1); 280 memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE); 281 282 return VK_SUCCESS; 283} 284 285static uint64_t pvr_compute_heap_size(void) 286{ 287 /* Query the total ram from the system */ 288 uint64_t total_ram; 289 if (!os_get_total_physical_memory(&total_ram)) 290 return 0; 291 292 /* We don't want to burn too much ram with the GPU. If the user has 4GiB 293 * or less, we use at most half. If they have more than 4GiB, we use 3/4. 294 */ 295 uint64_t available_ram; 296 if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL) 297 available_ram = total_ram / 2U; 298 else 299 available_ram = total_ram * 3U / 4U; 300 301 return available_ram; 302} 303 304static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice, 305 struct pvr_instance *instance, 306 drmDevicePtr drm_render_device, 307 drmDevicePtr drm_primary_device) 308{ 309 const char *path = drm_render_device->nodes[DRM_NODE_RENDER]; 310 struct vk_device_extension_table supported_extensions; 311 struct vk_physical_device_dispatch_table dispatch_table; 312 const char *primary_path; 313 VkResult result; 314 int ret; 315 316 if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) { 317 return vk_errorf(instance, 318 VK_ERROR_INCOMPATIBLE_DRIVER, 319 "WARNING: powervr is not a conformant Vulkan " 320 "implementation. Pass " 321 "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know " 322 "what you're doing."); 323 } 324 325 pvr_physical_device_get_supported_extensions(pdevice, &supported_extensions); 326 327 vk_physical_device_dispatch_table_from_entrypoints( 328 &dispatch_table, 329 &pvr_physical_device_entrypoints, 330 true); 331 332 vk_physical_device_dispatch_table_from_entrypoints( 333 &dispatch_table, 334 &wsi_physical_device_entrypoints, 335 false); 336 337 result = vk_physical_device_init(&pdevice->vk, 338 &instance->vk, 339 &supported_extensions, 340 &dispatch_table); 341 if (result != VK_SUCCESS) 342 return result; 343 344 pdevice->instance = instance; 345 346 pdevice->render_fd = open(path, O_RDWR | O_CLOEXEC); 347 if (pdevice->render_fd < 0) { 348 result = vk_errorf(instance, 349 VK_ERROR_INCOMPATIBLE_DRIVER, 350 "Failed to open device %s", 351 path); 352 goto err_vk_physical_device_finish; 353 } 354 355 pdevice->render_path = vk_strdup(&pdevice->vk.instance->alloc, 356 path, 357 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 358 if (!pdevice->render_path) { 359 result = VK_ERROR_OUT_OF_HOST_MEMORY; 360 goto err_close_render_fd; 361 } 362 363 if (instance->vk.enabled_extensions.KHR_display) { 364 primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY]; 365 366 pdevice->master_fd = open(primary_path, O_RDWR | O_CLOEXEC); 367 } else { 368 pdevice->master_fd = -1; 369 } 370 371 if (pdevice->master_fd >= 0) { 372 pdevice->master_path = vk_strdup(&pdevice->vk.instance->alloc, 373 primary_path, 374 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 375 if (!pdevice->master_path) { 376 result = VK_ERROR_OUT_OF_HOST_MEMORY; 377 goto err_close_master_fd; 378 } 379 } else { 380 pdevice->master_path = NULL; 381 } 382 383 pdevice->ws = pvr_winsys_create(pdevice->master_fd, 384 pdevice->render_fd, 385 &pdevice->vk.instance->alloc); 386 if (!pdevice->ws) { 387 result = VK_ERROR_INITIALIZATION_FAILED; 388 goto err_vk_free_master_path; 389 } 390 391 pdevice->vk.supported_sync_types = pdevice->ws->sync_types; 392 393 ret = pdevice->ws->ops->device_info_init(pdevice->ws, 394 &pdevice->dev_info, 395 &pdevice->dev_runtime_info); 396 if (ret) { 397 result = VK_ERROR_INITIALIZATION_FAILED; 398 goto err_pvr_winsys_destroy; 399 } 400 401 result = pvr_physical_device_init_uuids(pdevice); 402 if (result != VK_SUCCESS) 403 goto err_pvr_winsys_destroy; 404 405 if (asprintf(&pdevice->name, 406 "Imagination PowerVR %s %s", 407 pdevice->dev_info.ident.series_name, 408 pdevice->dev_info.ident.public_name) < 0) { 409 result = vk_errorf(instance, 410 VK_ERROR_OUT_OF_HOST_MEMORY, 411 "Unable to allocate memory to store device name"); 412 goto err_pvr_winsys_destroy; 413 } 414 415 /* Setup available memory heaps and types */ 416 pdevice->memory.memoryHeapCount = 1; 417 pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size(); 418 pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; 419 420 pdevice->memory.memoryTypeCount = 1; 421 pdevice->memory.memoryTypes[0].propertyFlags = 422 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 423 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 424 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; 425 pdevice->memory.memoryTypes[0].heapIndex = 0; 426 427 result = pvr_wsi_init(pdevice); 428 if (result != VK_SUCCESS) { 429 vk_error(instance, result); 430 goto err_free_name; 431 } 432 433 pdevice->compiler = rogue_compiler_create(&pdevice->dev_info); 434 if (!pdevice->compiler) { 435 result = vk_errorf(instance, 436 VK_ERROR_INITIALIZATION_FAILED, 437 "Failed to initialize Rogue compiler"); 438 goto err_wsi_finish; 439 } 440 441 return VK_SUCCESS; 442 443err_wsi_finish: 444 pvr_wsi_finish(pdevice); 445 446err_free_name: 447 free(pdevice->name); 448 449err_pvr_winsys_destroy: 450 pvr_winsys_destroy(pdevice->ws); 451 452err_vk_free_master_path: 453 vk_free(&pdevice->vk.instance->alloc, pdevice->master_path); 454 455err_close_master_fd: 456 if (pdevice->master_fd >= 0) 457 close(pdevice->master_fd); 458 459 vk_free(&pdevice->vk.instance->alloc, pdevice->render_path); 460 461err_close_render_fd: 462 close(pdevice->render_fd); 463 464err_vk_physical_device_finish: 465 vk_physical_device_finish(&pdevice->vk); 466 467 return result; 468} 469 470static bool pvr_drm_device_is_supported(drmDevicePtr drm_dev, int node_type) 471{ 472 char **compat = drm_dev->deviceinfo.platform->compatible; 473 474 if (!(drm_dev->available_nodes & BITFIELD_BIT(node_type))) { 475 assert(node_type == DRM_NODE_RENDER || node_type == DRM_NODE_PRIMARY); 476 return false; 477 } 478 479 if (node_type == DRM_NODE_RENDER) { 480 while (*compat) { 481 for (size_t i = 0U; i < ARRAY_SIZE(pvr_render_devices); i++) { 482 const char *const name = pvr_render_devices[i].name; 483 const size_t len = pvr_render_devices[i].len; 484 485 if (strncmp(*compat, name, len) == 0) 486 return true; 487 } 488 489 compat++; 490 } 491 492 return false; 493 } else if (node_type == DRM_NODE_PRIMARY) { 494 while (*compat) { 495 for (size_t i = 0U; i < ARRAY_SIZE(pvr_display_devices); i++) { 496 const char *const name = pvr_display_devices[i].name; 497 const size_t len = pvr_display_devices[i].len; 498 499 if (strncmp(*compat, name, len) == 0) 500 return true; 501 } 502 503 compat++; 504 } 505 506 return false; 507 } 508 509 unreachable("Incorrect node_type."); 510} 511 512static VkResult pvr_enumerate_devices(struct pvr_instance *instance) 513{ 514 /* FIXME: It should be possible to query the number of devices via 515 * drmGetDevices2 by passing in NULL for the 'devices' parameter. However, 516 * this was broken by libdrm commit 517 * 8cb12a2528d795c45bba5f03b3486b4040fb0f45, so, until this is fixed in 518 * upstream, hard-code the maximum number of devices. 519 */ 520 drmDevicePtr drm_primary_device = NULL; 521 drmDevicePtr drm_render_device = NULL; 522 drmDevicePtr drm_devices[8]; 523 int max_drm_devices; 524 VkResult result; 525 526 instance->physical_devices_count = 0; 527 528 max_drm_devices = drmGetDevices2(0, drm_devices, ARRAY_SIZE(drm_devices)); 529 if (max_drm_devices < 1) 530 return VK_SUCCESS; 531 532 for (unsigned i = 0; i < (unsigned)max_drm_devices; i++) { 533 if (drm_devices[i]->bustype != DRM_BUS_PLATFORM) 534 continue; 535 536 if (pvr_drm_device_is_supported(drm_devices[i], DRM_NODE_RENDER)) { 537 drm_render_device = drm_devices[i]; 538 539 mesa_logd("Found compatible render device '%s'.", 540 drm_render_device->nodes[DRM_NODE_RENDER]); 541 } else if (pvr_drm_device_is_supported(drm_devices[i], 542 DRM_NODE_PRIMARY)) { 543 drm_primary_device = drm_devices[i]; 544 545 mesa_logd("Found compatible primary device '%s'.", 546 drm_primary_device->nodes[DRM_NODE_PRIMARY]); 547 } 548 } 549 550 if (drm_render_device && drm_primary_device) { 551 result = pvr_physical_device_init(&instance->physical_device, 552 instance, 553 drm_render_device, 554 drm_primary_device); 555 if (result == VK_SUCCESS) 556 instance->physical_devices_count = 1; 557 else if (result == VK_ERROR_INCOMPATIBLE_DRIVER) 558 result = VK_SUCCESS; 559 } else { 560 result = VK_SUCCESS; 561 } 562 563 drmFreeDevices(drm_devices, max_drm_devices); 564 565 return result; 566} 567 568VkResult pvr_EnumeratePhysicalDevices(VkInstance _instance, 569 uint32_t *pPhysicalDeviceCount, 570 VkPhysicalDevice *pPhysicalDevices) 571{ 572 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, 573 out, 574 pPhysicalDevices, 575 pPhysicalDeviceCount); 576 PVR_FROM_HANDLE(pvr_instance, instance, _instance); 577 VkResult result; 578 579 if (instance->physical_devices_count < 0) { 580 result = pvr_enumerate_devices(instance); 581 if (result != VK_SUCCESS) 582 return result; 583 } 584 585 if (instance->physical_devices_count == 0) 586 return VK_SUCCESS; 587 588 assert(instance->physical_devices_count == 1); 589 vk_outarray_append_typed (VkPhysicalDevice, &out, p) { 590 *p = pvr_physical_device_to_handle(&instance->physical_device); 591 } 592 593 return vk_outarray_status(&out); 594} 595 596void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, 597 VkPhysicalDeviceFeatures2 *pFeatures) 598{ 599 PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); 600 601 pFeatures->features = (VkPhysicalDeviceFeatures){ 602 .robustBufferAccess = 603 PVR_HAS_FEATURE(&pdevice->dev_info, robust_buffer_access), 604 .fullDrawIndexUint32 = true, 605 .imageCubeArray = true, 606 .independentBlend = true, 607 .geometryShader = false, 608 .tessellationShader = false, 609 .sampleRateShading = true, 610 .dualSrcBlend = false, 611 .logicOp = true, 612 .multiDrawIndirect = true, 613 .drawIndirectFirstInstance = true, 614 .depthClamp = true, 615 .depthBiasClamp = true, 616 .fillModeNonSolid = false, 617 .depthBounds = false, 618 .wideLines = true, 619 .largePoints = true, 620 .alphaToOne = true, 621 .multiViewport = false, 622 .samplerAnisotropy = true, 623 .textureCompressionETC2 = true, 624 .textureCompressionASTC_LDR = PVR_HAS_FEATURE(&pdevice->dev_info, astc), 625 .textureCompressionBC = false, 626 .occlusionQueryPrecise = true, 627 .pipelineStatisticsQuery = false, 628 .vertexPipelineStoresAndAtomics = true, 629 .fragmentStoresAndAtomics = true, 630 .shaderTessellationAndGeometryPointSize = false, 631 .shaderImageGatherExtended = false, 632 .shaderStorageImageExtendedFormats = true, 633 .shaderStorageImageMultisample = false, 634 .shaderStorageImageReadWithoutFormat = true, 635 .shaderStorageImageWriteWithoutFormat = false, 636 .shaderUniformBufferArrayDynamicIndexing = true, 637 .shaderSampledImageArrayDynamicIndexing = true, 638 .shaderStorageBufferArrayDynamicIndexing = true, 639 .shaderStorageImageArrayDynamicIndexing = true, 640 .shaderClipDistance = true, 641 .shaderCullDistance = true, 642 .shaderFloat64 = false, 643 .shaderInt64 = true, 644 .shaderInt16 = true, 645 .shaderResourceResidency = false, 646 .shaderResourceMinLod = false, 647 .sparseBinding = false, 648 .sparseResidencyBuffer = false, 649 .sparseResidencyImage2D = false, 650 .sparseResidencyImage3D = false, 651 .sparseResidency2Samples = false, 652 .sparseResidency4Samples = false, 653 .sparseResidency8Samples = false, 654 .sparseResidency16Samples = false, 655 .sparseResidencyAliased = false, 656 .variableMultisampleRate = false, 657 .inheritedQueries = false, 658 }; 659 660 vk_foreach_struct (ext, pFeatures->pNext) { 661 pvr_debug_ignored_stype(ext->sType); 662 } 663} 664 665/* TODO: See if this function can be improved once fully implemented. */ 666uint32_t pvr_calc_fscommon_size_and_tiles_in_flight( 667 const struct pvr_physical_device *pdevice, 668 uint32_t fs_common_size, 669 uint32_t min_tiles_in_flight) 670{ 671 const struct pvr_device_runtime_info *dev_runtime_info = 672 &pdevice->dev_runtime_info; 673 const struct pvr_device_info *dev_info = &pdevice->dev_info; 674 uint32_t max_tiles_in_flight; 675 uint32_t num_allocs; 676 677 if (PVR_HAS_FEATURE(dev_info, s8xe)) { 678 num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U); 679 } else { 680 uint32_t min_cluster_per_phantom = 0; 681 682 if (dev_runtime_info->num_phantoms > 1) { 683 pvr_finishme("Unimplemented path!!"); 684 } else { 685 min_cluster_per_phantom = 686 PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U); 687 } 688 689 if (dev_runtime_info->num_phantoms > 1) 690 pvr_finishme("Unimplemented path!!"); 691 692 if (dev_runtime_info->num_phantoms > 2) 693 pvr_finishme("Unimplemented path!!"); 694 695 if (dev_runtime_info->num_phantoms > 3) 696 pvr_finishme("Unimplemented path!!"); 697 698 if (min_cluster_per_phantom >= 4) 699 num_allocs = 1; 700 else if (min_cluster_per_phantom == 2) 701 num_allocs = 2; 702 else 703 num_allocs = 4; 704 } 705 706 max_tiles_in_flight = 707 PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U); 708 709 if (fs_common_size == UINT_MAX) { 710 const struct pvr_device_runtime_info *dev_runtime_info = 711 &pdevice->dev_runtime_info; 712 uint32_t max_common_size; 713 714 num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight); 715 716 if (!PVR_HAS_ERN(dev_info, 38748)) { 717 /* Hardware needs space for one extra shared allocation. */ 718 num_allocs += 1; 719 } 720 721 max_common_size = 722 dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs; 723 724 /* Double resource requirements to deal with fragmentation. */ 725 max_common_size /= num_allocs * 2; 726 max_common_size = 727 ROUND_DOWN_TO(max_common_size, 728 PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE)); 729 730 return max_common_size; 731 } else if (fs_common_size == 0) { 732 return max_tiles_in_flight; 733 } 734 735 pvr_finishme("Unimplemented path!!"); 736 737 return 0; 738} 739 740struct pvr_descriptor_limits { 741 uint32_t max_per_stage_resources; 742 uint32_t max_per_stage_samplers; 743 uint32_t max_per_stage_uniform_buffers; 744 uint32_t max_per_stage_storage_buffers; 745 uint32_t max_per_stage_sampled_images; 746 uint32_t max_per_stage_storage_images; 747 uint32_t max_per_stage_input_attachments; 748}; 749 750static const struct pvr_descriptor_limits * 751pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice) 752{ 753 enum pvr_descriptor_cs_level { 754 /* clang-format off */ 755 CS4096, /* 6XT and some XE cores with large CS. */ 756 CS2560, /* Mid range Rogue XE cores. */ 757 CS2048, /* Low end Rogue XE cores. */ 758 CS1536, /* Ultra-low-end 9XEP. */ 759 CS680, /* lower limits for older devices. */ 760 CS408, /* 7XE. */ 761 /* clang-format on */ 762 }; 763 764 static const struct pvr_descriptor_limits descriptor_limits[] = { 765 [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, }, 766 [CS2560] = { 648U, 128U, 128U, 128U, 128U, 128U, 8U, }, 767 [CS2048] = { 584U, 128U, 96U, 64U, 128U, 128U, 8U, }, 768 [CS1536] = { 456U, 64U, 96U, 64U, 128U, 64U, 8U, }, 769 [CS680] = { 224U, 32U, 64U, 36U, 48U, 8U, 8U, }, 770 [CS408] = { 128U, 16U, 40U, 28U, 16U, 8U, 8U, }, 771 }; 772 773 const uint32_t common_size = 774 pvr_calc_fscommon_size_and_tiles_in_flight(pdevice, -1, 1); 775 enum pvr_descriptor_cs_level cs_level; 776 777 if (common_size >= 2048) { 778 cs_level = CS2048; 779 } else if (common_size >= 1526) { 780 cs_level = CS1536; 781 } else if (common_size >= 680) { 782 cs_level = CS680; 783 } else if (common_size >= 408) { 784 cs_level = CS408; 785 } else { 786 mesa_loge("This core appears to have a very limited amount of shared " 787 "register space and may not meet the Vulkan spec limits."); 788 abort(); 789 } 790 791 return &descriptor_limits[cs_level]; 792} 793 794void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, 795 VkPhysicalDeviceProperties2 *pProperties) 796{ 797 PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); 798 const struct pvr_descriptor_limits *descriptor_limits = 799 pvr_get_physical_device_descriptor_limits(pdevice); 800 801 /* Default value based on the minimum value found in all existing cores. */ 802 const uint32_t max_multisample = 803 PVR_GET_FEATURE_VALUE(&pdevice->dev_info, max_multisample, 4); 804 805 /* Default value based on the minimum value found in all existing cores. */ 806 const uint32_t uvs_banks = 807 PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_banks, 2); 808 809 /* Default value based on the minimum value found in all existing cores. */ 810 const uint32_t uvs_pba_entries = 811 PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_pba_entries, 160); 812 813 /* Default value based on the minimum value found in all existing cores. */ 814 const uint32_t num_user_clip_planes = 815 PVR_GET_FEATURE_VALUE(&pdevice->dev_info, num_user_clip_planes, 8); 816 817 const uint32_t sub_pixel_precision = 818 PVR_HAS_FEATURE(&pdevice->dev_info, simple_internal_parameter_format) 819 ? 4U 820 : 8U; 821 822 const uint32_t max_render_size = 823 rogue_get_render_size_max(&pdevice->dev_info); 824 825 const uint32_t max_sample_bits = ((max_multisample << 1) - 1); 826 827 const uint32_t max_user_vertex_components = 828 ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U; 829 830 /* The workgroup invocations are limited by the case where we have a compute 831 * barrier - each slot has a fixed number of invocations, the whole workgroup 832 * may need to span multiple slots. As each slot will WAIT at the barrier 833 * until the last invocation completes, all have to be schedulable at the 834 * same time. 835 * 836 * Typically all Rogue cores have 16 slots. Some of the smallest cores are 837 * reduced to 14. 838 * 839 * The compute barrier slot exhaustion scenario can be tested with: 840 * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence 841 * .atomicwrite*guard*comp 842 */ 843 844 /* Default value based on the minimum value found in all existing cores. */ 845 const uint32_t usc_slots = 846 PVR_GET_FEATURE_VALUE(&pdevice->dev_info, usc_slots, 14); 847 848 /* Default value based on the minimum value found in all existing cores. */ 849 const uint32_t max_instances_per_pds_task = 850 PVR_GET_FEATURE_VALUE(&pdevice->dev_info, 851 max_instances_per_pds_task, 852 32U); 853 854 const uint32_t max_compute_work_group_invocations = 855 (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U; 856 857 VkPhysicalDeviceLimits limits = { 858 .maxImageDimension1D = max_render_size, 859 .maxImageDimension2D = max_render_size, 860 .maxImageDimension3D = 2U * 1024U, 861 .maxImageDimensionCube = max_render_size, 862 .maxImageArrayLayers = 2U * 1024U, 863 .maxTexelBufferElements = 64U * 1024U, 864 .maxUniformBufferRange = 128U * 1024U * 1024U, 865 .maxStorageBufferRange = 128U * 1024U * 1024U, 866 .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE, 867 .maxMemoryAllocationCount = UINT32_MAX, 868 .maxSamplerAllocationCount = UINT32_MAX, 869 .bufferImageGranularity = 1U, 870 .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL, 871 872 /* Maximum number of descriptor sets that can be bound at the same time. 873 */ 874 .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS, 875 876 .maxPerStageResources = descriptor_limits->max_per_stage_resources, 877 .maxPerStageDescriptorSamplers = 878 descriptor_limits->max_per_stage_samplers, 879 .maxPerStageDescriptorUniformBuffers = 880 descriptor_limits->max_per_stage_uniform_buffers, 881 .maxPerStageDescriptorStorageBuffers = 882 descriptor_limits->max_per_stage_storage_buffers, 883 .maxPerStageDescriptorSampledImages = 884 descriptor_limits->max_per_stage_sampled_images, 885 .maxPerStageDescriptorStorageImages = 886 descriptor_limits->max_per_stage_storage_images, 887 .maxPerStageDescriptorInputAttachments = 888 descriptor_limits->max_per_stage_input_attachments, 889 890 .maxDescriptorSetSamplers = 256U, 891 .maxDescriptorSetUniformBuffers = 256U, 892 .maxDescriptorSetUniformBuffersDynamic = 8U, 893 .maxDescriptorSetStorageBuffers = 256U, 894 .maxDescriptorSetStorageBuffersDynamic = 8U, 895 .maxDescriptorSetSampledImages = 256U, 896 .maxDescriptorSetStorageImages = 256U, 897 .maxDescriptorSetInputAttachments = 256U, 898 899 /* Vertex Shader Limits */ 900 .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS, 901 .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS, 902 .maxVertexInputAttributeOffset = 0xFFFF, 903 .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U, 904 .maxVertexOutputComponents = max_user_vertex_components, 905 906 /* Tessellation Limits */ 907 .maxTessellationGenerationLevel = 0, 908 .maxTessellationPatchSize = 0, 909 .maxTessellationControlPerVertexInputComponents = 0, 910 .maxTessellationControlPerVertexOutputComponents = 0, 911 .maxTessellationControlPerPatchOutputComponents = 0, 912 .maxTessellationControlTotalOutputComponents = 0, 913 .maxTessellationEvaluationInputComponents = 0, 914 .maxTessellationEvaluationOutputComponents = 0, 915 916 /* Geometry Shader Limits */ 917 .maxGeometryShaderInvocations = 0, 918 .maxGeometryInputComponents = 0, 919 .maxGeometryOutputComponents = 0, 920 .maxGeometryOutputVertices = 0, 921 .maxGeometryTotalOutputComponents = 0, 922 923 /* Fragment Shader Limits */ 924 .maxFragmentInputComponents = max_user_vertex_components, 925 .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS, 926 .maxFragmentDualSrcAttachments = 0, 927 .maxFragmentCombinedOutputResources = 928 descriptor_limits->max_per_stage_storage_buffers + 929 descriptor_limits->max_per_stage_storage_images + 930 PVR_MAX_COLOR_ATTACHMENTS, 931 932 /* Compute Shader Limits */ 933 .maxComputeSharedMemorySize = 16U * 1024U, 934 .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U }, 935 .maxComputeWorkGroupInvocations = max_compute_work_group_invocations, 936 .maxComputeWorkGroupSize = { max_compute_work_group_invocations, 937 max_compute_work_group_invocations, 938 64U }, 939 940 /* Rasterization Limits */ 941 .subPixelPrecisionBits = sub_pixel_precision, 942 .subTexelPrecisionBits = 8U, 943 .mipmapPrecisionBits = 8U, 944 945 .maxDrawIndexedIndexValue = UINT32_MAX, 946 .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U, 947 .maxSamplerLodBias = 16.0f, 948 .maxSamplerAnisotropy = 1.0f, 949 .maxViewports = PVR_MAX_VIEWPORTS, 950 951 .maxViewportDimensions[0] = max_render_size, 952 .maxViewportDimensions[1] = max_render_size, 953 .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size), 954 .viewportBoundsRange[1] = 2U * max_render_size, 955 956 .viewportSubPixelBits = 0, 957 .minMemoryMapAlignment = 64U, 958 .minTexelBufferOffsetAlignment = 16U, 959 .minUniformBufferOffsetAlignment = 4U, 960 .minStorageBufferOffsetAlignment = 4U, 961 962 .minTexelOffset = -8, 963 .maxTexelOffset = 7U, 964 .minTexelGatherOffset = -8, 965 .maxTexelGatherOffset = 7, 966 .minInterpolationOffset = -0.5, 967 .maxInterpolationOffset = 0.5, 968 .subPixelInterpolationOffsetBits = 4U, 969 970 .maxFramebufferWidth = max_render_size, 971 .maxFramebufferHeight = max_render_size, 972 .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS, 973 974 .framebufferColorSampleCounts = max_sample_bits, 975 .framebufferDepthSampleCounts = max_sample_bits, 976 .framebufferStencilSampleCounts = max_sample_bits, 977 .framebufferNoAttachmentsSampleCounts = max_sample_bits, 978 .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS, 979 .sampledImageColorSampleCounts = max_sample_bits, 980 .sampledImageIntegerSampleCounts = max_sample_bits, 981 .sampledImageDepthSampleCounts = max_sample_bits, 982 .sampledImageStencilSampleCounts = max_sample_bits, 983 .storageImageSampleCounts = max_sample_bits, 984 .maxSampleMaskWords = 1U, 985 .timestampComputeAndGraphics = false, 986 .timestampPeriod = 0.0f, 987 .maxClipDistances = num_user_clip_planes, 988 .maxCullDistances = num_user_clip_planes, 989 .maxCombinedClipAndCullDistances = num_user_clip_planes, 990 .discreteQueuePriorities = 2U, 991 .pointSizeRange[0] = 1.0f, 992 .pointSizeRange[1] = 511.0f, 993 .pointSizeGranularity = 0.0625f, 994 .lineWidthRange[0] = 1.0f / 16.0f, 995 .lineWidthRange[1] = 16.0f, 996 .lineWidthGranularity = 1.0f / 16.0f, 997 .strictLines = false, 998 .standardSampleLocations = true, 999 .optimalBufferCopyOffsetAlignment = 4U, 1000 .optimalBufferCopyRowPitchAlignment = 4U, 1001 .nonCoherentAtomSize = 1U, 1002 }; 1003 1004 pProperties->properties = (VkPhysicalDeviceProperties){ 1005 .apiVersion = PVR_API_VERSION, 1006 .driverVersion = vk_get_driver_version(), 1007 .vendorID = VK_VENDOR_ID_IMAGINATION, 1008 .deviceID = pdevice->dev_info.ident.device_id, 1009 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, 1010 .limits = limits, 1011 .sparseProperties = { 0 }, 1012 }; 1013 1014 snprintf(pProperties->properties.deviceName, 1015 sizeof(pProperties->properties.deviceName), 1016 "%s", 1017 pdevice->name); 1018 1019 memcpy(pProperties->properties.pipelineCacheUUID, 1020 pdevice->pipeline_cache_uuid, 1021 VK_UUID_SIZE); 1022 1023 vk_foreach_struct (ext, pProperties->pNext) { 1024 pvr_debug_ignored_stype(ext->sType); 1025 } 1026} 1027 1028const static VkQueueFamilyProperties pvr_queue_family_properties = { 1029 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT | 1030 VK_QUEUE_TRANSFER_BIT, 1031 .queueCount = PVR_MAX_QUEUES, 1032 .timestampValidBits = 0, 1033 .minImageTransferGranularity = { 1, 1, 1 }, 1034}; 1035 1036void pvr_GetPhysicalDeviceQueueFamilyProperties( 1037 VkPhysicalDevice physicalDevice, 1038 uint32_t *pCount, 1039 VkQueueFamilyProperties *pQueueFamilyProperties) 1040{ 1041 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties, 1042 out, 1043 pQueueFamilyProperties, 1044 pCount); 1045 1046 vk_outarray_append_typed (VkQueueFamilyProperties, &out, p) { 1047 *p = pvr_queue_family_properties; 1048 } 1049} 1050 1051void pvr_GetPhysicalDeviceQueueFamilyProperties2( 1052 VkPhysicalDevice physicalDevice, 1053 uint32_t *pQueueFamilyPropertyCount, 1054 VkQueueFamilyProperties2 *pQueueFamilyProperties) 1055{ 1056 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, 1057 out, 1058 pQueueFamilyProperties, 1059 pQueueFamilyPropertyCount); 1060 1061 vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) { 1062 p->queueFamilyProperties = pvr_queue_family_properties; 1063 1064 vk_foreach_struct (ext, p->pNext) { 1065 pvr_debug_ignored_stype(ext->sType); 1066 } 1067 } 1068} 1069 1070void pvr_GetPhysicalDeviceMemoryProperties2( 1071 VkPhysicalDevice physicalDevice, 1072 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) 1073{ 1074 PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); 1075 1076 pMemoryProperties->memoryProperties = pdevice->memory; 1077 1078 vk_foreach_struct (ext, pMemoryProperties->pNext) { 1079 pvr_debug_ignored_stype(ext->sType); 1080 } 1081} 1082 1083PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance, 1084 const char *pName) 1085{ 1086 PVR_FROM_HANDLE(pvr_instance, instance, _instance); 1087 return vk_instance_get_proc_addr(&instance->vk, 1088 &pvr_instance_entrypoints, 1089 pName); 1090} 1091 1092/* With version 1+ of the loader interface the ICD should expose 1093 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in 1094 * apps. 1095 */ 1096PUBLIC 1097VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL 1098vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) 1099{ 1100 return pvr_GetInstanceProcAddr(instance, pName); 1101} 1102 1103/* With version 4+ of the loader interface the ICD should expose 1104 * vk_icdGetPhysicalDeviceProcAddr(). 1105 */ 1106PUBLIC 1107VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL 1108vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName) 1109{ 1110 PVR_FROM_HANDLE(pvr_instance, instance, _instance); 1111 return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); 1112} 1113 1114static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device) 1115{ 1116 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 1117 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info); 1118 struct pvr_pds_compute_shader_program program = { 0U }; 1119 size_t staging_buffer_size; 1120 uint32_t *staging_buffer; 1121 uint32_t *data_buffer; 1122 uint32_t *code_buffer; 1123 VkResult result; 1124 1125 STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) == 1126 ARRAY_SIZE(program.work_group_input_regs)); 1127 STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) == 1128 ARRAY_SIZE(program.global_input_regs)); 1129 1130 /* Initialize PDS structure. */ 1131 for (uint32_t i = 0U; i < ARRAY_SIZE(program.local_input_regs); i++) { 1132 program.local_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; 1133 program.work_group_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; 1134 program.global_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; 1135 } 1136 1137 program.barrier_coefficient = PVR_PDS_COMPUTE_INPUT_REG_UNUSED; 1138 1139 /* Fence kernel. */ 1140 program.fence = true; 1141 program.clear_pds_barrier = true; 1142 1143 /* Calculate how much space we'll need for the compute shader PDS program. 1144 */ 1145 pvr_pds_set_sizes_compute_shader(&program, dev_info); 1146 1147 /* FIXME: Fix the below inconsistency of code size being in bytes whereas 1148 * data size being in dwords. 1149 */ 1150 /* Code size is in bytes, data size in dwords. */ 1151 staging_buffer_size = 1152 program.data_size * sizeof(uint32_t) + program.code_size; 1153 1154 staging_buffer = vk_alloc(&device->vk.alloc, 1155 staging_buffer_size, 1156 8U, 1157 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1158 if (!staging_buffer) 1159 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1160 1161 data_buffer = staging_buffer; 1162 code_buffer = pvr_pds_generate_compute_shader_data_segment(&program, 1163 data_buffer, 1164 dev_info); 1165 pvr_pds_generate_compute_shader_code_segment(&program, 1166 code_buffer, 1167 dev_info); 1168 result = pvr_gpu_upload_pds(device, 1169 data_buffer, 1170 program.data_size, 1171 PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT), 1172 code_buffer, 1173 program.code_size / sizeof(uint32_t), 1174 PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT), 1175 cache_line_size, 1176 &device->pds_compute_fence_program); 1177 1178 vk_free(&device->vk.alloc, staging_buffer); 1179 1180 return result; 1181} 1182 1183static VkResult pvr_pds_idfwdf_programs_create_and_upload( 1184 struct pvr_device *device, 1185 pvr_dev_addr_t usc_addr, 1186 uint32_t shareds, 1187 uint32_t temps, 1188 pvr_dev_addr_t shareds_buffer_addr, 1189 struct pvr_pds_upload *const upload_out, 1190 struct pvr_pds_upload *const sw_compute_barrier_upload_out) 1191{ 1192 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 1193 struct pvr_pds_vertex_shader_sa_program program = { 1194 .kick_usc = true, 1195 .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info), 1196 }; 1197 size_t staging_buffer_size; 1198 uint32_t *staging_buffer; 1199 VkResult result; 1200 1201 /* We'll need to DMA the shareds into the USC's Common Store. */ 1202 program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control, 1203 program.dma_address, 1204 0, 1205 shareds, 1206 shareds_buffer_addr.addr, 1207 dev_info); 1208 1209 /* DMA temp regs. */ 1210 pvr_pds_setup_doutu(&program.usc_task_control, 1211 usc_addr.addr, 1212 temps, 1213 PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), 1214 false); 1215 1216 pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info); 1217 1218 staging_buffer_size = 1219 (program.code_size + program.data_size) * sizeof(*staging_buffer); 1220 1221 staging_buffer = vk_alloc(&device->vk.alloc, 1222 staging_buffer_size, 1223 8, 1224 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1225 if (!staging_buffer) 1226 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1227 1228 /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */ 1229 pvr_pds_vertex_shader_sa(&program, 1230 staging_buffer, 1231 PDS_GENERATE_DATA_SEGMENT, 1232 dev_info); 1233 pvr_pds_vertex_shader_sa(&program, 1234 &staging_buffer[program.data_size], 1235 PDS_GENERATE_CODE_SEGMENT, 1236 dev_info); 1237 1238 /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program 1239 * is bigger so we handle it first (if needed) and realloc() for a smaller 1240 * size. 1241 */ 1242 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) { 1243 /* FIXME: Figure out the define for alignment of 16. */ 1244 result = pvr_gpu_upload_pds(device, 1245 &staging_buffer[0], 1246 program.data_size, 1247 16, 1248 &staging_buffer[program.data_size], 1249 program.code_size, 1250 16, 1251 16, 1252 sw_compute_barrier_upload_out); 1253 if (result != VK_SUCCESS) { 1254 vk_free(&device->vk.alloc, staging_buffer); 1255 return result; 1256 } 1257 1258 program.clear_pds_barrier = false; 1259 1260 pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info); 1261 1262 staging_buffer_size = 1263 (program.code_size + program.data_size) * sizeof(*staging_buffer); 1264 1265 staging_buffer = vk_realloc(&device->vk.alloc, 1266 staging_buffer, 1267 staging_buffer_size, 1268 8, 1269 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1270 if (!staging_buffer) { 1271 pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo); 1272 1273 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1274 } 1275 1276 /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */ 1277 pvr_pds_vertex_shader_sa(&program, 1278 staging_buffer, 1279 PDS_GENERATE_DATA_SEGMENT, 1280 dev_info); 1281 pvr_pds_vertex_shader_sa(&program, 1282 &staging_buffer[program.data_size], 1283 PDS_GENERATE_CODE_SEGMENT, 1284 dev_info); 1285 } else { 1286 *sw_compute_barrier_upload_out = (struct pvr_pds_upload){ 1287 .pvr_bo = NULL, 1288 }; 1289 } 1290 1291 /* FIXME: Figure out the define for alignment of 16. */ 1292 result = pvr_gpu_upload_pds(device, 1293 &staging_buffer[0], 1294 program.data_size, 1295 16, 1296 &staging_buffer[program.data_size], 1297 program.code_size, 1298 16, 1299 16, 1300 upload_out); 1301 if (result != VK_SUCCESS) { 1302 vk_free(&device->vk.alloc, staging_buffer); 1303 pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo); 1304 1305 return result; 1306 } 1307 1308 vk_free(&device->vk.alloc, staging_buffer); 1309 1310 return VK_SUCCESS; 1311} 1312 1313static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device) 1314{ 1315 uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS]; 1316 uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS]; 1317 const struct rogue_shader_binary *usc_program; 1318 struct pvr_texture_state_info tex_info; 1319 uint32_t *dword_ptr; 1320 uint32_t usc_shareds; 1321 uint32_t usc_temps; 1322 VkResult result; 1323 1324 pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info, 1325 &usc_program, 1326 &usc_shareds, 1327 &usc_temps); 1328 1329 device->idfwdf_state.usc_shareds = usc_shareds; 1330 1331 /* FIXME: Figure out the define for alignment of 16. */ 1332 result = pvr_gpu_upload_usc(device, 1333 usc_program->data, 1334 usc_program->size, 1335 16, 1336 &device->idfwdf_state.usc); 1337 if (result != VK_SUCCESS) 1338 return result; 1339 1340 /* TODO: Get the store buffer size from the compiler? */ 1341 /* TODO: How was the size derived here? */ 1342 result = pvr_bo_alloc(device, 1343 device->heaps.general_heap, 1344 4 * sizeof(float) * 4 * 2, 1345 4, 1346 0, 1347 &device->idfwdf_state.store_bo); 1348 if (result != VK_SUCCESS) 1349 goto err_free_usc_program; 1350 1351 result = pvr_bo_alloc(device, 1352 device->heaps.general_heap, 1353 usc_shareds * ROGUE_REG_SIZE_BYTES, 1354 ROGUE_REG_SIZE_BYTES, 1355 PVR_BO_ALLOC_FLAG_CPU_MAPPED, 1356 &device->idfwdf_state.shareds_bo); 1357 if (result != VK_SUCCESS) 1358 goto err_free_store_buffer; 1359 1360 /* Pack state words. */ 1361 1362 pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) { 1363 sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT); 1364 sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT); 1365 sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); 1366 sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); 1367 } 1368 1369 /* clang-format off */ 1370 pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {} 1371 /* clang-format on */ 1372 1373 STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS); 1374 1375 tex_info = (struct pvr_texture_state_info){ 1376 .format = VK_FORMAT_R32G32B32A32_SFLOAT, 1377 .mem_layout = PVR_MEMLAYOUT_LINEAR, 1378 .flags = PVR_TEXFLAGS_INDEX_LOOKUP, 1379 /* TODO: Is this correct? Is it 2D, 3D, or 2D_ARRAY? */ 1380 .type = VK_IMAGE_VIEW_TYPE_2D, 1381 .extent = { .width = 4, .height = 2, .depth = 0 }, 1382 .mip_levels = 1, 1383 .sample_count = 1, 1384 .stride = 4, 1385 .swizzle = { PIPE_SWIZZLE_X, 1386 PIPE_SWIZZLE_Y, 1387 PIPE_SWIZZLE_Z, 1388 PIPE_SWIZZLE_W }, 1389 .addr = device->idfwdf_state.store_bo->vma->dev_addr, 1390 }; 1391 1392 result = pvr_pack_tex_state(device, &tex_info, image_state); 1393 if (result != VK_SUCCESS) 1394 goto err_free_shareds_buffer; 1395 1396 /* Fill the shareds buffer. */ 1397 1398 dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map; 1399 1400#define HIGH_32(val) ((uint32_t)((val) >> 32U)) 1401#define LOW_32(val) ((uint32_t)(val)) 1402 1403 /* TODO: Should we use compiler info to setup the shareds data instead of 1404 * assuming there's always 12 and this is how they should be setup? 1405 */ 1406 1407 dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr); 1408 dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr); 1409 1410 /* Pad the shareds as the texture/sample state words are 128 bit aligned. */ 1411 dword_ptr[2] = 0U; 1412 dword_ptr[3] = 0U; 1413 1414 dword_ptr[4] = LOW_32(image_state[0]); 1415 dword_ptr[5] = HIGH_32(image_state[0]); 1416 dword_ptr[6] = LOW_32(image_state[1]); 1417 dword_ptr[7] = HIGH_32(image_state[1]); 1418 1419 dword_ptr[8] = LOW_32(sampler_state[0]); 1420 dword_ptr[9] = HIGH_32(sampler_state[0]); 1421 dword_ptr[10] = LOW_32(sampler_state[1]); 1422 dword_ptr[11] = HIGH_32(sampler_state[1]); 1423 assert(11 + 1 == usc_shareds); 1424 1425#undef HIGH_32 1426#undef LOW_32 1427 1428 pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo); 1429 dword_ptr = NULL; 1430 1431 /* Generate and upload PDS programs. */ 1432 result = pvr_pds_idfwdf_programs_create_and_upload( 1433 device, 1434 device->idfwdf_state.usc->vma->dev_addr, 1435 usc_shareds, 1436 usc_temps, 1437 device->idfwdf_state.shareds_bo->vma->dev_addr, 1438 &device->idfwdf_state.pds, 1439 &device->idfwdf_state.sw_compute_barrier_pds); 1440 if (result != VK_SUCCESS) 1441 goto err_free_shareds_buffer; 1442 1443 return VK_SUCCESS; 1444 1445err_free_shareds_buffer: 1446 pvr_bo_free(device, device->idfwdf_state.shareds_bo); 1447 1448err_free_store_buffer: 1449 pvr_bo_free(device, device->idfwdf_state.store_bo); 1450 1451err_free_usc_program: 1452 pvr_bo_free(device, device->idfwdf_state.usc); 1453 1454 return result; 1455} 1456 1457static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device) 1458{ 1459 pvr_bo_free(device, device->idfwdf_state.pds.pvr_bo); 1460 pvr_bo_free(device, device->idfwdf_state.sw_compute_barrier_pds.pvr_bo); 1461 pvr_bo_free(device, device->idfwdf_state.shareds_bo); 1462 pvr_bo_free(device, device->idfwdf_state.store_bo); 1463 pvr_bo_free(device, device->idfwdf_state.usc); 1464} 1465 1466/* FIXME: We should be calculating the size when we upload the code in 1467 * pvr_srv_setup_static_pixel_event_program(). 1468 */ 1469static void pvr_device_get_pixel_event_pds_program_data_size( 1470 const struct pvr_device_info *dev_info, 1471 uint32_t *const data_size_in_dwords_out) 1472{ 1473 struct pvr_pds_event_program program = { 1474 /* No data to DMA, just a DOUTU needed. */ 1475 .num_emit_word_pairs = 0, 1476 }; 1477 1478 pvr_pds_set_sizes_pixel_event(&program, dev_info); 1479 1480 *data_size_in_dwords_out = program.data_size; 1481} 1482 1483static VkResult pvr_device_init_nop_program(struct pvr_device *device) 1484{ 1485 const uint32_t cache_line_size = 1486 rogue_get_slc_cache_line_size(&device->pdevice->dev_info); 1487 struct pvr_pds_kickusc_program program = { 0 }; 1488 uint32_t staging_buffer_size; 1489 uint32_t *staging_buffer; 1490 VkResult result; 1491 1492 result = pvr_gpu_upload_usc(device, 1493 pvr_nop_usc_code, 1494 sizeof(pvr_nop_usc_code), 1495 cache_line_size, 1496 &device->nop_program.usc); 1497 if (result != VK_SUCCESS) 1498 return result; 1499 1500 /* Setup a PDS program that kicks the static USC program. */ 1501 pvr_pds_setup_doutu(&program.usc_task_control, 1502 device->nop_program.usc->vma->dev_addr.addr, 1503 0U, 1504 PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE), 1505 false); 1506 1507 pvr_pds_set_sizes_pixel_shader(&program); 1508 1509 staging_buffer_size = 1510 (program.code_size + program.data_size) * sizeof(*staging_buffer); 1511 1512 staging_buffer = vk_alloc(&device->vk.alloc, 1513 staging_buffer_size, 1514 8U, 1515 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1516 if (!staging_buffer) { 1517 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1518 goto err_free_nop_usc_bo; 1519 } 1520 1521 pvr_pds_generate_pixel_shader_program(&program, staging_buffer); 1522 1523 /* FIXME: Figure out the define for alignment of 16. */ 1524 result = pvr_gpu_upload_pds(device, 1525 staging_buffer, 1526 program.data_size, 1527 16U, 1528 &staging_buffer[program.data_size], 1529 program.code_size, 1530 16U, 1531 16U, 1532 &device->nop_program.pds); 1533 if (result != VK_SUCCESS) 1534 goto err_free_staging_buffer; 1535 1536 vk_free(&device->vk.alloc, staging_buffer); 1537 1538 return VK_SUCCESS; 1539 1540err_free_staging_buffer: 1541 vk_free(&device->vk.alloc, staging_buffer); 1542 1543err_free_nop_usc_bo: 1544 pvr_bo_free(device, device->nop_program.usc); 1545 1546 return result; 1547} 1548 1549static void pvr_device_init_default_sampler_state(struct pvr_device *device) 1550{ 1551 pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) { 1552 sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); 1553 sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); 1554 sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); 1555 sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT); 1556 sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT); 1557 sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT); 1558 sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED); 1559 sampler.non_normalized_coords = true; 1560 } 1561} 1562 1563VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice, 1564 const VkDeviceCreateInfo *pCreateInfo, 1565 const VkAllocationCallbacks *pAllocator, 1566 VkDevice *pDevice) 1567{ 1568 PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice); 1569 struct pvr_instance *instance = pdevice->instance; 1570 struct vk_device_dispatch_table dispatch_table; 1571 struct pvr_device *device; 1572 VkResult result; 1573 1574 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); 1575 1576 device = vk_alloc2(&pdevice->vk.instance->alloc, 1577 pAllocator, 1578 sizeof(*device), 1579 8, 1580 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1581 if (!device) 1582 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); 1583 1584 vk_device_dispatch_table_from_entrypoints(&dispatch_table, 1585 &pvr_device_entrypoints, 1586 true); 1587 1588 vk_device_dispatch_table_from_entrypoints(&dispatch_table, 1589 &wsi_device_entrypoints, 1590 false); 1591 1592 result = vk_device_init(&device->vk, 1593 &pdevice->vk, 1594 &dispatch_table, 1595 pCreateInfo, 1596 pAllocator); 1597 if (result != VK_SUCCESS) 1598 goto err_free_device; 1599 1600 device->render_fd = open(pdevice->render_path, O_RDWR | O_CLOEXEC); 1601 if (device->render_fd < 0) { 1602 result = vk_errorf(instance, 1603 VK_ERROR_INITIALIZATION_FAILED, 1604 "Failed to open device %s", 1605 pdevice->render_path); 1606 goto err_vk_device_finish; 1607 } 1608 1609 if (pdevice->master_path) 1610 device->master_fd = open(pdevice->master_path, O_RDWR | O_CLOEXEC); 1611 else 1612 device->master_fd = -1; 1613 1614 vk_device_set_drm_fd(&device->vk, device->render_fd); 1615 1616 device->instance = instance; 1617 device->pdevice = pdevice; 1618 1619 device->ws = pvr_winsys_create(device->master_fd, 1620 device->render_fd, 1621 &device->vk.alloc); 1622 if (!device->ws) { 1623 result = VK_ERROR_INITIALIZATION_FAILED; 1624 goto err_close_master_fd; 1625 } 1626 1627 device->ws->ops->get_heaps_info(device->ws, &device->heaps); 1628 1629 result = pvr_free_list_create(device, 1630 PVR_GLOBAL_FREE_LIST_INITIAL_SIZE, 1631 PVR_GLOBAL_FREE_LIST_MAX_SIZE, 1632 PVR_GLOBAL_FREE_LIST_GROW_SIZE, 1633 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD, 1634 NULL /* parent_free_list */, 1635 &device->global_free_list); 1636 if (result != VK_SUCCESS) 1637 goto err_pvr_winsys_destroy; 1638 1639 result = pvr_device_init_nop_program(device); 1640 if (result != VK_SUCCESS) 1641 goto err_pvr_free_list_destroy; 1642 1643 result = pvr_device_init_compute_fence_program(device); 1644 if (result != VK_SUCCESS) 1645 goto err_pvr_free_nop_program; 1646 1647 result = pvr_device_init_compute_idfwdf_state(device); 1648 if (result != VK_SUCCESS) 1649 goto err_pvr_free_compute_fence; 1650 1651 result = pvr_queues_create(device, pCreateInfo); 1652 if (result != VK_SUCCESS) 1653 goto err_pvr_finish_compute_idfwdf; 1654 1655 pvr_device_init_default_sampler_state(device); 1656 1657 if (pCreateInfo->pEnabledFeatures) 1658 memcpy(&device->features, 1659 pCreateInfo->pEnabledFeatures, 1660 sizeof(device->features)); 1661 1662 /* FIXME: Move this to a later stage and possibly somewhere other than 1663 * pvr_device. The purpose of this is so that we don't have to get the size 1664 * on each kick. 1665 */ 1666 pvr_device_get_pixel_event_pds_program_data_size( 1667 &pdevice->dev_info, 1668 &device->pixel_event_data_size_in_dwords); 1669 1670 device->global_queue_job_count = 0; 1671 device->global_queue_present_count = 0; 1672 1673 *pDevice = pvr_device_to_handle(device); 1674 1675 return VK_SUCCESS; 1676 1677err_pvr_finish_compute_idfwdf: 1678 pvr_device_finish_compute_idfwdf_state(device); 1679 1680err_pvr_free_compute_fence: 1681 pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo); 1682 1683err_pvr_free_nop_program: 1684 pvr_bo_free(device, device->nop_program.pds.pvr_bo); 1685 pvr_bo_free(device, device->nop_program.usc); 1686 1687err_pvr_free_list_destroy: 1688 pvr_free_list_destroy(device->global_free_list); 1689 1690err_pvr_winsys_destroy: 1691 pvr_winsys_destroy(device->ws); 1692 1693err_close_master_fd: 1694 if (device->master_fd >= 0) 1695 close(device->master_fd); 1696 1697 close(device->render_fd); 1698 1699err_vk_device_finish: 1700 vk_device_finish(&device->vk); 1701 1702err_free_device: 1703 vk_free(&device->vk.alloc, device); 1704 1705 return result; 1706} 1707 1708void pvr_DestroyDevice(VkDevice _device, 1709 const VkAllocationCallbacks *pAllocator) 1710{ 1711 PVR_FROM_HANDLE(pvr_device, device, _device); 1712 1713 pvr_queues_destroy(device); 1714 pvr_device_finish_compute_idfwdf_state(device); 1715 pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo); 1716 pvr_bo_free(device, device->nop_program.pds.pvr_bo); 1717 pvr_bo_free(device, device->nop_program.usc); 1718 pvr_free_list_destroy(device->global_free_list); 1719 pvr_winsys_destroy(device->ws); 1720 close(device->render_fd); 1721 vk_device_finish(&device->vk); 1722 vk_free(&device->vk.alloc, device); 1723} 1724 1725VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, 1726 VkLayerProperties *pProperties) 1727{ 1728 if (!pProperties) { 1729 *pPropertyCount = 0; 1730 return VK_SUCCESS; 1731 } 1732 1733 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); 1734} 1735 1736VkResult pvr_AllocateMemory(VkDevice _device, 1737 const VkMemoryAllocateInfo *pAllocateInfo, 1738 const VkAllocationCallbacks *pAllocator, 1739 VkDeviceMemory *pMem) 1740{ 1741 const VkImportMemoryFdInfoKHR *fd_info = NULL; 1742 PVR_FROM_HANDLE(pvr_device, device, _device); 1743 enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU; 1744 struct pvr_device_memory *mem; 1745 VkResult result; 1746 1747 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); 1748 assert(pAllocateInfo->allocationSize > 0); 1749 1750 mem = vk_object_alloc(&device->vk, 1751 pAllocator, 1752 sizeof(*mem), 1753 VK_OBJECT_TYPE_DEVICE_MEMORY); 1754 if (!mem) 1755 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 1756 1757 vk_foreach_struct_const (ext, pAllocateInfo->pNext) { 1758 switch ((unsigned)ext->sType) { 1759 case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA: 1760 type = PVR_WINSYS_BO_TYPE_DISPLAY; 1761 break; 1762 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR: 1763 fd_info = (void *)ext; 1764 break; 1765 default: 1766 pvr_debug_ignored_stype(ext->sType); 1767 break; 1768 } 1769 } 1770 1771 if (fd_info && fd_info->handleType) { 1772 VkDeviceSize aligned_alloc_size = 1773 ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size); 1774 1775 assert( 1776 fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 1777 fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 1778 1779 result = device->ws->ops->buffer_create_from_fd(device->ws, 1780 fd_info->fd, 1781 &mem->bo); 1782 if (result != VK_SUCCESS) 1783 goto err_vk_object_free_mem; 1784 1785 /* For security purposes, we reject importing the bo if it's smaller 1786 * than the requested allocation size. This prevents a malicious client 1787 * from passing a buffer to a trusted client, lying about the size, and 1788 * telling the trusted client to try and texture from an image that goes 1789 * out-of-bounds. This sort of thing could lead to GPU hangs or worse 1790 * in the trusted client. The trusted client can protect itself against 1791 * this sort of attack but only if it can trust the buffer size. 1792 */ 1793 if (aligned_alloc_size > mem->bo->size) { 1794 result = vk_errorf(device, 1795 VK_ERROR_INVALID_EXTERNAL_HANDLE, 1796 "Aligned requested size too large for the given fd " 1797 "%" PRIu64 "B > %" PRIu64 "B", 1798 pAllocateInfo->allocationSize, 1799 mem->bo->size); 1800 device->ws->ops->buffer_destroy(mem->bo); 1801 goto err_vk_object_free_mem; 1802 } 1803 1804 /* From the Vulkan spec: 1805 * 1806 * "Importing memory from a file descriptor transfers ownership of 1807 * the file descriptor from the application to the Vulkan 1808 * implementation. The application must not perform any operations on 1809 * the file descriptor after a successful import." 1810 * 1811 * If the import fails, we leave the file descriptor open. 1812 */ 1813 close(fd_info->fd); 1814 } else { 1815 /* Align physical allocations to the page size of the heap that will be 1816 * used when binding device memory (see pvr_bind_memory()) to ensure the 1817 * entire allocation can be mapped. 1818 */ 1819 const uint64_t alignment = device->heaps.general_heap->page_size; 1820 1821 /* FIXME: Need to determine the flags based on 1822 * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags. 1823 * 1824 * The alternative would be to store the flags alongside the memory 1825 * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so 1826 * that they can be looked up. 1827 */ 1828 result = device->ws->ops->buffer_create(device->ws, 1829 pAllocateInfo->allocationSize, 1830 alignment, 1831 type, 1832 PVR_WINSYS_BO_FLAG_CPU_ACCESS, 1833 &mem->bo); 1834 if (result != VK_SUCCESS) 1835 goto err_vk_object_free_mem; 1836 } 1837 1838 *pMem = pvr_device_memory_to_handle(mem); 1839 1840 return VK_SUCCESS; 1841 1842err_vk_object_free_mem: 1843 vk_object_free(&device->vk, pAllocator, mem); 1844 1845 return result; 1846} 1847 1848VkResult pvr_GetMemoryFdKHR(VkDevice _device, 1849 const VkMemoryGetFdInfoKHR *pGetFdInfo, 1850 int *pFd) 1851{ 1852 PVR_FROM_HANDLE(pvr_device, device, _device); 1853 PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory); 1854 1855 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); 1856 1857 assert( 1858 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 1859 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 1860 1861 return device->ws->ops->buffer_get_fd(mem->bo, pFd); 1862} 1863 1864VkResult 1865pvr_GetMemoryFdPropertiesKHR(VkDevice _device, 1866 VkExternalMemoryHandleTypeFlagBits handleType, 1867 int fd, 1868 VkMemoryFdPropertiesKHR *pMemoryFdProperties) 1869{ 1870 PVR_FROM_HANDLE(pvr_device, device, _device); 1871 1872 switch (handleType) { 1873 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: 1874 /* FIXME: This should only allow memory types having 1875 * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as 1876 * dma-buf should be imported using cacheable memory types, 1877 * given exporter's mmap will always map it as cacheable. 1878 * Ref: 1879 * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops 1880 */ 1881 pMemoryFdProperties->memoryTypeBits = 1882 (1 << device->pdevice->memory.memoryTypeCount) - 1; 1883 return VK_SUCCESS; 1884 default: 1885 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); 1886 } 1887} 1888 1889void pvr_FreeMemory(VkDevice _device, 1890 VkDeviceMemory _mem, 1891 const VkAllocationCallbacks *pAllocator) 1892{ 1893 PVR_FROM_HANDLE(pvr_device, device, _device); 1894 PVR_FROM_HANDLE(pvr_device_memory, mem, _mem); 1895 1896 if (!mem) 1897 return; 1898 1899 device->ws->ops->buffer_destroy(mem->bo); 1900 1901 vk_object_free(&device->vk, pAllocator, mem); 1902} 1903 1904VkResult pvr_MapMemory(VkDevice _device, 1905 VkDeviceMemory _memory, 1906 VkDeviceSize offset, 1907 VkDeviceSize size, 1908 VkMemoryMapFlags flags, 1909 void **ppData) 1910{ 1911 PVR_FROM_HANDLE(pvr_device, device, _device); 1912 PVR_FROM_HANDLE(pvr_device_memory, mem, _memory); 1913 void *map; 1914 1915 if (!mem) { 1916 *ppData = NULL; 1917 return VK_SUCCESS; 1918 } 1919 1920 if (size == VK_WHOLE_SIZE) 1921 size = mem->bo->size - offset; 1922 1923 /* From the Vulkan spec version 1.0.32 docs for MapMemory: 1924 * 1925 * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0 1926 * assert(size != 0); 1927 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or 1928 * equal to the size of the memory minus offset 1929 */ 1930 1931 assert(size > 0); 1932 assert(offset + size <= mem->bo->size); 1933 1934 /* Check if already mapped */ 1935 if (mem->bo->map) { 1936 *ppData = mem->bo->map + offset; 1937 return VK_SUCCESS; 1938 } 1939 1940 /* Map it all at once */ 1941 map = device->ws->ops->buffer_map(mem->bo); 1942 if (!map) 1943 return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED); 1944 1945 *ppData = map + offset; 1946 1947 return VK_SUCCESS; 1948} 1949 1950void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory) 1951{ 1952 PVR_FROM_HANDLE(pvr_device, device, _device); 1953 PVR_FROM_HANDLE(pvr_device_memory, mem, _memory); 1954 1955 if (!mem || !mem->bo->map) 1956 return; 1957 1958 device->ws->ops->buffer_unmap(mem->bo); 1959} 1960 1961VkResult pvr_FlushMappedMemoryRanges(VkDevice _device, 1962 uint32_t memoryRangeCount, 1963 const VkMappedMemoryRange *pMemoryRanges) 1964{ 1965 return VK_SUCCESS; 1966} 1967 1968VkResult 1969pvr_InvalidateMappedMemoryRanges(VkDevice _device, 1970 uint32_t memoryRangeCount, 1971 const VkMappedMemoryRange *pMemoryRanges) 1972{ 1973 return VK_SUCCESS; 1974} 1975 1976void pvr_GetImageSparseMemoryRequirements2( 1977 VkDevice device, 1978 const VkImageSparseMemoryRequirementsInfo2 *pInfo, 1979 uint32_t *pSparseMemoryRequirementCount, 1980 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) 1981{ 1982 *pSparseMemoryRequirementCount = 0; 1983} 1984 1985void pvr_GetDeviceMemoryCommitment(VkDevice device, 1986 VkDeviceMemory memory, 1987 VkDeviceSize *pCommittedMemoryInBytes) 1988{ 1989 *pCommittedMemoryInBytes = 0; 1990} 1991 1992VkResult pvr_bind_memory(struct pvr_device *device, 1993 struct pvr_device_memory *mem, 1994 VkDeviceSize offset, 1995 VkDeviceSize size, 1996 VkDeviceSize alignment, 1997 struct pvr_winsys_vma **const vma_out, 1998 pvr_dev_addr_t *const dev_addr_out) 1999{ 2000 VkDeviceSize virt_size = 2001 size + (offset & (device->heaps.general_heap->page_size - 1)); 2002 struct pvr_winsys_vma *vma; 2003 pvr_dev_addr_t dev_addr; 2004 2005 /* Valid usage: 2006 * 2007 * "memoryOffset must be an integer multiple of the alignment member of 2008 * the VkMemoryRequirements structure returned from a call to 2009 * vkGetBufferMemoryRequirements with buffer" 2010 * 2011 * "memoryOffset must be an integer multiple of the alignment member of 2012 * the VkMemoryRequirements structure returned from a call to 2013 * vkGetImageMemoryRequirements with image" 2014 */ 2015 assert(offset % alignment == 0); 2016 assert(offset < mem->bo->size); 2017 2018 vma = device->ws->ops->heap_alloc(device->heaps.general_heap, 2019 virt_size, 2020 alignment); 2021 if (!vma) 2022 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 2023 2024 dev_addr = device->ws->ops->vma_map(vma, mem->bo, offset, size); 2025 if (!dev_addr.addr) { 2026 device->ws->ops->heap_free(vma); 2027 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 2028 } 2029 2030 *dev_addr_out = dev_addr; 2031 *vma_out = vma; 2032 2033 return VK_SUCCESS; 2034} 2035 2036void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma) 2037{ 2038 device->ws->ops->vma_unmap(vma); 2039 device->ws->ops->heap_free(vma); 2040} 2041 2042VkResult pvr_BindBufferMemory2(VkDevice _device, 2043 uint32_t bindInfoCount, 2044 const VkBindBufferMemoryInfo *pBindInfos) 2045{ 2046 PVR_FROM_HANDLE(pvr_device, device, _device); 2047 uint32_t i; 2048 2049 for (i = 0; i < bindInfoCount; i++) { 2050 PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory); 2051 PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer); 2052 2053 VkResult result = pvr_bind_memory(device, 2054 mem, 2055 pBindInfos[i].memoryOffset, 2056 buffer->vk.size, 2057 buffer->alignment, 2058 &buffer->vma, 2059 &buffer->dev_addr); 2060 if (result != VK_SUCCESS) { 2061 while (i--) { 2062 PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer); 2063 pvr_unbind_memory(device, buffer->vma); 2064 } 2065 2066 return result; 2067 } 2068 } 2069 2070 return VK_SUCCESS; 2071} 2072 2073VkResult pvr_QueueBindSparse(VkQueue _queue, 2074 uint32_t bindInfoCount, 2075 const VkBindSparseInfo *pBindInfo, 2076 VkFence fence) 2077{ 2078 return VK_SUCCESS; 2079} 2080 2081/* Event functions. */ 2082 2083VkResult pvr_CreateEvent(VkDevice _device, 2084 const VkEventCreateInfo *pCreateInfo, 2085 const VkAllocationCallbacks *pAllocator, 2086 VkEvent *pEvent) 2087{ 2088 assert(!"Unimplemented"); 2089 return VK_SUCCESS; 2090} 2091 2092void pvr_DestroyEvent(VkDevice _device, 2093 VkEvent _event, 2094 const VkAllocationCallbacks *pAllocator) 2095{ 2096 assert(!"Unimplemented"); 2097} 2098 2099VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event) 2100{ 2101 assert(!"Unimplemented"); 2102 return VK_SUCCESS; 2103} 2104 2105VkResult pvr_SetEvent(VkDevice _device, VkEvent _event) 2106{ 2107 assert(!"Unimplemented"); 2108 return VK_SUCCESS; 2109} 2110 2111VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event) 2112{ 2113 assert(!"Unimplemented"); 2114 return VK_SUCCESS; 2115} 2116 2117/* Buffer functions. */ 2118 2119VkResult pvr_CreateBuffer(VkDevice _device, 2120 const VkBufferCreateInfo *pCreateInfo, 2121 const VkAllocationCallbacks *pAllocator, 2122 VkBuffer *pBuffer) 2123{ 2124 PVR_FROM_HANDLE(pvr_device, device, _device); 2125 const uint32_t alignment = 4096; 2126 struct pvr_buffer *buffer; 2127 2128 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); 2129 assert(pCreateInfo->usage != 0); 2130 2131 /* We check against (ULONG_MAX - alignment) to prevent overflow issues */ 2132 if (pCreateInfo->size >= ULONG_MAX - alignment) 2133 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 2134 2135 buffer = 2136 vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer)); 2137 if (!buffer) 2138 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2139 2140 buffer->alignment = alignment; 2141 2142 *pBuffer = pvr_buffer_to_handle(buffer); 2143 2144 return VK_SUCCESS; 2145} 2146 2147void pvr_DestroyBuffer(VkDevice _device, 2148 VkBuffer _buffer, 2149 const VkAllocationCallbacks *pAllocator) 2150{ 2151 PVR_FROM_HANDLE(pvr_device, device, _device); 2152 PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer); 2153 2154 if (!buffer) 2155 return; 2156 2157 if (buffer->vma) 2158 pvr_unbind_memory(device, buffer->vma); 2159 2160 vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk); 2161} 2162 2163VkResult pvr_gpu_upload(struct pvr_device *device, 2164 struct pvr_winsys_heap *heap, 2165 const void *data, 2166 size_t size, 2167 uint64_t alignment, 2168 struct pvr_bo **const pvr_bo_out) 2169{ 2170 struct pvr_bo *pvr_bo = NULL; 2171 VkResult result; 2172 2173 assert(size > 0); 2174 2175 result = pvr_bo_alloc(device, 2176 heap, 2177 size, 2178 alignment, 2179 PVR_BO_ALLOC_FLAG_CPU_MAPPED, 2180 &pvr_bo); 2181 if (result != VK_SUCCESS) 2182 return result; 2183 2184 memcpy(pvr_bo->bo->map, data, size); 2185 pvr_bo_cpu_unmap(device, pvr_bo); 2186 2187 *pvr_bo_out = pvr_bo; 2188 2189 return VK_SUCCESS; 2190} 2191 2192VkResult pvr_gpu_upload_usc(struct pvr_device *device, 2193 const void *code, 2194 size_t code_size, 2195 uint64_t code_alignment, 2196 struct pvr_bo **const pvr_bo_out) 2197{ 2198 struct pvr_bo *pvr_bo = NULL; 2199 VkResult result; 2200 2201 assert(code_size > 0); 2202 2203 /* The USC will prefetch the next instruction, so over allocate by 1 2204 * instruction to prevent reading off the end of a page into a potentially 2205 * unallocated page. 2206 */ 2207 result = pvr_bo_alloc(device, 2208 device->heaps.usc_heap, 2209 code_size + ROGUE_MAX_INSTR_BYTES, 2210 code_alignment, 2211 PVR_BO_ALLOC_FLAG_CPU_MAPPED, 2212 &pvr_bo); 2213 if (result != VK_SUCCESS) 2214 return result; 2215 2216 memcpy(pvr_bo->bo->map, code, code_size); 2217 pvr_bo_cpu_unmap(device, pvr_bo); 2218 2219 *pvr_bo_out = pvr_bo; 2220 2221 return VK_SUCCESS; 2222} 2223 2224/** 2225 * \brief Upload PDS program data and code segments from host memory to device 2226 * memory. 2227 * 2228 * \param[in] device Logical device pointer. 2229 * \param[in] data Pointer to PDS data segment to upload. 2230 * \param[in] data_size_dwords Size of PDS data segment in dwords. 2231 * \param[in] data_alignment Required alignment of the PDS data segment in 2232 * bytes. Must be a power of two. 2233 * \param[in] code Pointer to PDS code segment to upload. 2234 * \param[in] code_size_dwords Size of PDS code segment in dwords. 2235 * \param[in] code_alignment Required alignment of the PDS code segment in 2236 * bytes. Must be a power of two. 2237 * \param[in] min_alignment Minimum alignment of the bo holding the PDS 2238 * program in bytes. 2239 * \param[out] pds_upload_out On success will be initialized based on the 2240 * uploaded PDS program. 2241 * \return VK_SUCCESS on success, or error code otherwise. 2242 */ 2243VkResult pvr_gpu_upload_pds(struct pvr_device *device, 2244 const uint32_t *data, 2245 uint32_t data_size_dwords, 2246 uint32_t data_alignment, 2247 const uint32_t *code, 2248 uint32_t code_size_dwords, 2249 uint32_t code_alignment, 2250 uint64_t min_alignment, 2251 struct pvr_pds_upload *const pds_upload_out) 2252{ 2253 /* All alignment and sizes below are in bytes. */ 2254 const size_t data_size = data_size_dwords * sizeof(*data); 2255 const size_t code_size = code_size_dwords * sizeof(*code); 2256 const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment); 2257 const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment); 2258 const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment); 2259 const uint64_t bo_alignment = MAX2(min_alignment, data_alignment); 2260 const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size) 2261 : data_aligned_size; 2262 const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_CPU_MAPPED | 2263 PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC; 2264 VkResult result; 2265 2266 assert(code || data); 2267 assert(!code || (code_size_dwords != 0 && code_alignment != 0)); 2268 assert(!data || (data_size_dwords != 0 && data_alignment != 0)); 2269 2270 result = pvr_bo_alloc(device, 2271 device->heaps.pds_heap, 2272 bo_size, 2273 bo_alignment, 2274 bo_flags, 2275 &pds_upload_out->pvr_bo); 2276 if (result != VK_SUCCESS) 2277 return result; 2278 2279 if (data) { 2280 memcpy(pds_upload_out->pvr_bo->bo->map, data, data_size); 2281 2282 pds_upload_out->data_offset = pds_upload_out->pvr_bo->vma->dev_addr.addr - 2283 device->heaps.pds_heap->base_addr.addr; 2284 2285 /* Store data size in dwords. */ 2286 assert(data_aligned_size % 4 == 0); 2287 pds_upload_out->data_size = data_aligned_size / 4; 2288 } else { 2289 pds_upload_out->data_offset = 0; 2290 pds_upload_out->data_size = 0; 2291 } 2292 2293 if (code) { 2294 memcpy((uint8_t *)pds_upload_out->pvr_bo->bo->map + code_offset, 2295 code, 2296 code_size); 2297 2298 pds_upload_out->code_offset = 2299 (pds_upload_out->pvr_bo->vma->dev_addr.addr + code_offset) - 2300 device->heaps.pds_heap->base_addr.addr; 2301 2302 /* Store code size in dwords. */ 2303 assert(code_aligned_size % 4 == 0); 2304 pds_upload_out->code_size = code_aligned_size / 4; 2305 } else { 2306 pds_upload_out->code_offset = 0; 2307 pds_upload_out->code_size = 0; 2308 } 2309 2310 pvr_bo_cpu_unmap(device, pds_upload_out->pvr_bo); 2311 2312 return VK_SUCCESS; 2313} 2314 2315static VkResult 2316pvr_framebuffer_create_ppp_state(struct pvr_device *device, 2317 struct pvr_framebuffer *framebuffer) 2318{ 2319 const uint32_t cache_line_size = 2320 rogue_get_slc_cache_line_size(&device->pdevice->dev_info); 2321 uint32_t ppp_state[3]; 2322 VkResult result; 2323 2324 pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) { 2325 header.pres_terminate = true; 2326 } 2327 2328 pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) { 2329 term0.clip_right = 2330 DIV_ROUND_UP( 2331 framebuffer->width, 2332 PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) - 2333 1; 2334 term0.clip_bottom = 2335 DIV_ROUND_UP( 2336 framebuffer->height, 2337 PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) - 2338 1; 2339 } 2340 2341 pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) { 2342 term1.render_target = 0; 2343 term1.clip_left = 0; 2344 } 2345 2346 result = pvr_gpu_upload(device, 2347 device->heaps.general_heap, 2348 ppp_state, 2349 sizeof(ppp_state), 2350 cache_line_size, 2351 &framebuffer->ppp_state_bo); 2352 if (result != VK_SUCCESS) 2353 return result; 2354 2355 /* Calculate the size of PPP state in dwords. */ 2356 framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t); 2357 2358 return VK_SUCCESS; 2359} 2360 2361static bool pvr_render_targets_init(struct pvr_render_target *render_targets, 2362 uint32_t render_targets_count) 2363{ 2364 uint32_t i; 2365 2366 for (i = 0; i < render_targets_count; i++) { 2367 if (pthread_mutex_init(&render_targets[i].mutex, NULL)) 2368 goto err_mutex_destroy; 2369 } 2370 2371 return true; 2372 2373err_mutex_destroy: 2374 while (i--) 2375 pthread_mutex_destroy(&render_targets[i].mutex); 2376 2377 return false; 2378} 2379 2380static void pvr_render_targets_fini(struct pvr_render_target *render_targets, 2381 uint32_t render_targets_count) 2382{ 2383 for (uint32_t i = 0; i < render_targets_count; i++) { 2384 if (render_targets[i].valid) { 2385 pvr_render_target_dataset_destroy(render_targets[i].rt_dataset); 2386 render_targets[i].valid = false; 2387 } 2388 2389 pthread_mutex_destroy(&render_targets[i].mutex); 2390 } 2391} 2392 2393VkResult pvr_CreateFramebuffer(VkDevice _device, 2394 const VkFramebufferCreateInfo *pCreateInfo, 2395 const VkAllocationCallbacks *pAllocator, 2396 VkFramebuffer *pFramebuffer) 2397{ 2398 PVR_FROM_HANDLE(pvr_device, device, _device); 2399 struct pvr_render_target *render_targets; 2400 struct pvr_framebuffer *framebuffer; 2401 struct pvr_image_view **attachments; 2402 uint32_t render_targets_count; 2403 VkResult result; 2404 2405 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); 2406 2407 render_targets_count = 2408 PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info); 2409 2410 VK_MULTIALLOC(ma); 2411 vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1); 2412 vk_multialloc_add(&ma, 2413 &attachments, 2414 __typeof__(*attachments), 2415 pCreateInfo->attachmentCount); 2416 vk_multialloc_add(&ma, 2417 &render_targets, 2418 __typeof__(*render_targets), 2419 render_targets_count); 2420 2421 if (!vk_multialloc_zalloc2(&ma, 2422 &device->vk.alloc, 2423 pAllocator, 2424 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) 2425 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2426 2427 vk_object_base_init(&device->vk, 2428 &framebuffer->base, 2429 VK_OBJECT_TYPE_FRAMEBUFFER); 2430 2431 framebuffer->width = pCreateInfo->width; 2432 framebuffer->height = pCreateInfo->height; 2433 framebuffer->layers = pCreateInfo->layers; 2434 2435 framebuffer->attachments = attachments; 2436 framebuffer->attachment_count = pCreateInfo->attachmentCount; 2437 for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { 2438 framebuffer->attachments[i] = 2439 pvr_image_view_from_handle(pCreateInfo->pAttachments[i]); 2440 } 2441 2442 result = pvr_framebuffer_create_ppp_state(device, framebuffer); 2443 if (result != VK_SUCCESS) 2444 goto err_free_framebuffer; 2445 2446 framebuffer->render_targets = render_targets; 2447 framebuffer->render_targets_count = render_targets_count; 2448 if (!pvr_render_targets_init(framebuffer->render_targets, 2449 render_targets_count)) { 2450 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2451 goto err_free_ppp_state_bo; 2452 } 2453 2454 *pFramebuffer = pvr_framebuffer_to_handle(framebuffer); 2455 2456 return VK_SUCCESS; 2457 2458err_free_ppp_state_bo: 2459 pvr_bo_free(device, framebuffer->ppp_state_bo); 2460 2461err_free_framebuffer: 2462 vk_object_base_finish(&framebuffer->base); 2463 vk_free2(&device->vk.alloc, pAllocator, framebuffer); 2464 2465 return result; 2466} 2467 2468void pvr_DestroyFramebuffer(VkDevice _device, 2469 VkFramebuffer _fb, 2470 const VkAllocationCallbacks *pAllocator) 2471{ 2472 PVR_FROM_HANDLE(pvr_device, device, _device); 2473 PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb); 2474 2475 if (!framebuffer) 2476 return; 2477 2478 pvr_render_targets_fini(framebuffer->render_targets, 2479 framebuffer->render_targets_count); 2480 pvr_bo_free(device, framebuffer->ppp_state_bo); 2481 vk_object_base_finish(&framebuffer->base); 2482 vk_free2(&device->vk.alloc, pAllocator, framebuffer); 2483} 2484 2485PUBLIC VKAPI_ATTR VkResult VKAPI_CALL 2486vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) 2487{ 2488 /* For the full details on loader interface versioning, see 2489 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. 2490 * What follows is a condensed summary, to help you navigate the large and 2491 * confusing official doc. 2492 * 2493 * - Loader interface v0 is incompatible with later versions. We don't 2494 * support it. 2495 * 2496 * - In loader interface v1: 2497 * - The first ICD entrypoint called by the loader is 2498 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this 2499 * entrypoint. 2500 * - The ICD must statically expose no other Vulkan symbol unless it 2501 * is linked with -Bsymbolic. 2502 * - Each dispatchable Vulkan handle created by the ICD must be 2503 * a pointer to a struct whose first member is VK_LOADER_DATA. The 2504 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC. 2505 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and 2506 * vkDestroySurfaceKHR(). The ICD must be capable of working with 2507 * such loader-managed surfaces. 2508 * 2509 * - Loader interface v2 differs from v1 in: 2510 * - The first ICD entrypoint called by the loader is 2511 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must 2512 * statically expose this entrypoint. 2513 * 2514 * - Loader interface v3 differs from v2 in: 2515 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), 2516 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR, 2517 * because the loader no longer does so. 2518 * 2519 * - Loader interface v4 differs from v3 in: 2520 * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr(). 2521 */ 2522 *pSupportedVersion = MIN2(*pSupportedVersion, 4u); 2523 return VK_SUCCESS; 2524} 2525 2526static uint32_t 2527pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info, 2528 VkFilter filter) 2529{ 2530 switch (filter) { 2531 case VK_FILTER_NEAREST: 2532 return PVRX(TEXSTATE_FILTER_POINT); 2533 case VK_FILTER_LINEAR: 2534 return PVRX(TEXSTATE_FILTER_LINEAR); 2535 default: 2536 unreachable("Unknown filter type."); 2537 } 2538} 2539 2540static uint32_t 2541pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode) 2542{ 2543 switch (addr_mode) { 2544 case VK_SAMPLER_ADDRESS_MODE_REPEAT: 2545 return PVRX(TEXSTATE_ADDRMODE_REPEAT); 2546 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: 2547 return PVRX(TEXSTATE_ADDRMODE_FLIP); 2548 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: 2549 return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE); 2550 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: 2551 return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP); 2552 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: 2553 return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER); 2554 default: 2555 unreachable("Invalid sampler address mode."); 2556 } 2557} 2558 2559VkResult pvr_CreateSampler(VkDevice _device, 2560 const VkSamplerCreateInfo *pCreateInfo, 2561 const VkAllocationCallbacks *pAllocator, 2562 VkSampler *pSampler) 2563{ 2564 PVR_FROM_HANDLE(pvr_device, device, _device); 2565 struct pvr_sampler *sampler; 2566 float lod_rounding_bias; 2567 VkFilter min_filter; 2568 VkFilter mag_filter; 2569 float min_lod; 2570 float max_lod; 2571 2572 STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) == 2573 sizeof(((union pvr_sampler_descriptor *)NULL)->words)); 2574 2575 sampler = vk_object_alloc(&device->vk, 2576 pAllocator, 2577 sizeof(*sampler), 2578 VK_OBJECT_TYPE_SAMPLER); 2579 if (!sampler) 2580 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2581 2582 mag_filter = pCreateInfo->magFilter; 2583 min_filter = pCreateInfo->minFilter; 2584 2585 if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) { 2586 /* The min/mag filters may need adjustment here, the GPU should decide 2587 * which of the two filters to use based on the clamped LOD value: LOD 2588 * <= 0 implies magnification, while LOD > 0 implies minification. 2589 * 2590 * As a workaround, we override magFilter with minFilter if we know that 2591 * the magnification filter will never be used due to clamping anyway 2592 * (i.e. minLod > 0). Conversely, we override minFilter with magFilter 2593 * if maxLod <= 0. 2594 */ 2595 if (pCreateInfo->minLod > 0.0f) { 2596 /* The clamped LOD will always be positive => always minify. */ 2597 mag_filter = pCreateInfo->minFilter; 2598 } 2599 2600 if (pCreateInfo->maxLod <= 0.0f) { 2601 /* The clamped LOD will always be negative or zero => always 2602 * magnify. 2603 */ 2604 min_filter = pCreateInfo->magFilter; 2605 } 2606 } 2607 2608 if (pCreateInfo->compareEnable) { 2609 sampler->descriptor.data.compare_op = 2610 (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp); 2611 } else { 2612 sampler->descriptor.data.compare_op = 2613 (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER); 2614 } 2615 2616 sampler->descriptor.data.word3 = 0; 2617 pvr_csb_pack (&sampler->descriptor.data.sampler_word, 2618 TEXSTATE_SAMPLER, 2619 word) { 2620 const struct pvr_device_info *dev_info = &device->pdevice->dev_info; 2621 const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) / 2622 (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS)); 2623 const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) - 2624 PVRX(TEXSTATE_DADJUST_ZERO_UINT))) / 2625 (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS)); 2626 const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) - 2627 PVRX(TEXSTATE_DADJUST_ZERO_UINT))) / 2628 (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS)); 2629 2630 word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter); 2631 word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter); 2632 2633 if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR) 2634 word.mipfilter = true; 2635 2636 word.addrmode_u = 2637 pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU); 2638 word.addrmode_v = 2639 pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV); 2640 word.addrmode_w = 2641 pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW); 2642 2643 /* TODO: Figure out defines for these. */ 2644 if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP)) 2645 sampler->descriptor.data.word3 |= 0x40000000; 2646 2647 if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP)) 2648 sampler->descriptor.data.word3 |= 0x20000000; 2649 2650 /* The Vulkan 1.0.205 spec says: 2651 * 2652 * The absolute value of mipLodBias must be less than or equal to 2653 * VkPhysicalDeviceLimits::maxSamplerLodBias. 2654 */ 2655 word.dadjust = 2656 PVRX(TEXSTATE_DADJUST_ZERO_UINT) + 2657 util_signed_fixed( 2658 CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust), 2659 PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS)); 2660 2661 /* Anisotropy is not supported for now. */ 2662 word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED); 2663 2664 if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) && 2665 pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) { 2666 /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be 2667 * selected by adding 0.5 and then truncating the input LOD value. 2668 * This hardware adds the 0.5 bias before clamping against 2669 * lodmin/lodmax, while Vulkan specifies the bias to be added after 2670 * clamping. We compensate for this difference by adding the 0.5 2671 * bias to the LOD bounds, too. 2672 */ 2673 lod_rounding_bias = 0.5f; 2674 } else { 2675 lod_rounding_bias = 0.0f; 2676 } 2677 2678 min_lod = pCreateInfo->minLod + lod_rounding_bias; 2679 word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max), 2680 PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS)); 2681 2682 max_lod = pCreateInfo->maxLod + lod_rounding_bias; 2683 word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max), 2684 PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS)); 2685 2686 word.bordercolor_index = pCreateInfo->borderColor; 2687 2688 if (pCreateInfo->unnormalizedCoordinates) 2689 word.non_normalized_coords = true; 2690 } 2691 2692 *pSampler = pvr_sampler_to_handle(sampler); 2693 2694 return VK_SUCCESS; 2695} 2696 2697void pvr_DestroySampler(VkDevice _device, 2698 VkSampler _sampler, 2699 const VkAllocationCallbacks *pAllocator) 2700{ 2701 PVR_FROM_HANDLE(pvr_device, device, _device); 2702 PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler); 2703 2704 if (!sampler) 2705 return; 2706 2707 vk_object_free(&device->vk, pAllocator, sampler); 2708} 2709 2710void pvr_GetBufferMemoryRequirements2( 2711 VkDevice _device, 2712 const VkBufferMemoryRequirementsInfo2 *pInfo, 2713 VkMemoryRequirements2 *pMemoryRequirements) 2714{ 2715 PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer); 2716 PVR_FROM_HANDLE(pvr_device, device, _device); 2717 2718 /* The Vulkan 1.0.166 spec says: 2719 * 2720 * memoryTypeBits is a bitmask and contains one bit set for every 2721 * supported memory type for the resource. Bit 'i' is set if and only 2722 * if the memory type 'i' in the VkPhysicalDeviceMemoryProperties 2723 * structure for the physical device is supported for the resource. 2724 * 2725 * All types are currently supported for buffers. 2726 */ 2727 pMemoryRequirements->memoryRequirements.memoryTypeBits = 2728 (1ul << device->pdevice->memory.memoryTypeCount) - 1; 2729 2730 pMemoryRequirements->memoryRequirements.alignment = buffer->alignment; 2731 pMemoryRequirements->memoryRequirements.size = 2732 ALIGN_POT(buffer->vk.size, buffer->alignment); 2733} 2734 2735void pvr_GetImageMemoryRequirements2(VkDevice _device, 2736 const VkImageMemoryRequirementsInfo2 *pInfo, 2737 VkMemoryRequirements2 *pMemoryRequirements) 2738{ 2739 PVR_FROM_HANDLE(pvr_device, device, _device); 2740 PVR_FROM_HANDLE(pvr_image, image, pInfo->image); 2741 2742 /* The Vulkan 1.0.166 spec says: 2743 * 2744 * memoryTypeBits is a bitmask and contains one bit set for every 2745 * supported memory type for the resource. Bit 'i' is set if and only 2746 * if the memory type 'i' in the VkPhysicalDeviceMemoryProperties 2747 * structure for the physical device is supported for the resource. 2748 * 2749 * All types are currently supported for images. 2750 */ 2751 const uint32_t memory_types = 2752 (1ul << device->pdevice->memory.memoryTypeCount) - 1; 2753 2754 /* TODO: The returned size is aligned here in case of arrays/CEM (as is done 2755 * in GetImageMemoryRequirements()), but this should be known at image 2756 * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in 2757 * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to 2758 * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually 2759 * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1? 2760 * 2761 * Note: Presumably the 4096 alignment requirement comes from the Vulkan 2762 * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up 2763 * render and compute jobs. 2764 */ 2765 pMemoryRequirements->memoryRequirements.alignment = image->alignment; 2766 pMemoryRequirements->memoryRequirements.size = 2767 ALIGN(image->size, image->alignment); 2768 pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types; 2769} 2770