1/* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * SPDX-License-Identifier: MIT 5 * 6 * based in part on anv driver which is: 7 * Copyright © 2015 Intel Corporation 8 */ 9 10#include "tu_device.h" 11 12#include <fcntl.h> 13#include <poll.h> 14#include <sys/sysinfo.h> 15 16#include "git_sha1.h" 17#include "util/debug.h" 18#include "util/disk_cache.h" 19#include "util/driconf.h" 20#include "util/os_misc.h" 21#include "vk_sampler.h" 22#include "vk_util.h" 23 24/* for fd_get_driver/device_uuid() */ 25#include "freedreno/common/freedreno_uuid.h" 26 27#include "tu_clear_blit.h" 28#include "tu_cmd_buffer.h" 29#include "tu_cs.h" 30#include "tu_descriptor_set.h" 31#include "tu_dynamic_rendering.h" 32#include "tu_image.h" 33#include "tu_pass.h" 34#include "tu_query.h" 35#include "tu_tracepoints.h" 36#include "tu_wsi.h" 37 38#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \ 39 defined(VK_USE_PLATFORM_XCB_KHR) || \ 40 defined(VK_USE_PLATFORM_XLIB_KHR) || \ 41 defined(VK_USE_PLATFORM_DISPLAY_KHR) 42#define TU_HAS_SURFACE 1 43#else 44#define TU_HAS_SURFACE 0 45#endif 46 47 48static int 49tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid) 50{ 51 struct mesa_sha1 ctx; 52 unsigned char sha1[20]; 53 /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not 54 * initialized until after compiler creation so we have to add it to the 55 * shader hash instead, since the compiler is only created with the logical 56 * device. 
57 */ 58 uint64_t driver_flags = device->instance->debug_flags & TU_DEBUG_NOMULTIPOS; 59 uint16_t family = fd_dev_gpu_id(&device->dev_id); 60 61 memset(uuid, 0, VK_UUID_SIZE); 62 _mesa_sha1_init(&ctx); 63 64 if (!disk_cache_get_function_identifier(tu_device_get_cache_uuid, &ctx)) 65 return -1; 66 67 _mesa_sha1_update(&ctx, &family, sizeof(family)); 68 _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags)); 69 _mesa_sha1_final(&ctx, sha1); 70 71 memcpy(uuid, sha1, VK_UUID_SIZE); 72 return 0; 73} 74 75#define TU_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION) 76 77VKAPI_ATTR VkResult VKAPI_CALL 78tu_EnumerateInstanceVersion(uint32_t *pApiVersion) 79{ 80 *pApiVersion = TU_API_VERSION; 81 return VK_SUCCESS; 82} 83 84static const struct vk_instance_extension_table tu_instance_extensions_supported = { 85 .KHR_device_group_creation = true, 86 .KHR_external_fence_capabilities = true, 87 .KHR_external_memory_capabilities = true, 88 .KHR_external_semaphore_capabilities = true, 89 .KHR_get_physical_device_properties2 = true, 90 .KHR_surface = TU_HAS_SURFACE, 91 .KHR_get_surface_capabilities2 = TU_HAS_SURFACE, 92 .EXT_debug_report = true, 93 .EXT_debug_utils = true, 94#ifdef VK_USE_PLATFORM_WAYLAND_KHR 95 .KHR_wayland_surface = true, 96#endif 97#ifdef VK_USE_PLATFORM_XCB_KHR 98 .KHR_xcb_surface = true, 99#endif 100#ifdef VK_USE_PLATFORM_XLIB_KHR 101 .KHR_xlib_surface = true, 102#endif 103#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT 104 .EXT_acquire_xlib_display = true, 105#endif 106#ifdef VK_USE_PLATFORM_DISPLAY_KHR 107 .KHR_display = true, 108 .KHR_get_display_properties2 = true, 109 .EXT_direct_mode_display = true, 110 .EXT_display_surface_counter = true, 111 .EXT_acquire_drm_display = true, 112#endif 113}; 114 115static void 116get_device_extensions(const struct tu_physical_device *device, 117 struct vk_device_extension_table *ext) 118{ 119 *ext = (struct vk_device_extension_table) { 120 .KHR_16bit_storage = device->info->a6xx.storage_16bit, 121 .KHR_bind_memory2 = 
true, 122 .KHR_copy_commands2 = true, 123 .KHR_create_renderpass2 = true, 124 .KHR_dedicated_allocation = true, 125 .KHR_depth_stencil_resolve = true, 126 .KHR_descriptor_update_template = true, 127 .KHR_device_group = true, 128 .KHR_draw_indirect_count = true, 129 .KHR_external_fence = true, 130 .KHR_external_fence_fd = true, 131 .KHR_external_memory = true, 132 .KHR_external_memory_fd = true, 133 .KHR_external_semaphore = true, 134 .KHR_external_semaphore_fd = true, 135 .KHR_format_feature_flags2 = true, 136 .KHR_get_memory_requirements2 = true, 137 .KHR_imageless_framebuffer = true, 138 .KHR_incremental_present = TU_HAS_SURFACE, 139 .KHR_image_format_list = true, 140 .KHR_maintenance1 = true, 141 .KHR_maintenance2 = true, 142 .KHR_maintenance3 = true, 143 .KHR_maintenance4 = true, 144 .KHR_multiview = true, 145 .KHR_performance_query = device->instance->debug_flags & TU_DEBUG_PERFC, 146 .KHR_pipeline_executable_properties = true, 147 .KHR_push_descriptor = true, 148 .KHR_relaxed_block_layout = true, 149 .KHR_sampler_mirror_clamp_to_edge = true, 150 .KHR_sampler_ycbcr_conversion = true, 151 .KHR_shader_draw_parameters = true, 152 .KHR_shader_float_controls = true, 153 .KHR_shader_float16_int8 = true, 154 .KHR_shader_subgroup_extended_types = true, 155 .KHR_shader_terminate_invocation = true, 156 .KHR_spirv_1_4 = true, 157 .KHR_storage_buffer_storage_class = true, 158 .KHR_swapchain = TU_HAS_SURFACE, 159 .KHR_swapchain_mutable_format = TU_HAS_SURFACE, 160 .KHR_uniform_buffer_standard_layout = true, 161 .KHR_variable_pointers = true, 162 .KHR_vulkan_memory_model = true, 163 .KHR_driver_properties = true, 164 .KHR_separate_depth_stencil_layouts = true, 165 .KHR_buffer_device_address = true, 166 .KHR_shader_integer_dot_product = true, 167 .KHR_zero_initialize_workgroup_memory = true, 168 .KHR_shader_non_semantic_info = true, 169 .KHR_synchronization2 = true, 170 .KHR_dynamic_rendering = true, 171#ifndef TU_USE_KGSL 172 .KHR_timeline_semaphore = true, 173#endif 
174#ifdef VK_USE_PLATFORM_DISPLAY_KHR 175 .EXT_display_control = true, 176#endif 177 .EXT_external_memory_dma_buf = true, 178 .EXT_image_drm_format_modifier = true, 179 .EXT_sample_locations = device->info->a6xx.has_sample_locations, 180 .EXT_sampler_filter_minmax = true, 181 .EXT_transform_feedback = true, 182 .EXT_4444_formats = true, 183 .EXT_border_color_swizzle = true, 184 .EXT_conditional_rendering = true, 185 .EXT_custom_border_color = true, 186 .EXT_depth_clip_control = true, 187 .EXT_depth_clip_enable = true, 188 .EXT_descriptor_indexing = true, 189 .EXT_extended_dynamic_state = true, 190 .EXT_extended_dynamic_state2 = true, 191 .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic, 192 .EXT_host_query_reset = true, 193 .EXT_index_type_uint8 = true, 194 .EXT_memory_budget = true, 195 .EXT_primitive_topology_list_restart = true, 196 .EXT_private_data = true, 197 .EXT_queue_family_foreign = true, 198 .EXT_robustness2 = true, 199 .EXT_scalar_block_layout = true, 200 .EXT_separate_stencil_usage = true, 201 .EXT_shader_demote_to_helper_invocation = true, 202 .EXT_shader_stencil_export = true, 203 .EXT_shader_viewport_index_layer = true, 204 .EXT_shader_module_identifier = true, 205 .EXT_texel_buffer_alignment = true, 206 .EXT_vertex_attribute_divisor = true, 207 .EXT_provoking_vertex = true, 208 .EXT_line_rasterization = true, 209 .EXT_subgroup_size_control = true, 210 .EXT_image_robustness = true, 211 .EXT_primitives_generated_query = true, 212 .EXT_image_view_min_lod = true, 213 .EXT_pipeline_creation_feedback = true, 214 .EXT_pipeline_creation_cache_control = true, 215#ifndef TU_USE_KGSL 216 .EXT_physical_device_drm = true, 217#endif 218 /* For Graphics Flight Recorder (GFR) */ 219 .AMD_buffer_marker = true, 220 .ARM_rasterization_order_attachment_access = true, 221#ifdef ANDROID 222 .ANDROID_native_buffer = true, 223#endif 224 .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic, 225 .VALVE_mutable_descriptor_type = true, 226 
.EXT_image_2d_view_of_3d = true, 227 .EXT_color_write_enable = true, 228 .EXT_load_store_op_none = true, 229 }; 230} 231 232static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = { 233 &tu_shaders_ops, 234 NULL, 235}; 236 237VkResult 238tu_physical_device_init(struct tu_physical_device *device, 239 struct tu_instance *instance) 240{ 241 VkResult result = VK_SUCCESS; 242 243 const char *fd_name = fd_dev_name(&device->dev_id); 244 if (strncmp(fd_name, "FD", 2) == 0) { 245 device->name = vk_asprintf(&instance->vk.alloc, 246 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE, 247 "Turnip Adreno (TM) %s", &fd_name[2]); 248 } else { 249 device->name = vk_strdup(&instance->vk.alloc, fd_name, 250 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 251 252 } 253 if (!device->name) { 254 return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY, 255 "device name alloc fail"); 256 } 257 258 const struct fd_dev_info *info = fd_dev_info(&device->dev_id); 259 if (!info) { 260 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 261 "device %s is unsupported", device->name); 262 goto fail_free_name; 263 } 264 switch (fd_dev_gen(&device->dev_id)) { 265 case 6: 266 device->info = info; 267 device->ccu_offset_bypass = device->info->num_ccu * A6XX_CCU_DEPTH_SIZE; 268 device->ccu_offset_gmem = (device->gmem_size - 269 device->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE); 270 break; 271 default: 272 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 273 "device %s is unsupported", device->name); 274 goto fail_free_name; 275 } 276 if (tu_device_get_cache_uuid(device, device->cache_uuid)) { 277 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, 278 "cannot generate UUID"); 279 goto fail_free_name; 280 } 281 282 fd_get_driver_uuid(device->driver_uuid); 283 fd_get_device_uuid(device->device_uuid, &device->dev_id); 284 285 struct vk_device_extension_table supported_extensions; 286 get_device_extensions(device, &supported_extensions); 287 288 
struct vk_physical_device_dispatch_table dispatch_table; 289 vk_physical_device_dispatch_table_from_entrypoints( 290 &dispatch_table, &tu_physical_device_entrypoints, true); 291 vk_physical_device_dispatch_table_from_entrypoints( 292 &dispatch_table, &wsi_physical_device_entrypoints, false); 293 294 result = vk_physical_device_init(&device->vk, &instance->vk, 295 &supported_extensions, 296 &dispatch_table); 297 if (result != VK_SUCCESS) 298 goto fail_free_name; 299 300 device->vk.supported_sync_types = device->sync_types; 301 302#if TU_HAS_SURFACE 303 result = tu_wsi_init(device); 304 if (result != VK_SUCCESS) { 305 vk_startup_errorf(instance, result, "WSI init failure"); 306 vk_physical_device_finish(&device->vk); 307 goto fail_free_name; 308 } 309#endif 310 311 /* The gpu id is already embedded in the uuid so we just pass "tu" 312 * when creating the cache. 313 */ 314 char buf[VK_UUID_SIZE * 2 + 1]; 315 disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); 316 device->vk.disk_cache = disk_cache_create(device->name, buf, 0); 317 318 device->vk.pipeline_cache_import_ops = cache_import_ops; 319 320 return VK_SUCCESS; 321 322fail_free_name: 323 vk_free(&instance->vk.alloc, (void *)device->name); 324 return result; 325} 326 327static void 328tu_physical_device_finish(struct tu_physical_device *device) 329{ 330#if TU_HAS_SURFACE 331 tu_wsi_finish(device); 332#endif 333 334 close(device->local_fd); 335 if (device->master_fd != -1) 336 close(device->master_fd); 337 338 vk_free(&device->instance->vk.alloc, (void *)device->name); 339 340 vk_physical_device_finish(&device->vk); 341} 342 343static const struct debug_control tu_debug_options[] = { 344 { "startup", TU_DEBUG_STARTUP }, 345 { "nir", TU_DEBUG_NIR }, 346 { "nobin", TU_DEBUG_NOBIN }, 347 { "sysmem", TU_DEBUG_SYSMEM }, 348 { "gmem", TU_DEBUG_GMEM }, 349 { "forcebin", TU_DEBUG_FORCEBIN }, 350 { "layout", TU_DEBUG_LAYOUT }, 351 { "noubwc", TU_DEBUG_NOUBWC }, 352 { "nomultipos", TU_DEBUG_NOMULTIPOS }, 
353 { "nolrz", TU_DEBUG_NOLRZ }, 354 { "nolrzfc", TU_DEBUG_NOLRZFC }, 355 { "perf", TU_DEBUG_PERF }, 356 { "perfc", TU_DEBUG_PERFC }, 357 { "flushall", TU_DEBUG_FLUSHALL }, 358 { "syncdraw", TU_DEBUG_SYNCDRAW }, 359 { "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD }, 360 { "rast_order", TU_DEBUG_RAST_ORDER }, 361 { "unaligned_store", TU_DEBUG_UNALIGNED_STORE }, 362 { "log_skip_gmem_ops", TU_DEBUG_LOG_SKIP_GMEM_OPS }, 363 { "dynamic", TU_DEBUG_DYNAMIC }, 364 { NULL, 0 } 365}; 366 367const char * 368tu_get_debug_option_name(int id) 369{ 370 assert(id < ARRAY_SIZE(tu_debug_options) - 1); 371 return tu_debug_options[id].string; 372} 373 374static const driOptionDescription tu_dri_options[] = { 375 DRI_CONF_SECTION_PERFORMANCE 376 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0) 377 DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false) 378 DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false) 379 DRI_CONF_VK_XWAYLAND_WAIT_READY(true) 380 DRI_CONF_SECTION_END 381 382 DRI_CONF_SECTION_DEBUG 383 DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false) 384 DRI_CONF_VK_DONT_CARE_AS_LOAD(false) 385 DRI_CONF_SECTION_END 386}; 387 388static void 389tu_init_dri_options(struct tu_instance *instance) 390{ 391 driParseOptionInfo(&instance->available_dri_options, tu_dri_options, 392 ARRAY_SIZE(tu_dri_options)); 393 driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL, 394 instance->vk.app_info.app_name, instance->vk.app_info.app_version, 395 instance->vk.app_info.engine_name, instance->vk.app_info.engine_version); 396 397 if (driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load")) 398 instance->debug_flags |= TU_DEBUG_DONT_CARE_AS_LOAD; 399} 400 401VKAPI_ATTR VkResult VKAPI_CALL 402tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, 403 const VkAllocationCallbacks *pAllocator, 404 VkInstance *pInstance) 405{ 406 struct tu_instance *instance; 407 VkResult result; 408 409 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); 410 411 if 
(pAllocator == NULL) 412 pAllocator = vk_default_allocator(); 413 414 instance = vk_zalloc(pAllocator, sizeof(*instance), 8, 415 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 416 417 if (!instance) 418 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 419 420 struct vk_instance_dispatch_table dispatch_table; 421 vk_instance_dispatch_table_from_entrypoints( 422 &dispatch_table, &tu_instance_entrypoints, true); 423 vk_instance_dispatch_table_from_entrypoints( 424 &dispatch_table, &wsi_instance_entrypoints, false); 425 426 result = vk_instance_init(&instance->vk, 427 &tu_instance_extensions_supported, 428 &dispatch_table, 429 pCreateInfo, pAllocator); 430 if (result != VK_SUCCESS) { 431 vk_free(pAllocator, instance); 432 return vk_error(NULL, result); 433 } 434 435 instance->physical_device_count = -1; 436 437 instance->debug_flags = 438 parse_debug_string(os_get_option("TU_DEBUG"), tu_debug_options); 439 440#ifdef DEBUG 441 /* Enable startup debugging by default on debug drivers. You almost always 442 * want to see your startup failures in that case, and it's hard to set 443 * this env var on android. 
444 */ 445 instance->debug_flags |= TU_DEBUG_STARTUP; 446#endif 447 448 if (instance->debug_flags & TU_DEBUG_STARTUP) 449 mesa_logi("Created an instance"); 450 451 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); 452 453 tu_init_dri_options(instance); 454 455 *pInstance = tu_instance_to_handle(instance); 456 457#ifdef HAVE_PERFETTO 458 tu_perfetto_init(); 459#endif 460 461 return VK_SUCCESS; 462} 463 464VKAPI_ATTR void VKAPI_CALL 465tu_DestroyInstance(VkInstance _instance, 466 const VkAllocationCallbacks *pAllocator) 467{ 468 TU_FROM_HANDLE(tu_instance, instance, _instance); 469 470 if (!instance) 471 return; 472 473 for (int i = 0; i < instance->physical_device_count; ++i) { 474 tu_physical_device_finish(instance->physical_devices + i); 475 } 476 477 VG(VALGRIND_DESTROY_MEMPOOL(instance)); 478 479 driDestroyOptionCache(&instance->dri_options); 480 driDestroyOptionInfo(&instance->available_dri_options); 481 482 vk_instance_finish(&instance->vk); 483 vk_free(&instance->vk.alloc, instance); 484} 485 486VKAPI_ATTR VkResult VKAPI_CALL 487tu_EnumeratePhysicalDevices(VkInstance _instance, 488 uint32_t *pPhysicalDeviceCount, 489 VkPhysicalDevice *pPhysicalDevices) 490{ 491 TU_FROM_HANDLE(tu_instance, instance, _instance); 492 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, 493 pPhysicalDevices, pPhysicalDeviceCount); 494 495 VkResult result; 496 497 if (instance->physical_device_count < 0) { 498 result = tu_enumerate_devices(instance); 499 if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER) 500 return result; 501 } 502 503 for (uint32_t i = 0; i < instance->physical_device_count; ++i) { 504 vk_outarray_append_typed(VkPhysicalDevice, &out, p) 505 { 506 *p = tu_physical_device_to_handle(instance->physical_devices + i); 507 } 508 } 509 510 return vk_outarray_status(&out); 511} 512 513VKAPI_ATTR VkResult VKAPI_CALL 514tu_EnumeratePhysicalDeviceGroups( 515 VkInstance _instance, 516 uint32_t *pPhysicalDeviceGroupCount, 517 VkPhysicalDeviceGroupProperties 
*pPhysicalDeviceGroupProperties) 518{ 519 TU_FROM_HANDLE(tu_instance, instance, _instance); 520 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, 521 pPhysicalDeviceGroupProperties, 522 pPhysicalDeviceGroupCount); 523 VkResult result; 524 525 if (instance->physical_device_count < 0) { 526 result = tu_enumerate_devices(instance); 527 if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER) 528 return result; 529 } 530 531 for (uint32_t i = 0; i < instance->physical_device_count; ++i) { 532 vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) 533 { 534 p->physicalDeviceCount = 1; 535 p->physicalDevices[0] = 536 tu_physical_device_to_handle(instance->physical_devices + i); 537 p->subsetAllocation = false; 538 } 539 } 540 541 return vk_outarray_status(&out); 542} 543 544static void 545tu_get_physical_device_features_1_1(struct tu_physical_device *pdevice, 546 VkPhysicalDeviceVulkan11Features *features) 547{ 548 features->storageBuffer16BitAccess = pdevice->info->a6xx.storage_16bit; 549 features->uniformAndStorageBuffer16BitAccess = false; 550 features->storagePushConstant16 = false; 551 features->storageInputOutput16 = false; 552 features->multiview = true; 553 features->multiviewGeometryShader = false; 554 features->multiviewTessellationShader = false; 555 features->variablePointersStorageBuffer = true; 556 features->variablePointers = true; 557 features->protectedMemory = false; 558 features->samplerYcbcrConversion = true; 559 features->shaderDrawParameters = true; 560} 561 562static void 563tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice, 564 VkPhysicalDeviceVulkan12Features *features) 565{ 566 features->samplerMirrorClampToEdge = true; 567 features->drawIndirectCount = true; 568 features->storageBuffer8BitAccess = false; 569 features->uniformAndStorageBuffer8BitAccess = false; 570 features->storagePushConstant8 = false; 571 features->shaderBufferInt64Atomics = false; 572 features->shaderSharedInt64Atomics = 
false; 573 features->shaderFloat16 = true; 574 features->shaderInt8 = false; 575 576 features->descriptorIndexing = true; 577 features->shaderInputAttachmentArrayDynamicIndexing = false; 578 features->shaderUniformTexelBufferArrayDynamicIndexing = true; 579 features->shaderStorageTexelBufferArrayDynamicIndexing = true; 580 features->shaderUniformBufferArrayNonUniformIndexing = true; 581 features->shaderSampledImageArrayNonUniformIndexing = true; 582 features->shaderStorageBufferArrayNonUniformIndexing = true; 583 features->shaderStorageImageArrayNonUniformIndexing = true; 584 features->shaderInputAttachmentArrayNonUniformIndexing = false; 585 features->shaderUniformTexelBufferArrayNonUniformIndexing = true; 586 features->shaderStorageTexelBufferArrayNonUniformIndexing = true; 587 features->descriptorBindingUniformBufferUpdateAfterBind = true; 588 features->descriptorBindingSampledImageUpdateAfterBind = true; 589 features->descriptorBindingStorageImageUpdateAfterBind = true; 590 features->descriptorBindingStorageBufferUpdateAfterBind = true; 591 features->descriptorBindingUniformTexelBufferUpdateAfterBind = true; 592 features->descriptorBindingStorageTexelBufferUpdateAfterBind = true; 593 features->descriptorBindingUpdateUnusedWhilePending = true; 594 features->descriptorBindingPartiallyBound = true; 595 features->descriptorBindingVariableDescriptorCount = true; 596 features->runtimeDescriptorArray = true; 597 598 features->samplerFilterMinmax = true; 599 features->scalarBlockLayout = true; 600 features->imagelessFramebuffer = true; 601 features->uniformBufferStandardLayout = true; 602 features->shaderSubgroupExtendedTypes = true; 603 features->separateDepthStencilLayouts = true; 604 features->hostQueryReset = true; 605 features->timelineSemaphore = true; 606 features->bufferDeviceAddress = true; 607 features->bufferDeviceAddressCaptureReplay = false; 608 features->bufferDeviceAddressMultiDevice = false; 609 features->vulkanMemoryModel = true; 610 
features->vulkanMemoryModelDeviceScope = true; 611 features->vulkanMemoryModelAvailabilityVisibilityChains = true; 612 features->shaderOutputViewportIndex = true; 613 features->shaderOutputLayer = true; 614 features->subgroupBroadcastDynamicId = true; 615} 616 617static void 618tu_get_physical_device_features_1_3(struct tu_physical_device *pdevice, 619 VkPhysicalDeviceVulkan13Features *features) 620{ 621 features->robustImageAccess = true; 622 features->inlineUniformBlock = false; 623 features->descriptorBindingInlineUniformBlockUpdateAfterBind = false; 624 features->pipelineCreationCacheControl = true; 625 features->privateData = true; 626 features->shaderDemoteToHelperInvocation = true; 627 features->shaderTerminateInvocation = true; 628 features->subgroupSizeControl = true; 629 features->computeFullSubgroups = true; 630 features->synchronization2 = true; 631 features->textureCompressionASTC_HDR = false; 632 features->shaderZeroInitializeWorkgroupMemory = true; 633 features->dynamicRendering = true; 634 features->shaderIntegerDotProduct = true; 635 features->maintenance4 = true; 636} 637 638void 639tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, 640 VkPhysicalDeviceFeatures2 *pFeatures) 641{ 642 TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); 643 644 pFeatures->features = (VkPhysicalDeviceFeatures) { 645 .robustBufferAccess = true, 646 .fullDrawIndexUint32 = true, 647 .imageCubeArray = true, 648 .independentBlend = true, 649 .geometryShader = true, 650 .tessellationShader = true, 651 .sampleRateShading = true, 652 .dualSrcBlend = true, 653 .logicOp = true, 654 .multiDrawIndirect = true, 655 .drawIndirectFirstInstance = true, 656 .depthClamp = true, 657 .depthBiasClamp = true, 658 .fillModeNonSolid = true, 659 .depthBounds = true, 660 .wideLines = false, 661 .largePoints = true, 662 .alphaToOne = true, 663 .multiViewport = true, 664 .samplerAnisotropy = true, 665 .textureCompressionETC2 = true, 666 .textureCompressionASTC_LDR = true, 
667 .textureCompressionBC = true, 668 .occlusionQueryPrecise = true, 669 .pipelineStatisticsQuery = true, 670 .vertexPipelineStoresAndAtomics = true, 671 .fragmentStoresAndAtomics = true, 672 .shaderTessellationAndGeometryPointSize = true, 673 .shaderImageGatherExtended = true, 674 .shaderStorageImageExtendedFormats = true, 675 .shaderStorageImageMultisample = false, 676 .shaderUniformBufferArrayDynamicIndexing = true, 677 .shaderSampledImageArrayDynamicIndexing = true, 678 .shaderStorageBufferArrayDynamicIndexing = true, 679 .shaderStorageImageArrayDynamicIndexing = true, 680 .shaderStorageImageReadWithoutFormat = true, 681 .shaderStorageImageWriteWithoutFormat = true, 682 .shaderClipDistance = true, 683 .shaderCullDistance = true, 684 .shaderFloat64 = false, 685 .shaderInt64 = false, 686 .shaderInt16 = true, 687 .sparseBinding = false, 688 .variableMultisampleRate = true, 689 .inheritedQueries = true, 690 }; 691 692 VkPhysicalDeviceVulkan11Features core_1_1 = { 693 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, 694 }; 695 tu_get_physical_device_features_1_1(pdevice, &core_1_1); 696 697 VkPhysicalDeviceVulkan12Features core_1_2 = { 698 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, 699 }; 700 tu_get_physical_device_features_1_2(pdevice, &core_1_2); 701 702 VkPhysicalDeviceVulkan13Features core_1_3 = { 703 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, 704 }; 705 tu_get_physical_device_features_1_3(pdevice, &core_1_3); 706 707 vk_foreach_struct(ext, pFeatures->pNext) 708 { 709 if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1)) 710 continue; 711 if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2)) 712 continue; 713 if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3)) 714 continue; 715 716 switch (ext->sType) { 717 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { 718 VkPhysicalDeviceConditionalRenderingFeaturesEXT *features = 719 
(VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext; 720 features->conditionalRendering = true; 721 features->inheritedConditionalRendering = true; 722 break; 723 } 724 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: { 725 VkPhysicalDeviceTransformFeedbackFeaturesEXT *features = 726 (VkPhysicalDeviceTransformFeedbackFeaturesEXT *) ext; 727 features->transformFeedback = true; 728 features->geometryStreams = true; 729 break; 730 } 731 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: { 732 VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features = 733 (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext; 734 features->indexTypeUint8 = true; 735 break; 736 } 737 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { 738 VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = 739 (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext; 740 features->vertexAttributeInstanceRateDivisor = true; 741 features->vertexAttributeInstanceRateZeroDivisor = true; 742 break; 743 } 744 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: { 745 VkPhysicalDeviceDepthClipEnableFeaturesEXT *features = 746 (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext; 747 features->depthClipEnable = true; 748 break; 749 } 750 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: { 751 VkPhysicalDevice4444FormatsFeaturesEXT *features = (void *)ext; 752 features->formatA4R4G4B4 = true; 753 features->formatA4B4G4R4 = true; 754 break; 755 } 756 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: { 757 VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features = (void *)ext; 758 features->borderColorSwizzle = true; 759 features->borderColorSwizzleFromImage = true; 760 break; 761 } 762 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { 763 VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = (void *) ext; 764 features->customBorderColors = true; 765 
features->customBorderColorWithoutFormat = true; 766 break; 767 } 768 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: { 769 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext; 770 features->extendedDynamicState = true; 771 break; 772 } 773 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: { 774 VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features = 775 (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext; 776 features->extendedDynamicState2 = true; 777 features->extendedDynamicState2LogicOp = true; 778 features->extendedDynamicState2PatchControlPoints = false; 779 break; 780 } 781 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: { 782 VkPhysicalDevicePerformanceQueryFeaturesKHR *feature = 783 (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext; 784 feature->performanceCounterQueryPools = true; 785 feature->performanceCounterMultipleQueryPools = false; 786 break; 787 } 788 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: { 789 VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features = 790 (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext; 791 features->pipelineExecutableInfo = true; 792 break; 793 } 794 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: { 795 VkPhysicalDeviceShaderFloat16Int8Features *features = 796 (VkPhysicalDeviceShaderFloat16Int8Features *) ext; 797 features->shaderFloat16 = true; 798 features->shaderInt8 = false; 799 break; 800 } 801 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: { 802 VkPhysicalDeviceScalarBlockLayoutFeatures *features = (void *)ext; 803 features->scalarBlockLayout = true; 804 break; 805 } 806 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: { 807 VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext; 808 features->robustBufferAccess2 = true; 809 features->robustImageAccess2 = true; 810 
features->nullDescriptor = true; 811 break; 812 } 813 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: { 814 VkPhysicalDeviceTimelineSemaphoreFeatures *features = 815 (VkPhysicalDeviceTimelineSemaphoreFeatures *) ext; 816 features->timelineSemaphore = true; 817 break; 818 } 819 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: { 820 VkPhysicalDeviceProvokingVertexFeaturesEXT *features = 821 (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext; 822 features->provokingVertexLast = true; 823 features->transformFeedbackPreservesProvokingVertex = true; 824 break; 825 } 826 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: { 827 VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features = 828 (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext; 829 features->mutableDescriptorType = true; 830 break; 831 } 832 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: { 833 VkPhysicalDeviceLineRasterizationFeaturesEXT *features = 834 (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext; 835 features->rectangularLines = true; 836 features->bresenhamLines = true; 837 features->smoothLines = false; 838 features->stippledRectangularLines = false; 839 features->stippledBresenhamLines = false; 840 features->stippledSmoothLines = false; 841 break; 842 } 843 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: { 844 VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features = 845 (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext; 846 features->primitiveTopologyListRestart = true; 847 features->primitiveTopologyPatchListRestart = false; 848 break; 849 } 850 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM: { 851 VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *features = 852 (VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *)ext; 853 
features->rasterizationOrderColorAttachmentAccess = true; 854 features->rasterizationOrderDepthAttachmentAccess = true; 855 features->rasterizationOrderStencilAttachmentAccess = true; 856 break; 857 } 858 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: { 859 VkPhysicalDeviceDepthClipControlFeaturesEXT *features = 860 (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext; 861 features->depthClipControl = true; 862 break; 863 } 864 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: { 865 VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features = 866 (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext; 867 features->texelBufferAlignment = true; 868 break; 869 } 870 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: { 871 VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features = 872 (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext; 873 features->primitivesGeneratedQuery = true; 874 features->primitivesGeneratedQueryWithRasterizerDiscard = false; 875 features->primitivesGeneratedQueryWithNonZeroStreams = false; 876 break; 877 } 878 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: { 879 VkPhysicalDeviceImageViewMinLodFeaturesEXT *features = 880 (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext; 881 features->minLod = true; 882 break; 883 } 884 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: { 885 VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features = 886 (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext; 887 features->image2DViewOf3D = true; 888 features->sampler2DViewOf3D = true; 889 break; 890 } 891 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: { 892 VkPhysicalDeviceColorWriteEnableFeaturesEXT *features = 893 (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext; 894 features->colorWriteEnable = true; 895 break; 896 } 897 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: { 898 
         VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
            (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
         features->shaderModuleIdentifier = true;
         break;
      }

      default:
         break;
      }
   }
}

/* Fill out the core Vulkan 1.1 properties reported by this physical device.
 * Called from tu_GetPhysicalDeviceProperties2 below, which then lets the
 * common vk_get_physical_device_core_1_1_property_ext() helper route the
 * values into whatever pNext structs the app chained.
 */
static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   /* Base support is compute-only subgroup ops; graphics stages and quad ops
    * are added below when the hardware has getfiberid.
    */
   p->subgroupSize = 128;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Make sure everything is addressable by a signed 32-bit int, and
    * our largest descriptors are 96 bytes.
    */
   p->maxPerSetDescriptors = (1ull << 31) / 96;
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;
}

/* I have no idea what the maximum size is, but the hardware supports very
 * large numbers of descriptors (at least 2^16). This limit is based on
 * CP_LOAD_STATE6, which has a 28-bit field for the DWORD offset, so that
 * we don't have to think about what to do if that overflows, but really
 * nothing is likely to get close to this.
 */
static const size_t max_descriptor_set_size = (1 << 28) / A6XX_TEX_CONST_DWORDS;
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

/* Fill out the core Vulkan 1.2 properties (driver identification, float
 * controls, descriptor-indexing limits, resolve modes, etc.).
 */
static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 2,
      .subminor = 7,
      .patch = 1,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16 = true;
   p->shaderDenormPreserveFloat16 = false;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = false;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = false;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = false;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   /* No float64 shader support: all fp64 controls are reported false. */
   p->shaderDenormFlushToZeroFloat64 = false;
   p->shaderDenormPreserveFloat64 = false;
   p->shaderRoundingModeRTEFloat64 = false;
   p->shaderRoundingModeRTZFloat64 = false;
   p->shaderSignedZeroInfNanPreserveFloat64 = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative = true;
   p->shaderSampledImageArrayNonUniformIndexingNative = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = false;
   p->quadDivergentImplicitLod = false;

   p->maxUpdateAfterBindDescriptorsInAllPools = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_RTS;

   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone = false;
   p->independentResolve = false;

   p->filterMinmaxSingleComponentFormats = true;
   p->filterMinmaxImageComponentMapping = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

/* Fill out the core Vulkan 1.3 properties. */
static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan13Properties *p)
{
   /* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
   p->minSubgroupSize = 64; /* threadsize_base */
   p->maxSubgroupSize = 128; /* threadsize_base * 2 */
   p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   /* VK_EXT_inline_uniform_block is not implemented */
   p->maxInlineUniformBlockSize = 0;
   p->maxPerStageDescriptorInlineUniformBlocks = 0;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 0;
   p->maxDescriptorSetInlineUniformBlocks = 0;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 0;
   p->maxInlineUniformTotalSize = 0;

   /* Only the 4x8-bit packed dot products are accelerated, and only when the
    * hardware has dp2acc.
    */
   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = false;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = false;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

/* Main properties entry point: builds the VkPhysicalDeviceLimits table,
 * fills the core 1.1/1.2/1.3 property structs, and then walks the pNext
 * chain answering extension property queries.
 */
VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceProperties2 *pProperties)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE,
      .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 64, /* A cache line */
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
      .maxPerStageDescriptorInputAttachments = MAX_RTS,
      .maxPerStageResources = max_descriptor_set_size,
      .maxDescriptorSetSamplers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = max_descriptor_set_size,
      .maxDescriptorSetStorageImages = max_descriptor_set_size,
      .maxDescriptorSetInputAttachments = MAX_RTS,
      .maxVertexInputAttributes = 32,
      .maxVertexInputBindings = 32,
      .maxVertexInputAttributeOffset = 4095,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4096,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 124,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2,
      .maxComputeSharedMemorySize = 32768,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = 2048,
      .maxComputeWorkGroupSize = { 1024, 1024, 1024 },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 4095.0 / 256.0, /* [-16, 15.99609375] */
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 64,
      .minUniformBufferOffsetAlignment = 64,
      .minStorageBufferOffsetAlignment = 64,
      .minTexelOffset = -16,
      .maxTexelOffset = 15,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 10),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000000.0 / 19200000.0, /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 1, 4092 },
      .lineWidthRange = { 1.0, 1.0 },
      .pointSizeGranularity = 0.0625,
      .lineWidthGranularity = 0.0,
      .strictLines = true,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,
   };

   pProperties->properties = (VkPhysicalDeviceProperties) {
      .apiVersion = TU_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x5143, /* Qualcomm PCI vendor id */
      .deviceID = pdevice->dev_id.chip_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = { 0 },
   };

   strcpy(pProperties->properties.deviceName, pdevice->name);
   memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   tu_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   tu_get_physical_device_properties_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Properties core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
   };
   tu_get_physical_device_properties_1_3(pdevice, &core_1_3);

   /* Let the common vk helpers answer any core-property queries in the
    * chain; fall through to the switch only for extension structs.
    */
   vk_foreach_struct(ext, pProperties->pNext)
   {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;

         properties->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = 512;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = true;
         properties->transformFeedbackStreamsLinesTriangles = true;
         properties->transformFeedbackRasterizationStreamSelect = true;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
         properties->sampleLocationSampleCounts = 0;
         if (pdevice->vk.supported_extensions.EXT_sample_locations) {
            properties->sampleLocationSampleCounts =
               VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
         }
         properties->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         props->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (void *)ext;
         props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
         VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
            (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
         properties->allowCommandBufferQueryCopies = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *props = (void *)ext;
         /* see write_buffer_descriptor() */
         props->robustStorageBufferAccessSizeAlignment = 4;
         /* see write_ubo_descriptor() */
         props->robustUniformBufferAccessSizeAlignment = 16;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         properties->provokingVertexModePerPipeline = true;
         properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props =
            (VkPhysicalDeviceDrmPropertiesEXT *)ext;
         props->hasPrimary = pdevice->has_master;
         props->primaryMajor = pdevice->master_major;
         props->primaryMinor = pdevice->master_minor;

         props->hasRender = pdevice->has_local;
         props->renderMajor = pdevice->local_major;
         props->renderMinor = pdevice->local_minor;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
            (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
         STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                       sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         memcpy(props->shaderModuleIdentifierAlgorithmUUID,
                vk_shaderModuleIdentifierAlgorithmUUID,
                sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         break;
      }
      default:
         break;
      }
   }
}

/* The single queue family: one graphics|compute|transfer queue. */
static const VkQueueFamilyProperties tu_queue_family_properties = {
   .queueFlags =
      VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
   .queueCount = 1,
   .timestampValidBits = 48,
   .minImageTransferGranularity = { 1, 1, 1 },
};

VKAPI_ATTR void VKAPI_CALL
1377tu_GetPhysicalDeviceQueueFamilyProperties2( 1378 VkPhysicalDevice physicalDevice, 1379 uint32_t *pQueueFamilyPropertyCount, 1380 VkQueueFamilyProperties2 *pQueueFamilyProperties) 1381{ 1382 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, 1383 pQueueFamilyProperties, pQueueFamilyPropertyCount); 1384 1385 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) 1386 { 1387 p->queueFamilyProperties = tu_queue_family_properties; 1388 } 1389} 1390 1391uint64_t 1392tu_get_system_heap_size() 1393{ 1394 struct sysinfo info; 1395 sysinfo(&info); 1396 1397 uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit; 1398 1399 /* We don't want to burn too much ram with the GPU. If the user has 4GiB 1400 * or less, we use at most half. If they have more than 4GiB, we use 3/4. 1401 */ 1402 uint64_t available_ram; 1403 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull) 1404 available_ram = total_ram / 2; 1405 else 1406 available_ram = total_ram * 3 / 4; 1407 1408 return available_ram; 1409} 1410 1411static VkDeviceSize 1412tu_get_budget_memory(struct tu_physical_device *physical_device) 1413{ 1414 uint64_t heap_size = physical_device->heap.size; 1415 uint64_t heap_used = physical_device->heap.used; 1416 uint64_t sys_available; 1417 ASSERTED bool has_available_memory = 1418 os_get_available_system_memory(&sys_available); 1419 assert(has_available_memory); 1420 1421 /* 1422 * Let's not incite the app to starve the system: report at most 90% of 1423 * available system memory. 
    */
   uint64_t heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

/* Report the single memory heap/type and, when requested via pNext, the
 * VK_EXT_memory_budget usage/budget numbers.
 */
VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
                                      VkPhysicalDeviceMemoryProperties2 *props2)
{
   TU_FROM_HANDLE(tu_physical_device, physical_device, pdev);

   VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
   props->memoryHeapCount = 1;
   props->memoryHeaps[0].size = physical_device->heap.size;
   props->memoryHeaps[0].flags = physical_device->heap.flags;

   /* One memory type: device-local and host visible/coherent (UMA). */
   props->memoryTypeCount = 1;
   props->memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   props->memoryTypes[0].heapIndex = 0;

   vk_foreach_struct(ext, props2->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
         memory_budget_props->heapUsage[0] = physical_device->heap.used;
         memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);

         /* The heapBudget and heapUsage values must be zero for array elements
          * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
          */
         for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            memory_budget_props->heapBudget[i] = 0u;
            memory_budget_props->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         break;
      }
   }
}

/* Initialize one tu_queue: common vk_queue setup plus a kernel submitqueue.
 * Returns VK_ERROR_INITIALIZATION_FAILED if the submitqueue can't be created.
 */
static VkResult
tu_queue_init(struct tu_device *device,
              struct tu_queue *queue,
              int idx,
              const VkDeviceQueueCreateInfo *create_info)
{

   /* Match the default priority of fd_context_init. We ignore
    * pQueuePriorities because the spec says
    *
    *    An implementation may allow a higher-priority queue to starve a
    *    lower-priority queue on the same VkDevice until the higher-priority
    *    queue has no further commands to execute. The relationship of queue
    *    priorities must not cause queues on one VkDevice to starve queues on
    *    another VkDevice.
    *
    * We cannot let one VkDevice starve another.
    */
   const int priority = 1;

   VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   queue->device = device;
#ifndef TU_USE_KGSL
   queue->vk.driver_submit = tu_queue_submit;
#endif

   int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
   if (ret)
      return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
                               "submitqueue create failed");

   /* -1 == no in-flight fence fd yet; see tu_queue_finish. */
   queue->fence = -1;

   return VK_SUCCESS;
}

/* Tear down a tu_queue: close any fence fd and the kernel submitqueue. */
static void
tu_queue_finish(struct tu_queue *queue)
{
   vk_queue_finish(&queue->vk);
   if (queue->fence >= 0)
      close(queue->fence);
   tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
}

/* Convert GPU timestamp ticks to nanoseconds. */
uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
{
   /* This is based on the 19.2MHz always-on rbbm timer.
    *
    * TODO we should probably query this value from kernel..
    */
   return ts * (1000000000 / 19200000);
}

/* u_trace callback: allocate a BO to hold 'size' bytes of timestamps.
 * NOTE(review): the result of tu_bo_init_new() is ignored; on failure 'bo'
 * may be returned without being initialized — consider checking the result.
 */
static void*
tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);

   struct tu_bo *bo;
   tu_bo_init_new(device, &bo, size, false);

   return bo;
}

/* u_trace callback: free a timestamp BO created above. */
static void
tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_bo *bo = timestamps;

   tu_bo_finish(device, bo);
}

/* u_trace callback: emit a command-stream packet that writes the GPU
 * timestamp into slot 'idx' of the timestamp BO. 'end_of_pipe' is unused
 * here; the write always uses RB_DONE_TS.
 */
static void
tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
                   unsigned idx, bool end_of_pipe)
{
   struct tu_bo *bo = timestamps;
   struct tu_cs *ts_cs = cs;

   unsigned ts_offset = idx * sizeof(uint64_t);
   tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
   tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
   tu_cs_emit(ts_cs, 0x00000000);
}

/* u_trace callback: read back timestamp 'idx', waiting for the submission
 * to finish on the first read, and convert it to nanoseconds.
 */
static uint64_t
tu_trace_read_ts(struct u_trace_context *utctx,
                 void *timestamps, unsigned idx, void *flush_data)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_bo *bo = timestamps;
   struct tu_u_trace_submission_data *submission_data = flush_data;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      tu_device_wait_u_trace(device, submission_data->syncobj);
   }

   if (tu_bo_map(device, bo) != VK_SUCCESS) {
      return U_TRACE_NO_TIMESTAMP;
   }

   uint64_t *ts = bo->map;

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return tu_device_ticks_to_ns(device, ts[idx]);
}

/* u_trace callback: release per-submission trace data once processed. */
static void
tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_u_trace_submission_data *submission_data = flush_data;

   tu_u_trace_submission_data_finish(device, submission_data);
}

/* Emit a CP_MEMCPY copying 'count' 64-bit timestamps between two BOs;
 * used when cloning trace points for reusable command buffers.
 */
void
tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
                         void *ts_from, uint32_t from_offset,
                         void *ts_to, uint32_t to_offset,
                         uint32_t count)
{
   struct tu_cs *cs = cmdstream;
   struct tu_bo *bo_from = ts_from;
   struct tu_bo *bo_to = ts_to;

   tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
   tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
   tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
   tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
}

/* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
 * that ignore tracepoints at the beginning/end that are part of a
 * suspend/resume chain.
1621 */ 1622static struct u_trace_iterator 1623tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf) 1624{ 1625 switch (cmdbuf->state.suspend_resume) { 1626 case SR_IN_PRE_CHAIN: 1627 return cmdbuf->trace_renderpass_end; 1628 case SR_AFTER_PRE_CHAIN: 1629 case SR_IN_CHAIN_AFTER_PRE_CHAIN: 1630 return cmdbuf->pre_chain.trace_renderpass_end; 1631 default: 1632 return u_trace_begin_iterator(&cmdbuf->trace); 1633 } 1634} 1635 1636static struct u_trace_iterator 1637tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf) 1638{ 1639 switch (cmdbuf->state.suspend_resume) { 1640 case SR_IN_PRE_CHAIN: 1641 return cmdbuf->trace_renderpass_end; 1642 case SR_IN_CHAIN: 1643 case SR_IN_CHAIN_AFTER_PRE_CHAIN: 1644 return cmdbuf->trace_renderpass_start; 1645 default: 1646 return u_trace_end_iterator(&cmdbuf->trace); 1647 } 1648} 1649VkResult 1650tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs, 1651 struct u_trace **trace_copy) 1652{ 1653 *cs = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct tu_cs), 8, 1654 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1655 1656 if (*cs == NULL) { 1657 return VK_ERROR_OUT_OF_HOST_MEMORY; 1658 } 1659 1660 tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW, 1661 list_length(&cmdbuf->trace.trace_chunks) * 6 + 3); 1662 1663 tu_cs_begin(*cs); 1664 1665 tu_cs_emit_wfi(*cs); 1666 tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0); 1667 1668 *trace_copy = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8, 1669 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1670 1671 if (*trace_copy == NULL) { 1672 return VK_ERROR_OUT_OF_HOST_MEMORY; 1673 } 1674 1675 u_trace_init(*trace_copy, cmdbuf->trace.utctx); 1676 u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf), 1677 tu_cmd_end_iterator(cmdbuf), 1678 *trace_copy, *cs, 1679 tu_copy_timestamp_buffer); 1680 1681 tu_cs_emit_wfi(*cs); 1682 1683 tu_cs_end(*cs); 1684 1685 return VK_SUCCESS; 1686} 1687 1688VkResult 1689tu_u_trace_submission_data_create( 1690 struct tu_device *device, 1691 struct tu_cmd_buffer **cmd_buffers, 
   uint32_t cmd_buffer_count,
   struct tu_u_trace_submission_data **submission_data)
{
   *submission_data =
      vk_zalloc(&device->vk.alloc,
                sizeof(struct tu_u_trace_submission_data), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!(*submission_data)) {
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   struct tu_u_trace_submission_data *data = *submission_data;

   data->cmd_trace_data =
      vk_zalloc(&device->vk.alloc,
                cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!data->cmd_trace_data) {
      goto fail;
   }

   data->cmd_buffer_count = cmd_buffer_count;
   data->last_buffer_with_tracepoints = -1;

   for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
      struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];

      /* Buffers without tracepoints need no per-submission copy. */
      if (!u_trace_has_points(&cmdbuf->trace))
         continue;

      data->last_buffer_with_tracepoints = i;

      if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
         /* A single command buffer could be submitted several times, but we
          * already baked timestamp iova addresses and trace points are
          * single-use. Therefore we have to copy trace points and create
          * a new timestamp buffer on every submit of reusable command buffer.
          */
         if (tu_create_copy_timestamp_cs(cmdbuf,
               &data->cmd_trace_data[i].timestamp_copy_cs,
               &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
            goto fail;
         }

         assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
      } else {
         /* One-time-submit: safe to reference the cmdbuf's own trace. */
         data->cmd_trace_data[i].trace = &cmdbuf->trace;
      }
   }

   /* Presumably callers only invoke this when at least one command buffer
    * has tracepoints — TODO confirm at the call site.
    */
   assert(data->last_buffer_with_tracepoints != -1);

   return VK_SUCCESS;

fail:
   tu_u_trace_submission_data_finish(device, data);
   *submission_data = NULL;

   return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}

/* Release everything owned by a tu_u_trace_submission_data: per-cmdbuf trace
 * copies (only present for reusable command buffers), the per-cmdbuf array,
 * the syncobj and the container itself.
 */
void
tu_u_trace_submission_data_finish(
   struct tu_device *device,
   struct tu_u_trace_submission_data *submission_data)
{
   for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
      /* Only if we had to create a copy of trace we should free it */
      struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
      if (cmd_data->timestamp_copy_cs) {
         tu_cs_finish(cmd_data->timestamp_copy_cs);
         vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);

         u_trace_fini(cmd_data->trace);
         vk_free(&device->vk.alloc, cmd_data->trace);
      }
   }

   vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
   vk_free(&device->vk.alloc, submission_data->syncobj);
   vk_free(&device->vk.alloc, submission_data);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDevice(VkPhysicalDevice physicalDevice,
                const VkDeviceCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkDevice *pDevice)
{
   TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
   VkResult result;
   struct tu_device *device;
   bool custom_border_colors = false;
   bool perf_query_pools = false;
   bool robust_buffer_access2 = false;
   bool border_color_without_format = false;

   /* Scan the pNext chain for the enabled-feature structs we act on. */
   vk_foreach_struct_const(ext, pCreateInfo->pNext) {
      switch 
(ext->sType) { 1793 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { 1794 const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext; 1795 custom_border_colors = border_color_features->customBorderColors; 1796 border_color_without_format = 1797 border_color_features->customBorderColorWithoutFormat; 1798 break; 1799 } 1800 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: { 1801 const VkPhysicalDevicePerformanceQueryFeaturesKHR *feature = 1802 (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext; 1803 perf_query_pools = feature->performanceCounterQueryPools; 1804 break; 1805 } 1806 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: { 1807 VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext; 1808 robust_buffer_access2 = features->robustBufferAccess2; 1809 break; 1810 } 1811 default: 1812 break; 1813 } 1814 } 1815 1816 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, 1817 sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1818 if (!device) 1819 return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM"); 1820 1821 struct vk_device_dispatch_table dispatch_table; 1822 vk_device_dispatch_table_from_entrypoints( 1823 &dispatch_table, &tu_device_entrypoints, true); 1824 vk_device_dispatch_table_from_entrypoints( 1825 &dispatch_table, &wsi_device_entrypoints, false); 1826 1827 result = vk_device_init(&device->vk, &physical_device->vk, 1828 &dispatch_table, pCreateInfo, pAllocator); 1829 if (result != VK_SUCCESS) { 1830 vk_free(&device->vk.alloc, device); 1831 return vk_startup_errorf(physical_device->instance, result, 1832 "vk_device_init failed"); 1833 } 1834 1835 device->instance = physical_device->instance; 1836 device->physical_device = physical_device; 1837 device->fd = physical_device->local_fd; 1838 device->vk.check_status = tu_device_check_status; 1839 1840 mtx_init(&device->bo_mutex, mtx_plain); 1841 
mtx_init(&device->pipeline_mutex, mtx_plain); 1842 mtx_init(&device->autotune_mutex, mtx_plain); 1843 u_rwlock_init(&device->dma_bo_lock); 1844 pthread_mutex_init(&device->submit_mutex, NULL); 1845 1846#ifndef TU_USE_KGSL 1847 vk_device_set_drm_fd(&device->vk, device->fd); 1848#endif 1849 1850 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 1851 const VkDeviceQueueCreateInfo *queue_create = 1852 &pCreateInfo->pQueueCreateInfos[i]; 1853 uint32_t qfi = queue_create->queueFamilyIndex; 1854 device->queues[qfi] = vk_alloc( 1855 &device->vk.alloc, queue_create->queueCount * sizeof(struct tu_queue), 1856 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 1857 if (!device->queues[qfi]) { 1858 result = vk_startup_errorf(physical_device->instance, 1859 VK_ERROR_OUT_OF_HOST_MEMORY, 1860 "OOM"); 1861 goto fail_queues; 1862 } 1863 1864 memset(device->queues[qfi], 0, 1865 queue_create->queueCount * sizeof(struct tu_queue)); 1866 1867 device->queue_count[qfi] = queue_create->queueCount; 1868 1869 for (unsigned q = 0; q < queue_create->queueCount; q++) { 1870 result = tu_queue_init(device, &device->queues[qfi][q], q, 1871 queue_create); 1872 if (result != VK_SUCCESS) 1873 goto fail_queues; 1874 } 1875 } 1876 1877 device->compiler = 1878 ir3_compiler_create(NULL, &physical_device->dev_id, 1879 &(struct ir3_compiler_options) { 1880 .robust_buffer_access2 = robust_buffer_access2, 1881 .push_ubo_with_preamble = true, 1882 .disable_cache = true, 1883 }); 1884 if (!device->compiler) { 1885 result = vk_startup_errorf(physical_device->instance, 1886 VK_ERROR_INITIALIZATION_FAILED, 1887 "failed to initialize ir3 compiler"); 1888 goto fail_queues; 1889 } 1890 1891 /* Initialize sparse array for refcounting imported BOs */ 1892 util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512); 1893 1894 /* initial sizes, these will increase if there is overflow */ 1895 device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD; 1896 device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD; 1897 1898 
uint32_t global_size = sizeof(struct tu6_global); 1899 if (custom_border_colors) 1900 global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry); 1901 1902 tu_bo_suballocator_init(&device->pipeline_suballoc, device, 1903 128 * 1024, TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP); 1904 tu_bo_suballocator_init(&device->autotune_suballoc, device, 1905 128 * 1024, 0); 1906 1907 result = tu_bo_init_new(device, &device->global_bo, global_size, 1908 TU_BO_ALLOC_ALLOW_DUMP); 1909 if (result != VK_SUCCESS) { 1910 vk_startup_errorf(device->instance, result, "BO init"); 1911 goto fail_global_bo; 1912 } 1913 1914 result = tu_bo_map(device, device->global_bo); 1915 if (result != VK_SUCCESS) { 1916 vk_startup_errorf(device->instance, result, "BO map"); 1917 goto fail_global_bo_map; 1918 } 1919 1920 struct tu6_global *global = device->global_bo->map; 1921 tu_init_clear_blit_shaders(device); 1922 global->predicate = 0; 1923 global->vtx_stats_query_not_running = 1; 1924 global->dbg_one = (uint32_t)-1; 1925 global->dbg_gmem_total_loads = 0; 1926 global->dbg_gmem_taken_loads = 0; 1927 global->dbg_gmem_total_stores = 0; 1928 global->dbg_gmem_taken_stores = 0; 1929 for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) { 1930 VkClearColorValue border_color = vk_border_color_value(i); 1931 tu6_pack_border_color(&global->bcolor_builtin[i], &border_color, 1932 vk_border_color_is_int(i)); 1933 } 1934 1935 /* initialize to ones so ffs can be used to find unused slots */ 1936 BITSET_ONES(device->custom_border_color); 1937 1938 result = tu_init_dynamic_rendering(device); 1939 if (result != VK_SUCCESS) { 1940 vk_startup_errorf(device->instance, result, "dynamic rendering"); 1941 goto fail_dynamic_rendering; 1942 } 1943 1944 struct vk_pipeline_cache_create_info pcc_info = { }; 1945 device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info, 1946 false); 1947 if (!device->mem_cache) { 1948 result = VK_ERROR_OUT_OF_HOST_MEMORY; 1949 vk_startup_errorf(device->instance, result, 
"create pipeline cache failed"); 1950 goto fail_pipeline_cache; 1951 } 1952 1953 if (perf_query_pools) { 1954 /* Prepare command streams setting pass index to the PERF_CNTRS_REG 1955 * from 0 to 31. One of these will be picked up at cmd submit time 1956 * when the perf query is executed. 1957 */ 1958 struct tu_cs *cs; 1959 1960 if (!(device->perfcntrs_pass_cs = calloc(1, sizeof(struct tu_cs)))) { 1961 result = vk_startup_errorf(device->instance, 1962 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM"); 1963 goto fail_perfcntrs_pass_alloc; 1964 } 1965 1966 device->perfcntrs_pass_cs_entries = calloc(32, sizeof(struct tu_cs_entry)); 1967 if (!device->perfcntrs_pass_cs_entries) { 1968 result = vk_startup_errorf(device->instance, 1969 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM"); 1970 goto fail_perfcntrs_pass_entries_alloc; 1971 } 1972 1973 cs = device->perfcntrs_pass_cs; 1974 tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96); 1975 1976 for (unsigned i = 0; i < 32; i++) { 1977 struct tu_cs sub_cs; 1978 1979 result = tu_cs_begin_sub_stream(cs, 3, &sub_cs); 1980 if (result != VK_SUCCESS) { 1981 vk_startup_errorf(device->instance, result, 1982 "failed to allocate commands streams"); 1983 goto fail_prepare_perfcntrs_pass_cs; 1984 } 1985 1986 tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i)); 1987 tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0); 1988 1989 device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs); 1990 } 1991 } 1992 1993 /* Initialize a condition variable for timeline semaphore */ 1994 pthread_condattr_t condattr; 1995 if (pthread_condattr_init(&condattr) != 0) { 1996 result = vk_startup_errorf(physical_device->instance, 1997 VK_ERROR_INITIALIZATION_FAILED, 1998 "pthread condattr init"); 1999 goto fail_timeline_cond; 2000 } 2001 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) { 2002 pthread_condattr_destroy(&condattr); 2003 result = vk_startup_errorf(physical_device->instance, 2004 VK_ERROR_INITIALIZATION_FAILED, 2005 "pthread condattr 
clock setup"); 2006 goto fail_timeline_cond; 2007 } 2008 if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) { 2009 pthread_condattr_destroy(&condattr); 2010 result = vk_startup_errorf(physical_device->instance, 2011 VK_ERROR_INITIALIZATION_FAILED, 2012 "pthread cond init"); 2013 goto fail_timeline_cond; 2014 } 2015 pthread_condattr_destroy(&condattr); 2016 2017 result = tu_autotune_init(&device->autotune, device); 2018 if (result != VK_SUCCESS) { 2019 goto fail_timeline_cond; 2020 } 2021 2022 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) 2023 mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain); 2024 2025 mtx_init(&device->mutex, mtx_plain); 2026 2027 device->use_z24uint_s8uint = 2028 physical_device->info->a6xx.has_z24uint_s8uint && 2029 !border_color_without_format; 2030 2031 tu_gpu_tracepoint_config_variable(); 2032 2033 device->submit_count = 0; 2034 u_trace_context_init(&device->trace_context, device, 2035 tu_trace_create_ts_buffer, 2036 tu_trace_destroy_ts_buffer, 2037 tu_trace_record_ts, 2038 tu_trace_read_ts, 2039 tu_trace_delete_flush_data); 2040 2041 tu_breadcrumbs_init(device); 2042 2043 *pDevice = tu_device_to_handle(device); 2044 return VK_SUCCESS; 2045 2046fail_timeline_cond: 2047fail_prepare_perfcntrs_pass_cs: 2048 free(device->perfcntrs_pass_cs_entries); 2049 tu_cs_finish(device->perfcntrs_pass_cs); 2050fail_perfcntrs_pass_entries_alloc: 2051 free(device->perfcntrs_pass_cs); 2052fail_perfcntrs_pass_alloc: 2053 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc); 2054fail_pipeline_cache: 2055 tu_destroy_dynamic_rendering(device); 2056fail_dynamic_rendering: 2057 tu_destroy_clear_blit_shaders(device); 2058fail_global_bo_map: 2059 tu_bo_finish(device, device->global_bo); 2060 vk_free(&device->vk.alloc, device->bo_list); 2061fail_global_bo: 2062 ir3_compiler_destroy(device->compiler); 2063 util_sparse_array_finish(&device->bo_map); 2064 2065fail_queues: 2066 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; 
i++) { 2067 for (unsigned q = 0; q < device->queue_count[i]; q++) 2068 tu_queue_finish(&device->queues[i][q]); 2069 if (device->queue_count[i]) 2070 vk_free(&device->vk.alloc, device->queues[i]); 2071 } 2072 2073 u_rwlock_destroy(&device->dma_bo_lock); 2074 vk_device_finish(&device->vk); 2075 vk_free(&device->vk.alloc, device); 2076 return result; 2077} 2078 2079VKAPI_ATTR void VKAPI_CALL 2080tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) 2081{ 2082 TU_FROM_HANDLE(tu_device, device, _device); 2083 2084 if (!device) 2085 return; 2086 2087 tu_breadcrumbs_finish(device); 2088 2089 u_trace_context_fini(&device->trace_context); 2090 2091 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { 2092 for (unsigned q = 0; q < device->queue_count[i]; q++) 2093 tu_queue_finish(&device->queues[i][q]); 2094 if (device->queue_count[i]) 2095 vk_free(&device->vk.alloc, device->queues[i]); 2096 } 2097 2098 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) { 2099 if (device->scratch_bos[i].initialized) 2100 tu_bo_finish(device, device->scratch_bos[i].bo); 2101 } 2102 2103 tu_destroy_clear_blit_shaders(device); 2104 2105 tu_destroy_dynamic_rendering(device); 2106 2107 ir3_compiler_destroy(device->compiler); 2108 2109 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc); 2110 2111 if (device->perfcntrs_pass_cs) { 2112 free(device->perfcntrs_pass_cs_entries); 2113 tu_cs_finish(device->perfcntrs_pass_cs); 2114 free(device->perfcntrs_pass_cs); 2115 } 2116 2117 tu_autotune_fini(&device->autotune, device); 2118 2119 tu_bo_suballocator_finish(&device->pipeline_suballoc); 2120 tu_bo_suballocator_finish(&device->autotune_suballoc); 2121 2122 util_sparse_array_finish(&device->bo_map); 2123 u_rwlock_destroy(&device->dma_bo_lock); 2124 2125 pthread_cond_destroy(&device->timeline_cond); 2126 vk_free(&device->vk.alloc, device->bo_list); 2127 vk_device_finish(&device->vk); 2128 vk_free(&device->vk.alloc, device); 2129} 2130 2131VkResult 
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
{
   /* Scratch BOs are power-of-two sized and cached per size class; any
    * initialized BO at an index >= the requested one is large enough.
    */
   unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
   unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
   assert(index < ARRAY_SIZE(dev->scratch_bos));

   for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
      if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
         /* Fast path: just return the already-allocated BO. */
         *bo = dev->scratch_bos[i].bo;
         return VK_SUCCESS;
      }
   }

   /* Slow path: actually allocate the BO. The lock is per size class
    * (construct_mtx lives in scratch_bos[index]), so allocating one size
    * doesn't serialize threads allocating a different size.
    */
   mtx_lock(&dev->scratch_bos[index].construct_mtx);

   /* Another thread may have allocated it already while we were waiting on
    * the lock. We need to check this in order to avoid double-allocating.
    */
   if (dev->scratch_bos[index].initialized) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      *bo = dev->scratch_bos[index].bo;
      return VK_SUCCESS;
   }

   /* NOTE(review): the 1ull << shift is computed in 64 bits but stored into
    * an unsigned; this truncates if size_log2 >= 32. The assert above bounds
    * size_log2 via ARRAY_SIZE(dev->scratch_bos) — confirm that bound keeps
    * it below 32.
    */
   unsigned bo_size = 1ull << size_log2;
   VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size,
                                    TU_BO_ALLOC_NO_FLAGS);
   if (result != VK_SUCCESS) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      return result;
   }

   /* Publish only after the BO is fully constructed; readers use
    * p_atomic_read on this flag without taking the mutex.
    */
   p_atomic_set(&dev->scratch_bos[index].initialized, true);

   mtx_unlock(&dev->scratch_bos[index].construct_mtx);

   *bo = dev->scratch_bos[index].bo;
   return VK_SUCCESS;
}

/* turnip exposes no layers of its own. */
VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                    VkLayerProperties *pProperties)
{
   *pPropertyCount = 0;
   return VK_SUCCESS;
}

/* Only used for kgsl since drm started using common implementation */
#ifdef TU_USE_KGSL
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueWaitIdle(VkQueue _queue)
{
   TU_FROM_HANDLE(tu_queue, queue, _queue);

   if (vk_device_is_lost(&queue->device->vk))
      return VK_ERROR_DEVICE_LOST;

   /* No fence fd means nothing has been submitted since the last wait. */
   if (queue->fence < 0)
      return VK_SUCCESS;

   struct pollfd fds = { .fd = queue->fence, .events = POLLIN };
   int ret;
   do {
      /* Block indefinitely until the last-submit fence fd signals. */
      ret = poll(&fds, 1, -1);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   /* TODO: otherwise set device lost ? */
   assert(ret == 1 && !(fds.revents & (POLLERR | POLLNVAL)));

   close(queue->fence);
   queue->fence = -1;
   return VK_SUCCESS;
}
#endif

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                        uint32_t *pPropertyCount,
                                        VkExtensionProperties *pProperties)
{
   /* No layers are supported, so any layer name is "not present". */
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &tu_instance_extensions_supported, pPropertyCount, pProperties);
}

VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &tu_instance_entrypoints,
                                    pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return tu_GetInstanceProcAddr(instance, pName);
}

/* With version 4+ of the loader interface the ICD should expose
 * vk_icdGetPhysicalDeviceProcAddr()
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
                                const char* pName);

PFN_vkVoidFunction
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
                                const char* pName)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);

   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_AllocateMemory(VkDevice _device,
                  const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkDeviceMemory *pMem)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   if (pAllocateInfo->allocationSize == 0) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   /* Early reject if the heap accounting already shows exhaustion; the
    * authoritative check happens again after the BO is created.
    */
   struct tu_memory_heap *mem_heap = &device->physical_device->heap;
   uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
   if (mem_heap_used > mem_heap->size)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   const VkImportMemoryFdInfoKHR *fd_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   if (fd_info && !fd_info->handleType)
      fd_info = NULL;

   if (fd_info) {
      assert(fd_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      /*
       * TODO Importing the same fd twice gives us the same handle without
       * reference counting. We need to maintain a per-instance handle-to-bo
       * table and add reference count to tu_bo.
       */
      result = tu_bo_init_dmabuf(device, &mem->bo,
                                 pAllocateInfo->allocationSize, fd_info->fd);
      if (result == VK_SUCCESS) {
         /* take ownership and close the fd */
         close(fd_info->fd);
      }
   } else {
      result =
         tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize,
                        TU_BO_ALLOC_NO_FLAGS);
   }


   if (result == VK_SUCCESS) {
      /* Charge the heap; roll back and fail if this allocation overflowed. */
      mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
      if (mem_heap_used > mem_heap->size) {
         p_atomic_add(&mem_heap->used, -mem->bo->size);
         tu_bo_finish(device, mem->bo);
         result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                            "Out of heap memory");
      }
   }

   if (result != VK_SUCCESS) {
      vk_object_free(&device->vk, pAllocator, mem);
      return result;
   }

   /* Track in the device whether our BO list contains any implicit-sync BOs, so
    * we can suppress implicit sync on non-WSI usage.
    */
   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
   if (wsi_info && wsi_info->implicit_sync) {
      mtx_lock(&device->bo_mutex);
      if (!mem->bo->implicit_sync) {
         mem->bo->implicit_sync = true;
         device->implicit_sync_bo_count++;
      }
      mtx_unlock(&device->bo_mutex);
   }

   *pMem = tu_device_memory_to_handle(mem);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_FreeMemory(VkDevice _device,
              VkDeviceMemory _mem,
              const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, mem, _mem);

   /* Freeing VK_NULL_HANDLE is a no-op per spec. */
   if (mem == NULL)
      return;

   /* Return the BO's size to the heap accounting done in AllocateMemory. */
   p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
   tu_bo_finish(device, mem->bo);
   vk_object_free(&device->vk, pAllocator, mem);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_MapMemory(VkDevice _device,
             VkDeviceMemory _memory,
             VkDeviceSize offset,
             VkDeviceSize size,
             VkMemoryMapFlags flags,
             void **ppData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, mem, _memory);
   VkResult result;

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   /* Lazily create the CPU mapping; it persists until the BO is freed. */
   if (!mem->bo->map) {
      result = tu_bo_map(device, mem->bo);
      if (result != VK_SUCCESS)
         return result;
   }

   *ppData = mem->bo->map + offset;
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   /* TODO: unmap here instead of waiting for FreeMemory */
}

/* Memory is coherent on this hardware path: flush is a no-op. */
VKAPI_ATTR VkResult VKAPI_CALL
tu_FlushMappedMemoryRanges(VkDevice _device,
                           uint32_t memoryRangeCount,
                           const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
2414tu_InvalidateMappedMemoryRanges(VkDevice _device, 2415 uint32_t memoryRangeCount, 2416 const VkMappedMemoryRange *pMemoryRanges) 2417{ 2418 return VK_SUCCESS; 2419} 2420 2421static void 2422tu_get_buffer_memory_requirements(uint64_t size, 2423 VkMemoryRequirements2 *pMemoryRequirements) 2424{ 2425 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) { 2426 .memoryTypeBits = 1, 2427 .alignment = 64, 2428 .size = MAX2(align64(size, 64), size), 2429 }; 2430 2431 vk_foreach_struct(ext, pMemoryRequirements->pNext) { 2432 switch (ext->sType) { 2433 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { 2434 VkMemoryDedicatedRequirements *req = 2435 (VkMemoryDedicatedRequirements *) ext; 2436 req->requiresDedicatedAllocation = false; 2437 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; 2438 break; 2439 } 2440 default: 2441 break; 2442 } 2443 } 2444} 2445 2446VKAPI_ATTR void VKAPI_CALL 2447tu_GetBufferMemoryRequirements2( 2448 VkDevice device, 2449 const VkBufferMemoryRequirementsInfo2 *pInfo, 2450 VkMemoryRequirements2 *pMemoryRequirements) 2451{ 2452 TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer); 2453 2454 tu_get_buffer_memory_requirements(buffer->size, pMemoryRequirements); 2455} 2456 2457VKAPI_ATTR void VKAPI_CALL 2458tu_GetDeviceBufferMemoryRequirements( 2459 VkDevice device, 2460 const VkDeviceBufferMemoryRequirements *pInfo, 2461 VkMemoryRequirements2 *pMemoryRequirements) 2462{ 2463 tu_get_buffer_memory_requirements(pInfo->pCreateInfo->size, pMemoryRequirements); 2464} 2465 2466VKAPI_ATTR void VKAPI_CALL 2467tu_GetDeviceMemoryCommitment(VkDevice device, 2468 VkDeviceMemory memory, 2469 VkDeviceSize *pCommittedMemoryInBytes) 2470{ 2471 *pCommittedMemoryInBytes = 0; 2472} 2473 2474VKAPI_ATTR VkResult VKAPI_CALL 2475tu_BindBufferMemory2(VkDevice device, 2476 uint32_t bindInfoCount, 2477 const VkBindBufferMemoryInfo *pBindInfos) 2478{ 2479 for (uint32_t i = 0; i < bindInfoCount; ++i) { 2480 TU_FROM_HANDLE(tu_device_memory, mem, 
pBindInfos[i].memory); 2481 TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer); 2482 2483 if (mem) { 2484 buffer->bo = mem->bo; 2485 buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset; 2486 } else { 2487 buffer->bo = NULL; 2488 } 2489 } 2490 return VK_SUCCESS; 2491} 2492 2493VKAPI_ATTR VkResult VKAPI_CALL 2494tu_BindImageMemory2(VkDevice device, 2495 uint32_t bindInfoCount, 2496 const VkBindImageMemoryInfo *pBindInfos) 2497{ 2498 for (uint32_t i = 0; i < bindInfoCount; ++i) { 2499 TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image); 2500 TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory); 2501 2502 if (mem) { 2503 image->bo = mem->bo; 2504 image->iova = mem->bo->iova + pBindInfos[i].memoryOffset; 2505 } else { 2506 image->bo = NULL; 2507 image->iova = 0; 2508 } 2509 } 2510 2511 return VK_SUCCESS; 2512} 2513 2514VKAPI_ATTR VkResult VKAPI_CALL 2515tu_QueueBindSparse(VkQueue _queue, 2516 uint32_t bindInfoCount, 2517 const VkBindSparseInfo *pBindInfo, 2518 VkFence _fence) 2519{ 2520 return VK_SUCCESS; 2521} 2522 2523VKAPI_ATTR VkResult VKAPI_CALL 2524tu_CreateEvent(VkDevice _device, 2525 const VkEventCreateInfo *pCreateInfo, 2526 const VkAllocationCallbacks *pAllocator, 2527 VkEvent *pEvent) 2528{ 2529 TU_FROM_HANDLE(tu_device, device, _device); 2530 2531 struct tu_event *event = 2532 vk_object_alloc(&device->vk, pAllocator, sizeof(*event), 2533 VK_OBJECT_TYPE_EVENT); 2534 if (!event) 2535 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2536 2537 VkResult result = tu_bo_init_new(device, &event->bo, 0x1000, 2538 TU_BO_ALLOC_NO_FLAGS); 2539 if (result != VK_SUCCESS) 2540 goto fail_alloc; 2541 2542 result = tu_bo_map(device, event->bo); 2543 if (result != VK_SUCCESS) 2544 goto fail_map; 2545 2546 *pEvent = tu_event_to_handle(event); 2547 2548 return VK_SUCCESS; 2549 2550fail_map: 2551 tu_bo_finish(device, event->bo); 2552fail_alloc: 2553 vk_object_free(&device->vk, pAllocator, event); 2554 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 
2555} 2556 2557VKAPI_ATTR void VKAPI_CALL 2558tu_DestroyEvent(VkDevice _device, 2559 VkEvent _event, 2560 const VkAllocationCallbacks *pAllocator) 2561{ 2562 TU_FROM_HANDLE(tu_device, device, _device); 2563 TU_FROM_HANDLE(tu_event, event, _event); 2564 2565 if (!event) 2566 return; 2567 2568 tu_bo_finish(device, event->bo); 2569 vk_object_free(&device->vk, pAllocator, event); 2570} 2571 2572VKAPI_ATTR VkResult VKAPI_CALL 2573tu_GetEventStatus(VkDevice _device, VkEvent _event) 2574{ 2575 TU_FROM_HANDLE(tu_event, event, _event); 2576 2577 if (*(uint64_t*) event->bo->map == 1) 2578 return VK_EVENT_SET; 2579 return VK_EVENT_RESET; 2580} 2581 2582VKAPI_ATTR VkResult VKAPI_CALL 2583tu_SetEvent(VkDevice _device, VkEvent _event) 2584{ 2585 TU_FROM_HANDLE(tu_event, event, _event); 2586 *(uint64_t*) event->bo->map = 1; 2587 2588 return VK_SUCCESS; 2589} 2590 2591VKAPI_ATTR VkResult VKAPI_CALL 2592tu_ResetEvent(VkDevice _device, VkEvent _event) 2593{ 2594 TU_FROM_HANDLE(tu_event, event, _event); 2595 *(uint64_t*) event->bo->map = 0; 2596 2597 return VK_SUCCESS; 2598} 2599 2600VKAPI_ATTR VkResult VKAPI_CALL 2601tu_CreateBuffer(VkDevice _device, 2602 const VkBufferCreateInfo *pCreateInfo, 2603 const VkAllocationCallbacks *pAllocator, 2604 VkBuffer *pBuffer) 2605{ 2606 TU_FROM_HANDLE(tu_device, device, _device); 2607 struct tu_buffer *buffer; 2608 2609 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); 2610 2611 buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer), 2612 VK_OBJECT_TYPE_BUFFER); 2613 if (buffer == NULL) 2614 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2615 2616 buffer->size = pCreateInfo->size; 2617 buffer->usage = pCreateInfo->usage; 2618 buffer->flags = pCreateInfo->flags; 2619 2620 *pBuffer = tu_buffer_to_handle(buffer); 2621 2622 return VK_SUCCESS; 2623} 2624 2625VKAPI_ATTR void VKAPI_CALL 2626tu_DestroyBuffer(VkDevice _device, 2627 VkBuffer _buffer, 2628 const VkAllocationCallbacks *pAllocator) 2629{ 2630 
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);

   if (!buffer)
      return;

   vk_object_free(&device->vk, pAllocator, buffer);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,
                     const VkFramebufferCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkFramebuffer *pFramebuffer)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   /* TU_DEBUG_DYNAMIC routes render passes through the common runtime's
    * framebuffer implementation instead of the turnip one.
    */
   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
      return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
                                         pFramebuffer);

   TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
   struct tu_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;

   /* Imageless framebuffers get their attachments at begin-render-pass time,
    * so the trailing attachment array is only allocated otherwise.
    */
   size_t size = sizeof(*framebuffer);
   if (!imageless)
      size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
   framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
                                 VK_OBJECT_TYPE_FRAMEBUFFER);
   if (framebuffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   if (!imageless) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct tu_image_view *iview = tu_image_view_from_handle(_iview);
         framebuffer->attachments[i].attachment = iview;
      }
   }

   /* Precompute the GMEM tiling layout for this framebuffer/pass pair. */
   tu_framebuffer_tiling_config(framebuffer, device, pass);

   *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

/* Configure the command buffer's embedded framebuffer for dynamic rendering
 * (VK_KHR_dynamic_rendering): dimensions come from the render area so that
 * offset + extent is covered.
 */
void
tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *pRenderingInfo)
{
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;

   framebuffer->attachment_count = pass->attachment_count;
   framebuffer->width = pRenderingInfo->renderArea.offset.x +
      pRenderingInfo->renderArea.extent.width;
   framebuffer->height = pRenderingInfo->renderArea.offset.y +
      pRenderingInfo->renderArea.extent.height;
   framebuffer->layers = pRenderingInfo->layerCount;

   tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,
                      VkFramebuffer _fb,
                      const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   /* Must mirror the TU_DEBUG_DYNAMIC branch taken in CreateFramebuffer. */
   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
      vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
      return;
   }

   TU_FROM_HANDLE(tu_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_object_free(&device->vk, pAllocator, fb);
}

static void
tu_init_sampler(struct tu_device *device,
                struct tu_sampler *sampler,
                const VkSamplerCreateInfo *pCreateInfo)
{
   const struct VkSamplerReductionModeCreateInfo *reduction =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
   const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
   /* for non-custom border colors, the VK enum is translated directly to an offset in
    * the border color buffer. custom border colors are located immediately after the
    * builtin colors, and thus an offset of TU_BORDER_COLOR_BUILTIN is added.
2736 */ 2737 uint32_t border_color = (unsigned) pCreateInfo->borderColor; 2738 if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || 2739 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) { 2740 mtx_lock(&device->mutex); 2741 border_color = BITSET_FFS(device->custom_border_color) - 1; 2742 assert(border_color < TU_BORDER_COLOR_COUNT); 2743 BITSET_CLEAR(device->custom_border_color, border_color); 2744 mtx_unlock(&device->mutex); 2745 2746 VkClearColorValue color = custom_border_color->customBorderColor; 2747 if (custom_border_color->format == VK_FORMAT_D24_UNORM_S8_UINT && 2748 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT && 2749 device->use_z24uint_s8uint) { 2750 /* When sampling stencil using the special Z24UINT_S8UINT format, the 2751 * border color is in the second component. Note: if 2752 * customBorderColorWithoutFormat is enabled, we may miss doing this 2753 * here if the format isn't specified, which is why we don't use that 2754 * format. 2755 */ 2756 color.uint32[1] = color.uint32[0]; 2757 } 2758 2759 tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]), 2760 &color, 2761 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT); 2762 border_color += TU_BORDER_COLOR_BUILTIN; 2763 } 2764 2765 unsigned aniso = pCreateInfo->anisotropyEnable ? 
2766 util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0; 2767 bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR); 2768 float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f); 2769 float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f); 2770 2771 sampler->descriptor[0] = 2772 COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | 2773 A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) | 2774 A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) | 2775 A6XX_TEX_SAMP_0_ANISO(aniso) | 2776 A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) | 2777 A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) | 2778 A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) | 2779 A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias); 2780 sampler->descriptor[1] = 2781 /* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */ 2782 COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) | 2783 A6XX_TEX_SAMP_1_MIN_LOD(min_lod) | 2784 A6XX_TEX_SAMP_1_MAX_LOD(max_lod) | 2785 COND(pCreateInfo->compareEnable, 2786 A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp))); 2787 sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color); 2788 sampler->descriptor[3] = 0; 2789 2790 if (reduction) { 2791 sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE( 2792 tu6_reduction_mode(reduction->reductionMode)); 2793 } 2794 2795 sampler->ycbcr_sampler = ycbcr_conversion ? 2796 tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL; 2797 2798 if (sampler->ycbcr_sampler && 2799 sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) { 2800 sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR; 2801 } 2802 2803 /* TODO: 2804 * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter? 
2805 */ 2806} 2807 2808VKAPI_ATTR VkResult VKAPI_CALL 2809tu_CreateSampler(VkDevice _device, 2810 const VkSamplerCreateInfo *pCreateInfo, 2811 const VkAllocationCallbacks *pAllocator, 2812 VkSampler *pSampler) 2813{ 2814 TU_FROM_HANDLE(tu_device, device, _device); 2815 struct tu_sampler *sampler; 2816 2817 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); 2818 2819 sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler), 2820 VK_OBJECT_TYPE_SAMPLER); 2821 if (!sampler) 2822 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 2823 2824 tu_init_sampler(device, sampler, pCreateInfo); 2825 *pSampler = tu_sampler_to_handle(sampler); 2826 2827 return VK_SUCCESS; 2828} 2829 2830VKAPI_ATTR void VKAPI_CALL 2831tu_DestroySampler(VkDevice _device, 2832 VkSampler _sampler, 2833 const VkAllocationCallbacks *pAllocator) 2834{ 2835 TU_FROM_HANDLE(tu_device, device, _device); 2836 TU_FROM_HANDLE(tu_sampler, sampler, _sampler); 2837 uint32_t border_color; 2838 2839 if (!sampler) 2840 return; 2841 2842 border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >> A6XX_TEX_SAMP_2_BCOLOR__SHIFT; 2843 if (border_color >= TU_BORDER_COLOR_BUILTIN) { 2844 border_color -= TU_BORDER_COLOR_BUILTIN; 2845 /* if the sampler had a custom border color, free it. TODO: no lock */ 2846 mtx_lock(&device->mutex); 2847 assert(!BITSET_TEST(device->custom_border_color, border_color)); 2848 BITSET_SET(device->custom_border_color, border_color); 2849 mtx_unlock(&device->mutex); 2850 } 2851 2852 vk_object_free(&device->vk, pAllocator, sampler); 2853} 2854 2855/* vk_icd.h does not declare this function, so we declare it here to 2856 * suppress Wmissing-prototypes. 
2857 */ 2858PUBLIC VKAPI_ATTR VkResult VKAPI_CALL 2859vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion); 2860 2861PUBLIC VKAPI_ATTR VkResult VKAPI_CALL 2862vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) 2863{ 2864 /* For the full details on loader interface versioning, see 2865 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. 2866 * What follows is a condensed summary, to help you navigate the large and 2867 * confusing official doc. 2868 * 2869 * - Loader interface v0 is incompatible with later versions. We don't 2870 * support it. 2871 * 2872 * - In loader interface v1: 2873 * - The first ICD entrypoint called by the loader is 2874 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this 2875 * entrypoint. 2876 * - The ICD must statically expose no other Vulkan symbol unless it 2877 * is linked with -Bsymbolic. 2878 * - Each dispatchable Vulkan handle created by the ICD must be 2879 * a pointer to a struct whose first member is VK_LOADER_DATA. The 2880 * ICD must initialize VK_LOADER_DATA.loadMagic to 2881 * ICD_LOADER_MAGIC. 2882 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and 2883 * vkDestroySurfaceKHR(). The ICD must be capable of working with 2884 * such loader-managed surfaces. 2885 * 2886 * - Loader interface v2 differs from v1 in: 2887 * - The first ICD entrypoint called by the loader is 2888 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must 2889 * statically expose this entrypoint. 2890 * 2891 * - Loader interface v3 differs from v2 in: 2892 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), 2893 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR, 2894 * because the loader no longer does so. 2895 * 2896 * - Loader interface v4 differs from v3 in: 2897 * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr(). 
2898 * 2899 * - Loader interface v5 differs from v4 in: 2900 * - The ICD must support Vulkan API version 1.1 and must not return 2901 * VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a 2902 * Vulkan Loader with interface v4 or smaller is being used and the 2903 * application provides an API version that is greater than 1.0. 2904 */ 2905 *pSupportedVersion = MIN2(*pSupportedVersion, 5u); 2906 return VK_SUCCESS; 2907} 2908 2909VKAPI_ATTR VkResult VKAPI_CALL 2910tu_GetMemoryFdKHR(VkDevice _device, 2911 const VkMemoryGetFdInfoKHR *pGetFdInfo, 2912 int *pFd) 2913{ 2914 TU_FROM_HANDLE(tu_device, device, _device); 2915 TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory); 2916 2917 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); 2918 2919 /* At the moment, we support only the below handle types. */ 2920 assert(pGetFdInfo->handleType == 2921 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 2922 pGetFdInfo->handleType == 2923 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 2924 2925 int prime_fd = tu_bo_export_dmabuf(device, memory->bo); 2926 if (prime_fd < 0) 2927 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 2928 2929 *pFd = prime_fd; 2930 return VK_SUCCESS; 2931} 2932 2933VKAPI_ATTR VkResult VKAPI_CALL 2934tu_GetMemoryFdPropertiesKHR(VkDevice _device, 2935 VkExternalMemoryHandleTypeFlagBits handleType, 2936 int fd, 2937 VkMemoryFdPropertiesKHR *pMemoryFdProperties) 2938{ 2939 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 2940 pMemoryFdProperties->memoryTypeBits = 1; 2941 return VK_SUCCESS; 2942} 2943 2944VKAPI_ATTR void VKAPI_CALL 2945tu_GetPhysicalDeviceExternalFenceProperties( 2946 VkPhysicalDevice physicalDevice, 2947 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, 2948 VkExternalFenceProperties *pExternalFenceProperties) 2949{ 2950 pExternalFenceProperties->exportFromImportedHandleTypes = 0; 2951 pExternalFenceProperties->compatibleHandleTypes = 0; 2952 
pExternalFenceProperties->externalFenceFeatures = 0; 2953} 2954 2955VKAPI_ATTR void VKAPI_CALL 2956tu_GetDeviceGroupPeerMemoryFeatures( 2957 VkDevice device, 2958 uint32_t heapIndex, 2959 uint32_t localDeviceIndex, 2960 uint32_t remoteDeviceIndex, 2961 VkPeerMemoryFeatureFlags *pPeerMemoryFeatures) 2962{ 2963 assert(localDeviceIndex == remoteDeviceIndex); 2964 2965 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | 2966 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | 2967 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | 2968 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; 2969} 2970 2971VKAPI_ATTR void VKAPI_CALL 2972tu_GetPhysicalDeviceMultisamplePropertiesEXT( 2973 VkPhysicalDevice physicalDevice, 2974 VkSampleCountFlagBits samples, 2975 VkMultisamplePropertiesEXT* pMultisampleProperties) 2976{ 2977 TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); 2978 2979 if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations) 2980 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 }; 2981 else 2982 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 }; 2983} 2984 2985VkDeviceAddress 2986tu_GetBufferDeviceAddress(VkDevice _device, 2987 const VkBufferDeviceAddressInfo* pInfo) 2988{ 2989 TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer); 2990 2991 return buffer->iova; 2992} 2993 2994uint64_t tu_GetBufferOpaqueCaptureAddress( 2995 VkDevice device, 2996 const VkBufferDeviceAddressInfo* pInfo) 2997{ 2998 tu_stub(); 2999 return 0; 3000} 3001 3002uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress( 3003 VkDevice device, 3004 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo) 3005{ 3006 tu_stub(); 3007 return 0; 3008} 3009