/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>

#ifdef __FreeBSD__
#include <sys/types.h>
#endif
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#ifdef __linux__
#include <sys/inotify.h>
#endif

#include "util/debug.h"
#include "util/disk_cache.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_util.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
#else
#include <amdgpu.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
#include "util/build_id.h"
#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vulkan/vk_icd.h"

#ifdef LLVM_AVAILABLE
#include "ac_llvm_util.h"
#endif

/* The number of IBs per submit isn't infinite, it depends on the IP type
 * (i.e. some initial setup needed for a submit) and the number of IBs (4 DW).
 * This limit is arbitrary but should be safe for now. Ideally, we should get
 * this limit from the KMD.
 */
#define RADV_MAX_IBS_PER_SUBMIT 192

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD. */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static VkResult radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission);

uint64_t
radv_get_current_time(void)
{
   return os_time_get_nano();
}

static void
parse_hex(char *out, const char *in, unsigned length)
{
   for (unsigned i = 0; i < length; ++i)
      out[i] = 0;

   for (unsigned i = 0; i < 2 * length; ++i) {
      unsigned v =
         in[i] <= '9' ? in[i] - '0' : (in[i] >= 'a' ? (in[i] - 'a' + 10) : (in[i] - 'A' + 10));
      out[i / 2] |= v << (4 * (1 - i % 2));
   }
}
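
/* Example: parse_hex(out, "1a2B", 2) stores out[0] = 0x1a and out[1] = 0x2b;
 * the input must provide exactly 2 * length hex digits, upper- or lower-case. */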

static int
radv_device_get_cache_uuid(struct radv_physical_device *pdevice, void *uuid)
{
   enum radeon_family family = pdevice->rad_info.family;
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   unsigned ptr_size = sizeof(void *);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

#ifdef RADV_BUILD_ID_OVERRIDE
   {
      char data[strlen(RADV_BUILD_ID_OVERRIDE) / 2];
      parse_hex(data, RADV_BUILD_ID_OVERRIDE, ARRAY_SIZE(data));
      _mesa_sha1_update(&ctx, data, ARRAY_SIZE(data));
   }
#else
   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx))
      return -1;
#endif

#ifdef LLVM_AVAILABLE
   if (pdevice->use_llvm &&
       !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
      return -1;
#endif

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
   /* override_vram_size is expressed in MiB, hence the shift by 20 to get bytes. */
   int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
   if (ov >= 0)
      return MIN2((uint64_t)device->rad_info.vram_size_kb * 1024, (uint64_t)ov << 20);
   return (uint64_t)device->rad_info.vram_size_kb * 1024;
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
   return MIN2(radv_get_adjusted_vram_size(device),
               (uint64_t)device->rad_info.vram_vis_size_kb * 1024);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
   uint64_t total_size = radv_get_adjusted_vram_size(device);
   return total_size - MIN2(total_size, (uint64_t)device->rad_info.vram_vis_size_kb * 1024);
}

enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,
   RADV_HEAP_GTT = 1 << 1,
   RADV_HEAP_VRAM_VIS = 1 << 2,
   RADV_HEAP_MAX = 1 << 3,
};

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   uint64_t gtt_size = (uint64_t)device->rad_info.gart_size_kb * 1024;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
       * greater than it. To work around this, we compute the total available memory size (GTT +
       * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
       */
      const uint64_t total_size = gtt_size + visible_vram_size;
      visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);
      gtt_size = total_size - visible_vram_size;
      vram_size = 0;
   }
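
   /* Worked example of the split above (illustrative numbers): with a 512 MiB
    * carveout and 5632 MiB of GTT, total_size is 6144 MiB, so ~4096 MiB is
    * reported as VRAM and ~2048 MiB as GTT, with the VRAM share aligned to
    * the GART page size. */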

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };

      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;
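
   /* The base memory types are now, in order (when the corresponding heap
    * exists): VRAM (device-local), VRAM (device-local, 32-bit addressable),
    * GTT (write-combined), visible VRAM (device-local, host-visible) and
    * GTT (cached). */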

   if (device->rad_info.has_l2_uncached) {
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if (((mem_type.propertyFlags &
               (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
              mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
             !(device->memory_flags[i] & RADEON_FLAG_32BIT)) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
                                                   VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }

   for (unsigned i = 0; i < type_count; ++i) {
      if (device->memory_flags[i] & RADEON_FLAG_32BIT)
         device->memory_types_32bit |= BITFIELD_BIT(i);
   }
}

static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
   if (!pdevice->use_llvm) {
      /* Some games like SotTR apply shader workarounds if the LLVM
       * version is too old or if the LLVM version string is
       * missing. This gives 2-5% performance with SotTR and ACO.
       */
      if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
         return " (LLVM 9.0.1)";
      }

      return "";
   }

#ifdef LLVM_AVAILABLE
   return " (LLVM " MESA_LLVM_VERSION_STRING ")";
#else
   unreachable("LLVM is not available");
#endif
}

int
radv_get_int_debug_option(const char *name, int default_value)
{
   const char *str;
   int result;

   str = getenv(name);
   if (!str) {
      result = default_value;
   } else {
      char *endptr;

      result = strtol(str, &endptr, 0);
      if (str == endptr) {
         /* No digits found. */
         result = default_value;
      }
   }

   return result;
}

static bool
radv_thread_trace_enabled(void)
{
   return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
          getenv("RADV_THREAD_TRACE_TRIGGER");
}

static bool
radv_spm_trace_enabled(void)
{
   return radv_thread_trace_enabled() &&
          debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", false);
}

static bool
radv_perf_query_supported(const struct radv_physical_device *pdev)
{
   /* SQTT / SPM interfere with the register states for perf counters, and
    * the code has only been tested on GFX10.3. */
   return pdev->rad_info.gfx_level == GFX10_3 && !radv_thread_trace_enabled();
}

static bool
radv_taskmesh_enabled(const struct radv_physical_device *pdevice)
{
   return pdevice->use_ngg && !pdevice->use_llvm && pdevice->rad_info.gfx_level >= GFX10_3 &&
          !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) &&
          pdevice->rad_info.has_scheduled_fence_dependency;
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) ||                    \
   defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID
#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define RADV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#endif

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = RADV_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table radv_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,
   .EXT_debug_utils = true,

#ifdef RADV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
                                              struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table){
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = radv_enable_rt(device, false),
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->rad_info.gfx_level >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_performance_query = radv_perf_query_supported(device),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = !device->use_llvm,
      .KHR_push_descriptor = true,
      .KHR_ray_query = radv_enable_rt(device, false),
      .KHR_ray_tracing_maintenance1 = radv_enable_rt(device, false),
      .KHR_ray_tracing_pipeline = radv_enable_rt(device, true),
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = device->rad_info.gfx_level >= GFX10,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = radv_thread_trace_enabled(),
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_indexing = true,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = device->rad_info.has_userptr,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = device->rad_info.gfx_level >= GFX9,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = device->rad_info.gfx_level >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_multi_draw = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = device->rad_info.gfx_level >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->rad_info.gfx_level < GFX10,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = device->rad_info.gfx_level >= GFX7,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
#ifdef LLVM_AVAILABLE
      .EXT_shader_atomic_float2 = !device->use_llvm || LLVM_VERSION_MAJOR >= 14,
#else
      .EXT_shader_atomic_float2 = true,
#endif
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_transform_feedback = device->rad_info.gfx_level < GFX11,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !device->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = device->rad_info.gfx_level < GFX11,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_shader_integer_functions2 = true,
      .NV_compute_shader_derivatives = true,
      .NV_device_generated_commands = device->rad_info.gfx_level >= GFX7 &&
                                      !(device->instance->debug_flags & RADV_DEBUG_NO_IBS) &&
                                      driQueryOptionb(&device->instance->dri_options, "radv_dgc"),
      .NV_mesh_shader =
         radv_taskmesh_enabled(device) && device->instance->perftest_flags & RADV_PERFTEST_NV_MS,
      /* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
       * using it.
       */
      .VALVE_descriptor_set_host_mapping =
         device->vk.instance->app_info.engine_name &&
         strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0,
      .VALVE_mutable_descriptor_type = true,
   };
}

static bool
radv_is_conformant(const struct radv_physical_device *pdevice)
{
   return pdevice->rad_info.gfx_level >= GFX8;
}

static void
radv_physical_device_init_queue_table(struct radv_physical_device *pdevice)
{
   int idx = 0;
   pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
   idx++;

   for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
      pdevice->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;

   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
      idx++;
   }
   pdevice->num_queues = idx;
}
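
/* With the mapping above, a GPU exposing compute rings (and compute queues
 * not disabled) maps Vulkan queue family 0 to RADV_QUEUE_GENERAL and family 1
 * to RADV_QUEUE_COMPUTE; unused entries keep the out-of-range marker
 * RADV_MAX_QUEUE_FAMILIES + 1. */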

static VkResult
radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
                                struct radv_physical_device **device_out)
{
   VkResult result;
   int fd = -1;
   int master_fd = -1;

#ifdef _WIN32
   assert(drm_device == NULL);
#else
   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Could not open device %s: %m", path);
      }

      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Could not get the kernel driver version for device %s: %m", path);
      }

      if (strcmp(version->name, "amdgpu")) {
         drmFreeVersion(version);
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Device '%s' is not using the AMDGPU kernel driver: %m", path);
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
   }
#endif

   struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
                                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &radv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
   if (result != VK_SUCCESS) {
      goto fail_alloc;
   }

   device->instance = instance;

#ifdef _WIN32
   device->ws = radv_null_winsys_create();
#else
   if (drm_device) {
      bool reserve_vmid = radv_thread_trace_enabled();

      device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags,
                                             reserve_vmid);
   } else {
      device->ws = radv_null_winsys_create();
   }
#endif

   if (!device->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
      goto fail_base;
   }

   device->vk.supported_sync_types = device->ws->get_sync_types(device->ws);

#ifndef _WIN32
   if (drm_device && instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
                                           .return_size = sizeof(accel_working),
                                           .query = AMDGPU_INFO_ACCEL_WORKING};

         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request,
                             sizeof(struct drm_amdgpu_info)) < 0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
#endif

   device->master_fd = master_fd;
   device->local_fd = fd;
   device->ws->query_info(device->ws, &device->rad_info);

   if (device->rad_info.gfx_level >= GFX11) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "This version of RADV does not support RDNA3 yet.");
      goto fail_wsi;
   }

   device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
#ifndef LLVM_AVAILABLE
   if (device->use_llvm) {
      fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
                      "enabled at build time.\n");
      abort();
   }
#endif

#ifdef ANDROID
   device->emulate_etc2 = !radv_device_supports_etc(device);
#else
   device->emulate_etc2 = !radv_device_supports_etc(device) &&
                          driQueryOptionb(&device->instance->dri_options, "radv_require_etc2");
#endif

   snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
            radv_get_compiler_string(device));

   const char *marketing_name = device->ws->get_chip_name(device->ws);
   snprintf(device->marketing_name, sizeof(device->marketing_name), "%s (RADV %s%s)",
            marketing_name ? marketing_name : "AMD Unknown", device->rad_info.name,
            radv_get_compiler_string(device));

#ifdef ENABLE_SHADER_CACHE
   if (radv_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
      goto fail_wsi;
   }

   /* The GPU id is already embedded in the UUID, so we just pass "radv"
    * when creating the cache. */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, 0);
#endif

   if (!radv_is_conformant(device))
      vk_warn_non_conformant_implementation("radv");

   radv_get_driver_uuid(&device->driver_uuid);
   radv_get_device_uuid(&device->rad_info, &device->device_uuid);

   device->out_of_order_rast_allowed =
      device->rad_info.has_out_of_order_rast &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

   device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   device->use_ngg = (device->rad_info.gfx_level >= GFX10 &&
                      device->rad_info.family != CHIP_NAVI14 &&
                      !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
                     device->rad_info.gfx_level >= GFX11;

   device->use_ngg_culling = device->use_ngg && device->rad_info.max_render_backends > 1 &&
                             (device->rad_info.gfx_level >= GFX10_3 ||
                              (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
                             !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);

   device->use_ngg_streamout = false;

   /* Determine the number of threads per wave for all stages. */
   device->cs_wave_size = 64;
   device->ps_wave_size = 64;
   device->ge_wave_size = 64;
   device->rt_wave_size = 64;

   if (device->rad_info.gfx_level >= GFX10) {
      if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         device->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         device->ps_wave_size = 32;

      if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         device->ge_wave_size = 32;

      if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64))
         device->rt_wave_size = 32;
   }
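
   /* In short: on GFX10+, the cswave32/pswave32/gewave32 perftest flags opt
    * the compute, pixel and geometry stages into wave32, and ray tracing
    * defaults to wave32 unless rtwave64 is set; older GPUs always use wave64. */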

   radv_physical_device_init_mem_types(device);

   radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);

   radv_get_nir_options(device);

#ifndef _WIN32
   if (drm_device) {
      struct stat primary_stat = {0}, render_stat = {0};

      device->available_nodes = drm_device->available_nodes;
      device->bus_info = *drm_device->businfo.pci;

      if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
          stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node %s",
                            drm_device->nodes[DRM_NODE_PRIMARY]);
         goto fail_perfcounters;
      }
      device->primary_devid = primary_stat.st_rdev;

      if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
          stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM render node %s",
                            drm_device->nodes[DRM_NODE_RENDER]);
         goto fail_perfcounters;
      }
      device->render_devid = render_stat.st_rdev;
   }
#endif

   if ((device->instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&device->rad_info, stdout);

   radv_physical_device_init_queue_table(device);

   /* We don't check the error code, but later check if it is initialized. */
   ac_init_perfcounters(&device->rad_info, false, false, &device->ac_perfcounters);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_perfcounters;
   }

   device->gs_table_depth =
      ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);

   ac_get_hs_info(&device->rad_info, &device->hs);
   ac_get_task_info(&device->rad_info, &device->task_info);

   *device_out = device;

   return VK_SUCCESS;

fail_perfcounters:
   ac_destroy_perfcounters(&device->ac_perfcounters);
   disk_cache_destroy(device->disk_cache);
fail_wsi:
   device->ws->destroy(device->ws);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
   radv_finish_wsi(device);
   ac_destroy_perfcounters(&device->ac_perfcounters);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->disk_cache);
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const struct debug_control radv_debug_options[] = {
   {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
   {"nodcc", RADV_DEBUG_NO_DCC},
   {"shaders", RADV_DEBUG_DUMP_SHADERS},
   {"nocache", RADV_DEBUG_NO_CACHE},
   {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
   {"nohiz", RADV_DEBUG_NO_HIZ},
   {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
   {"allbos", RADV_DEBUG_ALL_BOS},
   {"noibs", RADV_DEBUG_NO_IBS},
   {"spirv", RADV_DEBUG_DUMP_SPIRV},
   {"vmfaults", RADV_DEBUG_VM_FAULTS},
   {"zerovram", RADV_DEBUG_ZERO_VRAM},
   {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
   {"preoptir", RADV_DEBUG_PREOPTIR},
   {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
   {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
   {"info", RADV_DEBUG_INFO},
   {"startup", RADV_DEBUG_STARTUP},
   {"checkir", RADV_DEBUG_CHECKIR},
   {"nobinning", RADV_DEBUG_NOBINNING},
   {"nongg", RADV_DEBUG_NO_NGG},
   {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
   {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
   {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
   {"llvm", RADV_DEBUG_LLVM},
   {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
   {"hang", RADV_DEBUG_HANG},
   {"img", RADV_DEBUG_IMG},
   {"noumr", RADV_DEBUG_NO_UMR},
   {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
   {"splitfma", RADV_DEBUG_SPLIT_FMA},
   {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
   {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
   {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
   {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
   {"nonggc", RADV_DEBUG_NO_NGGC},
   {"prologs", RADV_DEBUG_DUMP_PROLOGS},
   {"nodma", RADV_DEBUG_NO_DMA_BLIT},
   {"epilogs", RADV_DEBUG_DUMP_EPILOGS},
   {NULL, 0}};

const char *
radv_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_debug_options) - 1);
   return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
   {"localbos", RADV_PERFTEST_LOCAL_BOS},
   {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
   {"bolist", RADV_PERFTEST_BO_LIST},
   {"cswave32", RADV_PERFTEST_CS_WAVE_32},
   {"pswave32", RADV_PERFTEST_PS_WAVE_32},
   {"gewave32", RADV_PERFTEST_GE_WAVE_32},
   {"nosam", RADV_PERFTEST_NO_SAM},
   {"sam", RADV_PERFTEST_SAM},
   {"rt", RADV_PERFTEST_RT},
   {"nggc", RADV_PERFTEST_NGGC},
   {"emulate_rt", RADV_PERFTEST_EMULATE_RT},
   {"nv_ms", RADV_PERFTEST_NV_MS},
   {"rtwave64", RADV_PERFTEST_RT_WAVE_64},
   {NULL, 0}};

const char *
radv_get_perftest_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
   return radv_perftest_options[id].string;
}

// clang-format off
static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
      DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
      DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
      DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
      DRI_CONF_RADV_ABSOLUTE_DEPTH_BIAS(false)
      DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_OVERRIDE_VRAM_SIZE()
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_RADV_ZERO_VRAM(false)
      DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
      DRI_CONF_RADV_INVARIANT_GEOM(false)
      DRI_CONF_RADV_SPLIT_FMA(false)
      DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
      DRI_CONF_RADV_DISABLE_DCC(false)
      DRI_CONF_RADV_REQUIRE_ETC2(false)
      DRI_CONF_RADV_DISABLE_ANISO_SINGLE_LEVEL(false)
      DRI_CONF_RADV_DISABLE_SINKING_LOAD_INPUT_FS(false)
      DRI_CONF_RADV_DGC(false)
      DRI_CONF_RADV_FLUSH_BEFORE_QUERY_COPY(false)
   DRI_CONF_SECTION_END
};
// clang-format on

static void
radv_init_dri_options(struct radv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
                      ARRAY_SIZE(radv_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL,
                       NULL, instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->enable_mrt_output_nan_fixup =
      driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");

   instance->disable_shrink_image_store =
      driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");

   instance->absolute_depth_bias =
      driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");

   instance->disable_tc_compat_htile_in_general =
      driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");

   if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
      instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;

   if (driQueryOptionb(&instance->dri_options, "radv_lower_discard_to_demote"))
      instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;

   if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
      instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;

   if (driQueryOptionb(&instance->dri_options, "radv_split_fma"))
      instance->debug_flags |= RADV_DEBUG_SPLIT_FMA;

   if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
      instance->debug_flags |= RADV_DEBUG_NO_DCC;

   instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram");

   instance->disable_aniso_single_level =
      driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");

   instance->disable_sinking_load_input_fs =
      driQueryOptionb(&instance->dri_options, "radv_disable_sinking_load_input_fs");

   instance->flush_before_query_copy =
      driQueryOptionb(&instance->dri_options, "radv_flush_before_query_copy");
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
{
   struct radv_instance *instance;
   VkResult result;

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false);
   struct vk_instance_extension_table extensions_supported = radv_instance_extensions_supported;

   result = vk_instance_init(&instance->vk, &extensions_supported, &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(instance, result);
   }
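
   /* RADV_DEBUG and RADV_PERFTEST are comma-separated lists of the flag names
    * from the tables above, e.g. RADV_DEBUG=nodcc,zerovram sets
    * RADV_DEBUG_NO_DCC and RADV_DEBUG_ZERO_VRAM. */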
   instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
   instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      fprintf(stderr, "radv: info: Created an instance.\n");

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   radv_init_dri_options(instance);

   *pInstance = radv_instance_to_handle(instance);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices,
                            link)
   {
      radv_physical_device_destroy(pdevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   VkResult result = VK_SUCCESS;

   if (getenv("RADV_FORCE_FAMILY")) {
      /* When RADV_FORCE_FAMILY is set, the driver creates a null
       * device that allows testing the compiler without having an
       * AMDGPU instance. */
      struct radv_physical_device *pdevice;

      result = radv_physical_device_try_create(instance, NULL, &pdevice);
      if (result != VK_SUCCESS)
         return result;

      list_addtail(&pdevice->link, &instance->physical_devices);
      return VK_SUCCESS;
   }

#ifndef _WIN32
   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      fprintf(stderr, "radv: info: Found %d drm nodes.\n", max_devices);

   if (max_devices < 1)
      return vk_error(instance, VK_SUCCESS);

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

         struct radv_physical_device *pdevice;
         result = radv_physical_device_try_create(instance, devices[i], &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);
#endif

   /* If we successfully enumerated any devices, call it success. */
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
                              VkPhysicalDevice *pPhysicalDevices)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDevice, &out, i)
      {
         *i = radv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
                                   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   memset(pFeatures, 0, sizeof(*pFeatures));

   *pFeatures = (VkPhysicalDeviceFeatures){
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice) || pdevice->emulate_etc2,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = true,
      .inheritedQueries = true,
   };
}

static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = true;
   f->uniformAndStorageBuffer16BitAccess = true;
   f->storagePushConstant16 = true;
   f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit;
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = true;
   f->uniformAndStorageBuffer8BitAccess = true;
   f->storagePushConstant8 = true;
   f->shaderBufferInt64Atomics = true;
   f->shaderSharedInt64Atomics = true;
   f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
   f->shaderInt8 = true;

   f->descriptorIndexing = true;
   f->shaderInputAttachmentArrayDynamicIndexing = true;
   f->shaderUniformTexelBufferArrayDynamicIndexing = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing = true;
   f->shaderUniformBufferArrayNonUniformIndexing = true;
   f->shaderSampledImageArrayNonUniformIndexing = true;
   f->shaderStorageBufferArrayNonUniformIndexing = true;
   f->shaderStorageImageArrayNonUniformIndexing = true;
   f->shaderInputAttachmentArrayNonUniformIndexing = true;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   f->descriptorBindingUniformBufferUpdateAfterBind = true;
   f->descriptorBindingSampledImageUpdateAfterBind = true;
   f->descriptorBindingStorageImageUpdateAfterBind = true;
   f->descriptorBindingStorageBufferUpdateAfterBind = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending = true;
   f->descriptorBindingPartiallyBound = true;
   f->descriptorBindingVariableDescriptorCount = true;
   f->runtimeDescriptorArray = true;

   f->samplerFilterMinmax = true;
   f->scalarBlockLayout = pdevice->rad_info.gfx_level >= GFX7;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = true;
   f->bufferDeviceAddress = true;
   f->bufferDeviceAddressCaptureReplay = true;
   f->bufferDeviceAddressMultiDevice = false;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = false;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

static void
radv_get_physical_device_features_1_3(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan13Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);

   f->robustImageAccess = true;
   f->inlineUniformBlock = true;
   f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   f->pipelineCreationCacheControl = true;
   f->privateData = true;
   f->shaderDemoteToHelperInvocation = true;
   f->shaderTerminateInvocation = true;
   f->subgroupSizeControl = true;
   f->computeFullSubgroups = true;
   f->synchronization2 = true;
   f->textureCompressionASTC_HDR = false;
   f->shaderZeroInitializeWorkgroupMemory = true;
   f->dynamicRendering = true;
   f->shaderIntegerDotProduct = true;
   f->maintenance4 = true;
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceFeatures2 *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   radv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   radv_get_physical_device_features_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Features core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   };
   radv_get_physical_device_features_1_3(pdevice, &core_1_3);

#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature
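
   /* e.g. CORE_FEATURE(1, 2, scalarBlockLayout) expands to
    * features->scalarBlockLayout = core_1_2.scalarBlockLayout, forwarding the
    * already-computed core feature value to the extension struct below. */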

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = pdevice->rad_info.gfx_level < GFX11;
         features->geometryStreams =
            !pdevice->use_ngg_streamout && pdevice->rad_info.gfx_level < GFX11;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features =
            (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
         VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
            (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
         features->memoryPriority = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
            (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = false;
         features->computeDerivativeGroupLinear = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = pdevice->rad_info.gfx_level >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
         VkPhysicalDeviceShaderClockFeaturesKHR *features =
            (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
         features->shaderSubgroupClock = true;
         features->shaderDeviceClock = pdevice->rad_info.gfx_level >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
         VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
            (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
         features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = false;
         features->bresenhamLines = true;
         features->smoothLines = false;
         features->stippledRectangularLines = false;
         /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
          * but work on Raven. */
         features->stippledBresenhamLines = pdevice->rad_info.gfx_level != GFX9;
         features->stippledSmoothLines = false;
         break;
      }
      case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
         VkDeviceMemoryOverallocationCreateInfoAMD *features =
            (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
         features->overallocationBehavior = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features =
            (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
         features->extendedDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
            (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
         features->shaderBufferFloat32Atomics = true;
         features->shaderBufferFloat32AtomicAdd = false;
         features->shaderBufferFloat64Atomics = true;
         features->shaderBufferFloat64AtomicAdd = false;
         features->shaderSharedFloat32Atomics = true;
         features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.gfx_level >= GFX8;
         features->shaderSharedFloat64Atomics = true;
         features->shaderSharedFloat64AtomicAdd = false;
         features->shaderImageFloat32Atomics = true;
         features->shaderImageFloat32AtomicAdd = false;
         features->sparseImageFloat32Atomics = true;
         features->sparseImageFloat32AtomicAdd = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features =
            (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
         VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
            (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
         features->shaderImageInt64Atomics = true;
         features->sparseImageInt64Atomics = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
         VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
            (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
         features->mutableDescriptorType = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
         VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
            (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
         features->pipelineFragmentShadingRate = true;
         features->primitiveFragmentShadingRate = true;
         features->attachmentFragmentShadingRate =
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ) &&
            pdevice->rad_info.gfx_level < GFX11; /* TODO: VRS no longer uses HTILE. */
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
         VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
            (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
         features->workgroupMemoryExplicitLayout = true;
         features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
         features->workgroupMemoryExplicitLayout8BitAccess = true;
         features->workgroupMemoryExplicitLayout16BitAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = true;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR: {
         VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *features =
            (VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *)ext;
         features->globalPriorityQuery = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
         VkPhysicalDeviceAccelerationStructureFeaturesKHR *features =
            (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)ext;
         features->accelerationStructure = true;
         features->accelerationStructureCaptureReplay = false;
         features->accelerationStructureIndirectBuild = false;
         features->accelerationStructureHostCommands = true;
         features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
         VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
            (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
         features->shaderSubgroupUniformControlFlow = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
         VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
         features->multiDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features =
            (VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *)ext;
         bool has_shader_buffer_float_minmax = radv_has_shader_buffer_float_minmax(pdevice);
         bool has_shader_image_float_minmax =
            pdevice->rad_info.gfx_level != GFX8 && pdevice->rad_info.gfx_level != GFX9;
         features->shaderBufferFloat16Atomics = false;
         features->shaderBufferFloat16AtomicAdd = false;
         features->shaderBufferFloat16AtomicMinMax = false;
         features->shaderBufferFloat32AtomicMinMax = has_shader_buffer_float_minmax;
         features->shaderBufferFloat64AtomicMinMax = has_shader_buffer_float_minmax;
         features->shaderSharedFloat16Atomics = false;
         features->shaderSharedFloat16AtomicAdd = false;
         features->shaderSharedFloat16AtomicMinMax = false;
         features->shaderSharedFloat32AtomicMinMax = true;
         features->shaderSharedFloat64AtomicMinMax = true;
         features->shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax;
         features->sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
         VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
            (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
         features->primitiveTopologyListRestart = true;
         features->primitiveTopologyPatchListRestart = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
         VkPhysicalDeviceRayQueryFeaturesKHR *features =
            (VkPhysicalDeviceRayQueryFeaturesKHR *)ext;
         features->rayQuery = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR: {
         VkPhysicalDeviceRayTracingPipelineFeaturesKHR *features =
            (VkPhysicalDeviceRayTracingPipelineFeaturesKHR *)ext;
         features->rayTracingPipeline = true;
         features->rayTracingPipelineShaderGroupHandleCaptureReplay = false;
         features->rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false;
         features->rayTracingPipelineTraceRaysIndirect = true;
         features->rayTraversalPrimitiveCulling = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MAINTENANCE_1_FEATURES_KHR: {
         VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR *features =
            (VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR *)ext;
         features->rayTracingMaintenance1 = true;
         features->rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdevice, true);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES: {
         VkPhysicalDeviceMaintenance4Features *features =
            (VkPhysicalDeviceMaintenance4Features *)ext;
         features->maintenance4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *)ext;
         features->vertexInputDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
         VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
            (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
         features->minLod = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: {
         VkPhysicalDeviceSynchronization2Features *features =
            (VkPhysicalDeviceSynchronization2Features *)ext;
         features->synchronization2 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES: {
         VkPhysicalDeviceDynamicRenderingFeatures *features =
            (VkPhysicalDeviceDynamicRenderingFeatures *)ext;
         features->dynamicRendering = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
         VkPhysicalDeviceMeshShaderFeaturesNV *features =
            (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
         features->taskShader = features->meshShader = radv_taskmesh_enabled(pdevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: {
         VkPhysicalDeviceTextureCompressionASTCHDRFeatures *features =
            (VkPhysicalDeviceTextureCompressionASTCHDRFeatures *)ext;
         features->textureCompressionASTC_HDR = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_SET_HOST_MAPPING_FEATURES_VALVE: {
         VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE *features =
            (VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE *)ext;
         features->descriptorSetHostMapping = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
         features->depthClipControl = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
         VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
            (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
         features->image2DViewOf3D = true;
         features->sampler2DViewOf3D = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
         VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
            (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
         features->shaderIntegerFunctions2 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
         VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
            (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
         features->primitivesGeneratedQuery = true;
         features->primitivesGeneratedQueryWithRasterizerDiscard = true;
         features->primitivesGeneratedQueryWithNonZeroStreams = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT: {
         VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *features =
            (VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *)ext;
         features->nonSeamlessCubeMap = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
         VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features =
            (VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *)ext;
         features->borderColorSwizzle = true;
         features->borderColorSwizzleFromImage = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
            (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
         features->shaderModuleIdentifier = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         VkPhysicalDevicePerformanceQueryFeaturesKHR *features =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         features->performanceCounterQueryPools = radv_perf_query_supported(pdevice);
         features->performanceCounterMultipleQueryPools = features->performanceCounterQueryPools;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV: {
         VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV *features =
            (VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV *)ext;
         features->deviceGeneratedCommands = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT: {
         VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT *features =
            (VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT *)ext;
         features->attachmentFeedbackLoopLayout = true;
         break;
      }
      default:
         break;
      }
   }
}
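/* Rough magnitude of the limit computed below (a sketch only, since the MAX_*
 * constants can change): ignoring the comparatively small dynamic-buffer and
 * inline-uniform-block reservations, it works out to about 2^31 / 224
 * bytes-per-slot, i.e. roughly 9.5 million descriptors per stage.
 */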
static size_t
radv_max_descriptor_set_size()
{
   /* Make sure that the entire descriptor set is addressable with a signed
    * 32-bit int, i.e. the sum of all limits scaled by descriptor size has to
    * be at most 2 GiB. A combined image & sampler object counts as one of
    * each. This limit is for the pipeline layout, not for the set layout,
    * but there is no set limit, so we just set a pipeline limit. It is
    * unlikely any app will hit this soon. */
   return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
           MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
           32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
           64 /* storage image */);
}

static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
{
   uint32_t uniform_offset_alignment =
      driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
   if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
      fprintf(stderr,
              "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
              "not a power of two\n",
              uniform_offset_alignment);
      uniform_offset_alignment = 0;
   }

   /* Take at least the hardware limit. */
   return MAX2(uniform_offset_alignment, 4);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
                                 VkPhysicalDeviceProperties *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   VkSampleCountFlags sample_counts = 0xf;

   size_t max_descriptor_set_size = radv_max_descriptor_set_size();

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = UINT32_MAX,
      .maxUniformBufferRange = UINT32_MAX,
      .maxStorageBufferRange = UINT32_MAX,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 1,
      .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
      .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
      .maxPerStageResources = max_descriptor_set_size,
      .maxDescriptorSetSamplers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = max_descriptor_set_size,
      .maxDescriptorSetStorageImages = max_descriptor_set_size,
      .maxDescriptorSetInputAttachments = max_descriptor_set_size,
      .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
      .maxVertexInputBindings = MAX_VBS,
      .maxVertexInputAttributeOffset = UINT32_MAX,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4096,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 127,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 128,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = max_descriptor_set_size,
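      /* Note on the LDS numbers below: compute workgroups can use the full
       * 64 KiB of LDS on GFX7+, while GFX6 limits a single workgroup to
       * 32 KiB (a hardware summary, not a value queried from the kernel).
       */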
      .maxComputeSharedMemorySize = pdevice->rad_info.gfx_level >= GFX7 ? 65536 : 32768,
      .maxComputeWorkGroupCount = {65535, 65535, 65535},
      .maxComputeWorkGroupInvocations = 1024,
      .maxComputeWorkGroupSize = {1024, 1024, 1024},
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = {(1 << 14), (1 << 14)},
      .viewportBoundsRange = {INT16_MIN, INT16_MAX},
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 4,
      .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
      .minStorageBufferOffsetAlignment = 4,
      .minTexelOffset = -32,
      .maxTexelOffset = 31,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -2,
      .maxInterpolationOffset = 2,
      .subPixelInterpolationOffsetBits = 8,
      .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
      .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
      .maxFramebufferLayers = (1 << 10),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = sample_counts,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, /* crystal freq is in kHz, so this is ns per tick */
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = {0.0, 8191.875},
      .lineWidthRange = {0.0, 8191.875},
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 8.0),
      .strictLines = false, /* FINISHME */
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 1,
      .optimalBufferCopyRowPitchAlignment = 1,
      .nonCoherentAtomSize = 64,
   };

   VkPhysicalDeviceType device_type;

   if (pdevice->rad_info.has_dedicated_vram) {
      device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
   } else {
      device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
   }

   *pProperties = (VkPhysicalDeviceProperties){
      .apiVersion = RADV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = ATI_VENDOR_ID,
      .deviceID = pdevice->rad_info.pci_id,
      .deviceType = device_type,
      .limits = limits,
      .sparseProperties =
         {
            .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
            .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
         },
   };

   strcpy(pProperties->deviceName, pdevice->marketing_name);
   memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

static void
radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   /* The LUID is for Windows. */
   p->deviceLUIDValid = false;
   p->deviceNodeMask = 0;
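   /* Wave64 is reported as the base subgroup size here; on GFX10+ wave32 also
    * exists and is exposed through the Vulkan 1.3 min/maxSubgroupSize
    * properties further down.
    */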
   p->subgroupSize = RADV_SUBGROUP_SIZE;
   p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations =
      VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
      VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
      VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
      VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
   p->subgroupQuadOperationsInAllStages = true;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
   p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
}

static void
radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_MESA_RADV;
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
            radv_get_compiler_string(pdevice));

   if (radv_is_conformant(pdevice)) {
      if (pdevice->rad_info.gfx_level >= GFX10_3) {
         p->conformanceVersion = (VkConformanceVersion){
            .major = 1,
            .minor = 3,
            .subminor = 0,
            .patch = 0,
         };
      } else {
         p->conformanceVersion = (VkConformanceVersion){
            .major = 1,
            .minor = 2,
            .subminor = 7,
            .patch = 1,
         };
      }
   } else {
      p->conformanceVersion = (VkConformanceVersion){
         .major = 0,
         .minor = 0,
         .subminor = 0,
         .patch = 0,
      };
   }

   /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
    * controlled by the same config register.
    */
   if (pdevice->rad_info.has_packed_math_16bit) {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
   } else {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   }

   /* With LLVM, do not allow both preserving and flushing denorms because
    * different shaders in the same pipeline can have different settings and
    * this won't work for merged shaders. To make it work, this requires LLVM
    * support for changing the register. The same logic applies for the
    * rounding modes because they are configured with the same config
    * register.
    */
   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat16 =
      pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;

   p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
   p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.gfx_level >= GFX8;
   p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;

   p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = false;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;
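   /* The same computation as radv_max_descriptor_set_size(); keep the two in
    * sync if either ever changes.
    */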
   size_t max_descriptor_set_size =
      ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
       MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
      (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
       32 /* storage buffer, 32 due to potential space wasted on alignment */ +
       32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
       64 /* storage image */);
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;

   /* We support all of the depth resolve modes. */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                   VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT |
                                   VK_RESOLVE_MODE_MAX_BIT;

   /* Average doesn't make sense for stencil so we don't support that. */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                     VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;

   p->independentResolveNone = true;
   p->independentResolve = true;

   /* GFX6-8 only support single channel min/max filter. */
   p->filterMinmaxImageComponentMapping = pdevice->rad_info.gfx_level >= GFX9;
   p->filterMinmaxSingleComponentFormats = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
}

static void
radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan13Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);

   p->minSubgroupSize = 64;
   p->maxSubgroupSize = 64;
   p->maxComputeWorkgroupSubgroups = UINT32_MAX;
   p->requiredSubgroupSizeStages = 0;
   if (pdevice->rad_info.gfx_level >= GFX10) {
      /* Only GFX10+ supports wave32. */
      p->minSubgroupSize = 32;
      p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
   }

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
   p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   bool accel = pdevice->rad_info.has_accelerated_dot_product;
   p->integerDotProduct8BitUnsignedAccelerated = accel;
   p->integerDotProduct8BitSignedAccelerated = accel;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = accel;
   p->integerDotProduct4x8BitPackedSignedAccelerated = accel;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProduct16BitUnsignedAccelerated = accel;
   p->integerDotProduct16BitSignedAccelerated = accel;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 4;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;
   p->uniformTexelBufferOffsetAlignmentBytes = 4;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceProperties2 *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   radv_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   radv_get_physical_device_properties_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Properties core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
   };
   radv_get_physical_device_properties_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pProperties->pNext)
   {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
            (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
            (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
         properties->minImportedHostPointerAlignment = 4096;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
         VkPhysicalDeviceShaderCorePropertiesAMD *properties =
            (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
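         /* Illustrative arithmetic only (hypothetical topology): a part with
          * 4 shader engines x 2 shader arrays x 10 CUs would report 80
          * compute units in total; activeComputeUnitCount in the next case
          * gives the exact enabled count.
          */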
         /* Shader engines. */
         properties->shaderEngineCount = pdevice->rad_info.max_se;
         properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
         properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
         properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
         properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
         properties->wavefrontSize = 64;

         /* SGPR. */
         properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
         properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
         properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
         properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;

         /* VGPR. */
         properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
         properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
         properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
         properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
         VkPhysicalDeviceShaderCoreProperties2AMD *properties =
            (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;

         properties->shaderCoreFeatures = 0;
         properties->activeComputeUnitCount = pdevice->rad_info.num_cu;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         properties->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
            (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
         properties->primitiveOverestimationSize = 0;
         properties->maxExtraPrimitiveOverestimationSize = 0;
         properties->extraPrimitiveOverestimationSizeGranularity = 0;
         properties->primitiveUnderestimation = false;
         properties->conservativePointAndLineRasterization = false;
         properties->degenerateTrianglesRasterized = true;
         properties->degenerateLinesRasterized = false;
         properties->fullyCoveredFragmentShaderInputVariable = false;
         properties->conservativeRasterizationPostDepthCoverage = false;
         break;
      }
#ifndef _WIN32
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
         VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
            (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
         properties->pciDomain = pdevice->bus_info.domain;
         properties->pciBus = pdevice->bus_info.bus;
         properties->pciDevice = pdevice->bus_info.dev;
         properties->pciFunction = pdevice->bus_info.func;
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
         properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = 512;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
         properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
         properties->transformFeedbackRasterizationStreamSelect = false;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
         properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
                                                  VK_SAMPLE_COUNT_8_BIT;
         properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *properties =
            (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
         properties->robustStorageBufferAccessSizeAlignment = 4;
         properties->robustUniformBufferAccessSizeAlignment = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
         props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
         VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
            (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
         props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
         props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
         props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
         props->primitiveFragmentShadingRateWithMultipleViewports = true;
         props->layeredShadingRateAttachments = false; /* TODO */
         props->fragmentShadingRateNonTrivialCombinerOps = true;
         props->maxFragmentSize = (VkExtent2D){2, 2};
         props->maxFragmentSizeAspectRatio = 2;
         props->maxFragmentShadingRateCoverageSamples = 32;
         props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
         props->fragmentShadingRateWithShaderDepthStencilWrites = false;
         props->fragmentShadingRateWithSampleMask = true;
         props->fragmentShadingRateWithShaderSampleMask = false;
         props->fragmentShadingRateWithConservativeRasterization = true;
         props->fragmentShadingRateWithFragmentShaderInterlock = false;
         props->fragmentShadingRateWithCustomSampleLocations = false;
         props->fragmentShadingRateStrictMultiplyCombiner = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         props->provokingVertexModePerPipeline = true;
         props->transformFeedbackPreservesTriangleFanProvokingVertex = true;
         break;
      }
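      /* The geometry/instance/primitive counts below are exactly the Vulkan
       * spec's required minimums: 2^24 - 1 geometries and instances,
       * 2^29 - 1 primitives.
       */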
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
         VkPhysicalDeviceAccelerationStructurePropertiesKHR *props =
            (VkPhysicalDeviceAccelerationStructurePropertiesKHR *)ext;
         props->maxGeometryCount = (1 << 24) - 1;
         props->maxInstanceCount = (1 << 24) - 1;
         props->maxPrimitiveCount = (1 << 29) - 1;
         props->maxPerStageDescriptorAccelerationStructures =
            pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
         props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =
            pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
         props->maxDescriptorSetAccelerationStructures =
            pProperties->properties.limits.maxDescriptorSetStorageBuffers;
         props->maxDescriptorSetUpdateAfterBindAccelerationStructures =
            pProperties->properties.limits.maxDescriptorSetStorageBuffers;
         props->minAccelerationStructureScratchOffsetAlignment = 128;
         break;
      }
#ifndef _WIN32
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext;
         if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {
            props->hasPrimary = true;
            props->primaryMajor = (int64_t)major(pdevice->primary_devid);
            props->primaryMinor = (int64_t)minor(pdevice->primary_devid);
         } else {
            props->hasPrimary = false;
         }
         if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {
            props->hasRender = true;
            props->renderMajor = (int64_t)major(pdevice->render_devid);
            props->renderMinor = (int64_t)minor(pdevice->render_devid);
         } else {
            props->hasRender = false;
         }
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
         VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
         props->maxMultiDrawCount = 2048;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR: {
         VkPhysicalDeviceRayTracingPipelinePropertiesKHR *props =
            (VkPhysicalDeviceRayTracingPipelinePropertiesKHR *)ext;
         props->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
         props->maxRayRecursionDepth = 31; /* Minimum allowed for DXR. */
         props->maxShaderGroupStride = 16384; /* dummy */
         props->shaderGroupBaseAlignment = 16;
         props->shaderGroupHandleCaptureReplaySize = 16;
         props->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
         props->shaderGroupHandleAlignment = 16;
         props->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
         VkPhysicalDeviceMaintenance4Properties *properties =
            (VkPhysicalDeviceMaintenance4Properties *)ext;
         properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
         VkPhysicalDeviceMeshShaderPropertiesNV *properties =
            (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;

         /* Task shader limitations:
          * Same as compute, because TS are compiled to CS.
          */
         properties->maxDrawMeshTasksCount = 65535;
         properties->maxTaskTotalMemorySize = 65536;
         properties->maxTaskWorkGroupInvocations = 1024;
         properties->maxTaskWorkGroupSize[0] = 1024;
         properties->maxTaskWorkGroupSize[1] = 1024;
         properties->maxTaskWorkGroupSize[2] = 1024;
         properties->maxTaskOutputCount = 65535;

         /* Mesh shader limitations:
          * Same as NGG, because MS are compiled to NGG.
          */
         properties->maxMeshMultiviewViewCount = MAX_VIEWS;
         properties->maxMeshOutputPrimitives = 256;
         properties->maxMeshOutputVertices = 256;
         properties->maxMeshTotalMemorySize = 31 * 1024; /* Reserve 1K for prim indices, etc. */
         properties->maxMeshWorkGroupInvocations = 256;
         properties->maxMeshWorkGroupSize[0] = 256;
         properties->maxMeshWorkGroupSize[1] = 256;
         properties->maxMeshWorkGroupSize[2] = 256;
         properties->meshOutputPerPrimitiveGranularity = 1;
         properties->meshOutputPerVertexGranularity = 1;

         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *properties =
            (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
         STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                       sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
         memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
                vk_shaderModuleIdentifierAlgorithmUUID,
                sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
         VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
            (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
         properties->allowCommandBufferQueryCopies = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV: {
         VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *properties =
            (VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV *)ext;
         properties->maxIndirectCommandsStreamCount = 1;
         properties->maxIndirectCommandsStreamStride = UINT32_MAX;
         properties->maxIndirectCommandsTokenCount = UINT32_MAX;
         properties->maxIndirectCommandsTokenOffset = UINT16_MAX;
         properties->minIndirectCommandsBufferOffsetAlignment = 4;
         properties->minSequencesCountBufferOffsetAlignment = 4;
         properties->minSequencesIndexBufferOffsetAlignment = 4;

         /* Don't support even a shader group count = 1 until we support shader
          * overrides during pipeline creation.
          */
         properties->maxGraphicsShaderGroupCount = 0;

         properties->maxIndirectSequenceCount = UINT32_MAX;
         break;
      }
      default:
         break;
      }
   }
}

static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
                                                 uint32_t *pCount,
                                                 VkQueueFamilyProperties **pQueueFamilyProperties)
{
   int num_queue_families = 1;
   int idx;
   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
      num_queue_families++;

   if (pQueueFamilyProperties == NULL) {
      *pCount = num_queue_families;
      return;
   }

   if (!*pCount)
      return;

   idx = 0;
   if (*pCount >= 1) {
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
         .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
                       VK_QUEUE_SPARSE_BINDING_BIT,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
      };
      idx++;
   }

   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags =
               VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }
   *pCount = idx;
}

static const VkQueueGlobalPriorityKHR radv_global_queue_priorities[] = {
   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
};

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
                                             VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   VkQueueFamilyProperties *properties[] = {
      &pQueueFamilyProperties[0].queueFamilyProperties,
      &pQueueFamilyProperties[1].queueFamilyProperties,
      &pQueueFamilyProperties[2].queueFamilyProperties,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 3);

   for (uint32_t i = 0; i < *pCount; i++) {
      vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)
      {
         switch (ext->sType) {
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
            VkQueueFamilyGlobalPriorityPropertiesKHR *prop =
               (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
            STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
            prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
            memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
            break;
         }
         default:
            break;
         }
      }
   }
}
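/* A worked example of the budget formula used below (hypothetical numbers):
 * with an 8 GiB heap of which 3 GiB is in use system-wide and 1 GiB of that
 * belongs to this process, heap_budget = 8 - 3 + 1 = 6 GiB.
 */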
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
   VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;

   /* For all memory heaps, the computation of the budget is as follows:
    *   heap_budget = heap_size - global_heap_usage + app_heap_usage
    *
    * The Vulkan spec 1.1.97 says that the budget should include any
    * currently allocated device memory.
    *
    * Note that the application heap usages are not really accurate (e.g.
    * in presence of shared buffers).
    */
   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the driver exposes fake heaps to the application because usually the carveout is
       * too small for games but the budgets need to be redistributed accordingly.
       */

      assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
      assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
      assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
      uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;

      /* Get the visible VRAM/GTT heap sizes and internal usages. */
      uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
      uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

      uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                         device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
      uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);

      /* Compute the total heap size, internal and system usage. */
      uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
      uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
      uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
                                    device->ws->query_value(device->ws, RADEON_GTT_USAGE);

      uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

      /* Compute the total free space that can be allocated for this process across all heaps. */
      uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

      /* Compute the remaining visible VRAM size for this process. */
      uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);

      /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap sizes,
       * and align down to the page size to be conservative.
       */
      vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),
                                          device->rad_info.gart_page_size);
      uint64_t gtt_free_space = total_free_space - vram_vis_free_space;

      memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
      memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
      memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
      memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
   } else {
      unsigned mask = device->heaps;
      unsigned heap = 0;
      while (mask) {
         uint64_t internal_usage = 0, system_usage = 0;
         unsigned type = 1u << u_bit_scan(&mask);

         switch (type) {
         case RADV_HEAP_VRAM:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
            system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
            break;
         case RADV_HEAP_VRAM_VIS:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
            if (!(device->heaps & RADV_HEAP_VRAM))
               internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
            system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
            break;
         case RADV_HEAP_GTT:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
            system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
            break;
         }

         uint64_t total_usage = MAX2(internal_usage, system_usage);

         uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
                               MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
         memoryBudget->heapBudget[heap] = free_space + internal_usage;
         memoryBudget->heapUsage[heap] = internal_usage;
         ++heap;
      }

      assert(heap == memory_properties->memoryHeapCount);
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to
    * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
    */
   for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
                                        VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory_properties;

   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
      vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
   if (memory_budget)
      radv_get_memory_budget_properties(physicalDevice, memory_budget);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetMemoryHostPointerPropertiesEXT(
   VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
   VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
      const struct radv_physical_device *physical_device = device->physical_device;
      uint32_t memoryTypeBits = 0;
      for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
         if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
             !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
            memoryTypeBits = (1 << i);
            break;
         }
      }
      pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
      return VK_SUCCESS;
   }
   default:
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *pObj)
{
   /* Default to MEDIUM when a specific global priority isn't requested. */
   if (!pObj)
      return RADEON_CTX_PRIORITY_MEDIUM;

   switch (pObj->globalPriority) {
   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
      return RADEON_CTX_PRIORITY_REALTIME;
   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
      return RADEON_CTX_PRIORITY_HIGH;
   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
      return RADEON_CTX_PRIORITY_MEDIUM;
   case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
      return RADEON_CTX_PRIORITY_LOW;
   default:
      unreachable("Illegal global priority value");
      return RADEON_CTX_PRIORITY_INVALID;
   }
}

int
radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
                const VkDeviceQueueCreateInfo *create_info,
                const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority)
{
   queue->device = device;
   queue->priority = radv_get_queue_global_priority(global_priority);
   queue->hw_ctx = device->hw_ctx[queue->priority];
   queue->state.qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex);

   VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   queue->vk.driver_submit = radv_queue_submit;

   return VK_SUCCESS;
}

static void
radv_queue_state_finish(struct radv_queue_state *queue, struct radeon_winsys *ws)
{
   if (queue->initial_full_flush_preamble_cs)
      ws->cs_destroy(queue->initial_full_flush_preamble_cs);
   if (queue->initial_preamble_cs)
      ws->cs_destroy(queue->initial_preamble_cs);
(queue->continue_preamble_cs) 2877 ws->cs_destroy(queue->continue_preamble_cs); 2878 if (queue->descriptor_bo) 2879 ws->buffer_destroy(ws, queue->descriptor_bo); 2880 if (queue->scratch_bo) 2881 ws->buffer_destroy(ws, queue->scratch_bo); 2882 if (queue->esgs_ring_bo) 2883 ws->buffer_destroy(ws, queue->esgs_ring_bo); 2884 if (queue->gsvs_ring_bo) 2885 ws->buffer_destroy(ws, queue->gsvs_ring_bo); 2886 if (queue->tess_rings_bo) 2887 ws->buffer_destroy(ws, queue->tess_rings_bo); 2888 if (queue->task_rings_bo) 2889 ws->buffer_destroy(ws, queue->task_rings_bo); 2890 if (queue->gds_bo) 2891 ws->buffer_destroy(ws, queue->gds_bo); 2892 if (queue->gds_oa_bo) 2893 ws->buffer_destroy(ws, queue->gds_oa_bo); 2894 if (queue->compute_scratch_bo) 2895 ws->buffer_destroy(ws, queue->compute_scratch_bo); 2896} 2897 2898static void 2899radv_queue_finish(struct radv_queue *queue) 2900{ 2901 if (queue->ace_internal_state) { 2902 /* Prevent double free */ 2903 queue->ace_internal_state->task_rings_bo = NULL; 2904 2905 /* Clean up the internal ACE queue state. */ 2906 radv_queue_state_finish(queue->ace_internal_state, queue->device->ws); 2907 free(queue->ace_internal_state); 2908 } 2909 2910 radv_queue_state_finish(&queue->state, queue->device->ws); 2911 vk_queue_finish(&queue->vk); 2912} 2913 2914static bool 2915radv_queue_init_ace_internal_state(struct radv_queue *queue) 2916{ 2917 if (queue->ace_internal_state) 2918 return true; 2919 2920 queue->ace_internal_state = calloc(1, sizeof(struct radv_queue_state)); 2921 if (!queue->ace_internal_state) 2922 return false; 2923 2924 queue->ace_internal_state->qf = RADV_QUEUE_COMPUTE; 2925 return true; 2926} 2927 2928static VkResult 2929radv_device_init_border_color(struct radv_device *device) 2930{ 2931 VkResult result; 2932 2933 result = device->ws->buffer_create( 2934 device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM, 2935 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING, 2936 RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo); 2937 2938 if (result != VK_SUCCESS) 2939 return vk_error(device, result); 2940 2941 result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true); 2942 if (result != VK_SUCCESS) 2943 return vk_error(device, result); 2944 2945 device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo); 2946 if (!device->border_color_data.colors_gpu_ptr) 2947 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 2948 mtx_init(&device->border_color_data.mutex, mtx_plain); 2949 2950 return VK_SUCCESS; 2951} 2952 2953static void 2954radv_device_finish_border_color(struct radv_device *device) 2955{ 2956 if (device->border_color_data.bo) { 2957 device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false); 2958 device->ws->buffer_destroy(device->ws, device->border_color_data.bo); 2959 2960 mtx_destroy(&device->border_color_data.mutex); 2961 } 2962} 2963 2964static VkResult 2965radv_device_init_vs_prologs(struct radv_device *device) 2966{ 2967 u_rwlock_init(&device->vs_prologs_lock); 2968 device->vs_prologs = _mesa_hash_table_create(NULL, &radv_hash_vs_prolog, &radv_cmp_vs_prolog); 2969 if (!device->vs_prologs) 2970 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); 2971 2972 /* don't pre-compile prologs if we want to print them */ 2973 if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS) 2974 return VK_SUCCESS; 2975 2976 struct radv_vs_input_state state; 2977 state.nontrivial_divisors 
= 0; 2978 memset(state.offsets, 0, sizeof(state.offsets)); 2979 state.alpha_adjust_lo = 0; 2980 state.alpha_adjust_hi = 0; 2981 memset(state.formats, 0, sizeof(state.formats)); 2982 2983 struct radv_vs_prolog_key key; 2984 key.state = &state; 2985 key.misaligned_mask = 0; 2986 key.as_ls = false; 2987 key.is_ngg = device->physical_device->use_ngg; 2988 key.next_stage = MESA_SHADER_VERTEX; 2989 key.wave32 = device->physical_device->ge_wave_size == 32; 2990 2991 for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) { 2992 state.attribute_mask = BITFIELD_MASK(i); 2993 state.instance_rate_inputs = 0; 2994 2995 key.num_attributes = i; 2996 2997 device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key); 2998 if (!device->simple_vs_prologs[i - 1]) 2999 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 3000 } 3001 3002 unsigned idx = 0; 3003 for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) { 3004 state.attribute_mask = BITFIELD_MASK(num_attributes); 3005 3006 for (unsigned i = 0; i < num_attributes; i++) 3007 state.divisors[i] = 1; 3008 3009 for (unsigned count = 1; count <= num_attributes; count++) { 3010 for (unsigned start = 0; start <= (num_attributes - count); start++) { 3011 state.instance_rate_inputs = u_bit_consecutive(start, count); 3012 3013 key.num_attributes = num_attributes; 3014 3015 struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key); 3016 if (!prolog) 3017 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); 3018 3019 assert(idx == 3020 radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs)); 3021 device->instance_rate_vs_prologs[idx++] = prolog; 3022 } 3023 } 3024 } 3025 assert(idx == ARRAY_SIZE(device->instance_rate_vs_prologs)); 3026 3027 return VK_SUCCESS; 3028} 3029 3030static void 3031radv_device_finish_vs_prologs(struct radv_device *device) 3032{ 3033 if (device->vs_prologs) { 3034 hash_table_foreach(device->vs_prologs, entry) 3035 { 3036 free((void *)entry->key); 3037 radv_shader_part_destroy(device, entry->data); 3038 } 3039 _mesa_hash_table_destroy(device->vs_prologs, NULL); 3040 } 3041 3042 for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++) 3043 radv_shader_part_destroy(device, device->simple_vs_prologs[i]); 3044 3045 for (unsigned i = 0; i < ARRAY_SIZE(device->instance_rate_vs_prologs); i++) 3046 radv_shader_part_destroy(device, device->instance_rate_vs_prologs[i]); 3047} 3048 3049VkResult 3050radv_device_init_vrs_state(struct radv_device *device) 3051{ 3052 /* FIXME: 4k depth buffers should be large enough for now but we might want to adjust this 3053 * dynamically at some point. 
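 * (The 4096x4096 D16 image created below backs the device-wide forced-VRS state; the separately allocated buffer sized from planes[0].surface.meta_size holds its HTILE metadata, which is presumably where the rates actually live, so the image only needs to cover the largest expected render area.)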
3054 */ 3055 uint32_t width = 4096, height = 4096; 3056 VkDeviceMemory mem; 3057 VkBuffer buffer; 3058 VkResult result; 3059 VkImage image; 3060 3061 VkImageCreateInfo image_create_info = { 3062 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 3063 .imageType = VK_IMAGE_TYPE_2D, 3064 .format = VK_FORMAT_D16_UNORM, 3065 .extent = {width, height, 1}, 3066 .mipLevels = 1, 3067 .arrayLayers = 1, 3068 .samples = VK_SAMPLE_COUNT_1_BIT, 3069 .tiling = VK_IMAGE_TILING_OPTIMAL, 3070 .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 3071 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 3072 .queueFamilyIndexCount = 0, 3073 .pQueueFamilyIndices = NULL, 3074 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 3075 }; 3076 3077 result = radv_CreateImage(radv_device_to_handle(device), &image_create_info, 3078 &device->meta_state.alloc, &image); 3079 if (result != VK_SUCCESS) 3080 return result; 3081 3082 VkBufferCreateInfo buffer_create_info = { 3083 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 3084 .size = radv_image_from_handle(image)->planes[0].surface.meta_size, 3085 .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 3086 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 3087 }; 3088 3089 result = radv_CreateBuffer(radv_device_to_handle(device), &buffer_create_info, 3090 &device->meta_state.alloc, &buffer); 3091 if (result != VK_SUCCESS) 3092 goto fail_create; 3093 3094 VkBufferMemoryRequirementsInfo2 info = { 3095 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, 3096 .buffer = buffer, 3097 }; 3098 VkMemoryRequirements2 mem_req = { 3099 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, 3100 }; 3101 radv_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req); 3102 3103 VkMemoryAllocateInfo alloc_info = { 3104 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, 3105 .allocationSize = mem_req.memoryRequirements.size, 3106 }; 3107 3108 result = radv_AllocateMemory(radv_device_to_handle(device), &alloc_info, 3109 &device->meta_state.alloc, &mem); 3110 if (result != VK_SUCCESS) 3111 goto fail_alloc; 3112 3113 VkBindBufferMemoryInfo bind_info = { 3114 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, 3115 .buffer = buffer, 3116 .memory = mem, 3117 .memoryOffset = 0 3118 }; 3119 3120 result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info); 3121 if (result != VK_SUCCESS) 3122 goto fail_bind; 3123 3124 device->vrs.image = radv_image_from_handle(image); 3125 device->vrs.buffer = radv_buffer_from_handle(buffer); 3126 device->vrs.mem = radv_device_memory_from_handle(mem); 3127 3128 return VK_SUCCESS; 3129 3130fail_bind: 3131 radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc); 3132fail_alloc: 3133 radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc); 3134fail_create: 3135 radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc); 3136 3137 return result; 3138} 3139 3140static void 3141radv_device_finish_vrs_image(struct radv_device *device) 3142{ 3143 if (!device->vrs.image) 3144 return; 3145 3146 radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem), 3147 &device->meta_state.alloc); 3148 radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer), 3149 &device->meta_state.alloc); 3150 radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image), 3151 &device->meta_state.alloc); 3152} 3153 3154static enum radv_force_vrs 3155radv_parse_vrs_rates(const char *str) 3156{ 3157 if (!strcmp(str, "2x2")) { 3158 
return RADV_FORCE_VRS_2x2; 3159 } else if (!strcmp(str, "2x1")) { 3160 return RADV_FORCE_VRS_2x1; 3161 } else if (!strcmp(str, "1x2")) { 3162 return RADV_FORCE_VRS_1x2; 3163 } else if (!strcmp(str, "1x1")) { 3164 return RADV_FORCE_VRS_1x1; 3165 } 3166 3167 fprintf(stderr, "radv: Invalid VRS rates specified (valid values are 2x2, 2x1, 1x2 and 1x1)\n"); 3168 return RADV_FORCE_VRS_1x1; 3169 } 3170 3171 static const char * 3172 radv_get_force_vrs_config_file(void) 3173 { 3174 return getenv("RADV_FORCE_VRS_CONFIG_FILE"); 3175 } 3176 3177 static enum radv_force_vrs 3178 radv_parse_force_vrs_config_file(const char *config_file) 3179 { 3180 enum radv_force_vrs force_vrs = RADV_FORCE_VRS_1x1; 3181 char buf[4]; 3182 FILE *f; 3183 3184 f = fopen(config_file, "r"); 3185 if (!f) { 3186 fprintf(stderr, "radv: Can't open file: '%s'.\n", config_file); 3187 return force_vrs; 3188 } 3189 3190 if (fread(buf, sizeof(buf), 1, f) == 1) { 3191 buf[3] = '\0'; 3192 force_vrs = radv_parse_vrs_rates(buf); 3193 } 3194 3195 fclose(f); 3196 return force_vrs; 3197 } 3198 3199 #ifdef __linux__ 3200 3201 #define BUF_LEN ((10 * (sizeof(struct inotify_event) + NAME_MAX + 1))) 3202 3203 static int 3204 radv_notifier_thread_run(void *data) 3205 { 3206 struct radv_device *device = data; 3207 struct radv_notifier *notifier = &device->notifier; 3208 char buf[BUF_LEN]; 3209 3210 while (!notifier->quit) { 3211 const char *file = radv_get_force_vrs_config_file(); 3212 struct timespec tm = { .tv_nsec = 100000000 }; /* 100ms */ 3213 int length, i = 0; 3214 3215 length = read(notifier->fd, buf, BUF_LEN); 3216 while (i < length) { 3217 struct inotify_event *event = (struct inotify_event *)&buf[i]; 3218 3219 i += sizeof(struct inotify_event) + event->len; 3220 if (event->mask & IN_MODIFY || event->mask & IN_DELETE_SELF) { 3221 /* Sleep 100ms for editors that use a temporary file and delete the original.
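 * Such editors effectively replace the watched inode, which is why the IN_DELETE_SELF branch below re-creates the watch on the new file.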
*/ 3222 thrd_sleep(&tm, NULL); 3223 device->force_vrs = radv_parse_force_vrs_config_file(file); 3224 3225 fprintf(stderr, "radv: Updated the per-vertex VRS rate to '%d'.\n", device->force_vrs); 3226 3227 if (event->mask & IN_DELETE_SELF) { 3228 inotify_rm_watch(notifier->fd, notifier->watch); 3229 notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF); 3230 } 3231 } 3232 } 3233 3234 thrd_sleep(&tm, NULL); 3235 } 3236 3237 return 0; 3238 } 3239 3240 #endif 3241 3242 static bool 3243 radv_device_init_notifier(struct radv_device *device) 3244 { 3245 #ifndef __linux__ 3246 return true; 3247 #else 3248 struct radv_notifier *notifier = &device->notifier; 3249 const char *file = radv_get_force_vrs_config_file(); 3250 int ret; 3251 3252 notifier->fd = inotify_init1(IN_NONBLOCK); 3253 if (notifier->fd < 0) 3254 return false; 3255 3256 notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF); 3257 if (notifier->watch < 0) 3258 goto fail_watch; 3259 3260 ret = thrd_create(&notifier->thread, radv_notifier_thread_run, device); 3261 if (ret) 3262 goto fail_thread; 3263 3264 return true; 3265 3266 fail_thread: 3267 inotify_rm_watch(notifier->fd, notifier->watch); 3268 fail_watch: 3269 close(notifier->fd); 3270 3271 return false; 3272 #endif 3273 } 3274 3275 static void 3276 radv_device_finish_notifier(struct radv_device *device) 3277 { 3278 #ifdef __linux__ 3279 struct radv_notifier *notifier = &device->notifier; 3280 3281 if (!notifier->thread) 3282 return; 3283 3284 notifier->quit = true; 3285 thrd_join(notifier->thread, NULL); 3286 inotify_rm_watch(notifier->fd, notifier->watch); 3287 close(notifier->fd); 3288 #endif 3289 } 3290 3291 static void 3292 radv_device_finish_perf_counter_lock_cs(struct radv_device *device) 3293 { 3294 if (!device->perf_counter_lock_cs) 3295 return; 3296 3297 for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) { 3298 if (device->perf_counter_lock_cs[i]) 3299 device->ws->cs_destroy(device->perf_counter_lock_cs[i]); 3300 } 3301 3302 free(device->perf_counter_lock_cs); 3303 } 3304 3305 VKAPI_ATTR VkResult VKAPI_CALL 3306 radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, 3307 const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) 3308 { 3309 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); 3310 VkResult result; 3311 struct radv_device *device; 3312 3313 bool keep_shader_info = false; 3314 bool robust_buffer_access = false; 3315 bool robust_buffer_access2 = false; 3316 bool overallocation_disallowed = false; 3317 bool custom_border_colors = false; 3318 bool attachment_vrs_enabled = false; 3319 bool image_float32_atomics = false; 3320 bool vs_prologs = false; 3321 bool global_bo_list = false; 3322 bool image_2d_view_of_3d = false; 3323 bool primitives_generated_query = false; 3324 bool use_perf_counters = false; 3325 bool use_dgc = false; 3326 3327 /* Check enabled features */ 3328 if (pCreateInfo->pEnabledFeatures) { 3329 if (pCreateInfo->pEnabledFeatures->robustBufferAccess) 3330 robust_buffer_access = true; 3331 } 3332 3333 vk_foreach_struct_const(ext, pCreateInfo->pNext) 3334 { 3335 switch (ext->sType) { 3336 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: { 3337 const VkPhysicalDeviceFeatures2 *features = (const void *)ext; 3338 if (features->features.robustBufferAccess) 3339 robust_buffer_access = true; 3340 break; 3341 } 3342 case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: { 3343 const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const
void *)ext; 3344 if (overallocation->overallocationBehavior == 3345 VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD) 3346 overallocation_disallowed = true; 3347 break; 3348 } 3349 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { 3350 const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = 3351 (const void *)ext; 3352 custom_border_colors = border_color_features->customBorderColors; 3353 break; 3354 } 3355 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: { 3356 const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext; 3357 attachment_vrs_enabled = vrs->attachmentFragmentShadingRate; 3358 break; 3359 } 3360 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: { 3361 const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext; 3362 if (features->robustBufferAccess2) 3363 robust_buffer_access2 = true; 3364 break; 3365 } 3366 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: { 3367 const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (const void *)ext; 3368 if (features->shaderImageFloat32Atomics || 3369 features->sparseImageFloat32Atomics) 3370 image_float32_atomics = true; 3371 break; 3372 } 3373 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: { 3374 const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (const void *)ext; 3375 if (features->shaderImageFloat32AtomicMinMax || 3376 features->sparseImageFloat32AtomicMinMax) 3377 image_float32_atomics = true; 3378 break; 3379 } 3380 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: { 3381 const VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features = (const void *)ext; 3382 if (features->vertexInputDynamicState) 3383 vs_prologs = true; 3384 break; 3385 } 3386 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: { 3387 const VkPhysicalDeviceVulkan12Features *features = (const void *)ext; 3388 if (features->bufferDeviceAddress || features->descriptorIndexing) 3389 global_bo_list = true; 3390 break; 3391 } 3392 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: { 3393 const VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features = (const void *)ext; 3394 if (features->image2DViewOf3D) 3395 image_2d_view_of_3d = true; 3396 break; 3397 } 3398 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: { 3399 const VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features = (const void *)ext; 3400 if (features->primitivesGeneratedQuery || 3401 features->primitivesGeneratedQueryWithRasterizerDiscard || 3402 features->primitivesGeneratedQueryWithNonZeroStreams) 3403 primitives_generated_query = true; 3404 break; 3405 } 3406 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: { 3407 const VkPhysicalDevicePerformanceQueryFeaturesKHR *features = (const void *)ext; 3408 if (features->performanceCounterQueryPools) 3409 use_perf_counters = true; 3410 break; 3411 } 3412 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV: { 3413 const VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV *features = (const void *)ext; 3414 if (features->deviceGeneratedCommands) 3415 use_dgc = true; 3416 break; 3417 } 3418 default: 3419 break; 3420 } 3421 } 3422 3423 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8, 3424 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 3425 if (!device) 3426 return vk_error(physical_device->instance, 
VK_ERROR_OUT_OF_HOST_MEMORY); 3427 3428 struct vk_device_dispatch_table dispatch_table; 3429 3430 if (physical_device->instance->vk.app_info.app_name && 3431 !strcmp(physical_device->instance->vk.app_info.app_name, "metroexodus")) { 3432 /* Metro Exodus (Linux native) calls vkGetSemaphoreCounterValue() with a NULL semaphore and it 3433 * crashes sometimes. Work around this game bug by enabling an internal layer. Remove this 3434 * when the game is fixed. 3435 */ 3436 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &metro_exodus_device_entrypoints, true); 3437 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false); 3438 } else if (radv_thread_trace_enabled()) { 3439 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true); 3440 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false); 3441 } else { 3442 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true); 3443 } 3444 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_device_entrypoints, false); 3445 3446 result = 3447 vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator); 3448 if (result != VK_SUCCESS) { 3449 vk_free(&device->vk.alloc, device); 3450 return result; 3451 } 3452 3453 device->instance = physical_device->instance; 3454 device->physical_device = physical_device; 3455 simple_mtx_init(&device->trace_mtx, mtx_plain); 3456 simple_mtx_init(&device->pstate_mtx, mtx_plain); 3457 3458 device->ws = physical_device->ws; 3459 vk_device_set_drm_fd(&device->vk, device->ws->get_fd(device->ws)); 3460 3461 /* With update-after-bind we can't attach BOs to the command buffer 3462 * from the descriptor set anymore, so we have to use a global BO list. 3463 */ 3464 device->use_global_bo_list = global_bo_list || 3465 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) || 3466 device->vk.enabled_extensions.EXT_descriptor_indexing || 3467 device->vk.enabled_extensions.EXT_buffer_device_address || 3468 device->vk.enabled_extensions.KHR_buffer_device_address || 3469 device->vk.enabled_extensions.KHR_ray_tracing_pipeline || 3470 device->vk.enabled_extensions.KHR_acceleration_structure || 3471 device->vk.enabled_extensions.VALVE_descriptor_set_host_mapping; 3472 3473 device->robust_buffer_access = robust_buffer_access || robust_buffer_access2; 3474 device->robust_buffer_access2 = robust_buffer_access2; 3475 3476 device->attachment_vrs_enabled = attachment_vrs_enabled; 3477 3478 device->image_float32_atomics = image_float32_atomics; 3479 3480 device->image_2d_view_of_3d = image_2d_view_of_3d; 3481 3482 device->primitives_generated_query = primitives_generated_query; 3483 device->uses_device_generated_commands = use_dgc; 3484 3485 radv_init_shader_arenas(device); 3486 3487 device->overallocation_disallowed = overallocation_disallowed; 3488 mtx_init(&device->overallocation_mutex, mtx_plain); 3489 3490 /* Create one context per queue priority.
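 * Contexts are keyed by priority and shared between all queues created with the same priority, so at most RADV_NUM_HW_CTX contexts exist per device.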
*/ 3491 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 3492 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; 3493 const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority = 3494 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); 3495 enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority); 3496 3497 if (device->hw_ctx[priority]) 3498 continue; 3499 3500 result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]); 3501 if (result != VK_SUCCESS) 3502 goto fail; 3503 } 3504 3505 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 3506 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; 3507 uint32_t qfi = queue_create->queueFamilyIndex; 3508 const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority = 3509 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR); 3510 3511 device->queues[qfi] = 3512 vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8, 3513 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 3514 if (!device->queues[qfi]) { 3515 result = VK_ERROR_OUT_OF_HOST_MEMORY; 3516 goto fail; 3517 } 3518 3519 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue)); 3520 3521 device->queue_count[qfi] = queue_create->queueCount; 3522 3523 for (unsigned q = 0; q < queue_create->queueCount; q++) { 3524 result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority); 3525 if (result != VK_SUCCESS) 3526 goto fail; 3527 } 3528 } 3529 device->private_sdma_queue = VK_NULL_HANDLE; 3530 3531 device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 && 3532 !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); 3533 3534 /* The maximum number of scratch waves. Scratch space isn't divided 3535 * evenly between CUs. The number is only a function of the number of CUs. 3536 * We can decrease the constant to decrease the scratch buffer size. 3537 * 3538 * device->scratch_waves must be >= the maximum possible size of 3539 * 1 threadgroup, so that the hw doesn't hang from being unable 3540 * to start any. 3541 * 3542 * The recommended value is 4 per CU at most. Higher numbers don't 3543 * bring much benefit, but they still occupy chip resources (think 3544 * async compute). I've seen ~2% performance difference between 4 and 32. 3545 */ 3546 uint32_t max_threads_per_block = 2048; 3547 device->scratch_waves = 3548 MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64); 3549 3550 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); 3551 3552 if (device->physical_device->rad_info.gfx_level >= GFX7) { 3553 /* If the KMD allows it (there is a KMD hw register for it), 3554 * allow launching waves out-of-order. 3555 */ 3556 device->dispatch_initiator |= S_00B800_ORDER_MODE(1); 3557 } 3558 3559 /* Disable partial preemption for task shaders. 3560 * The kernel may not support preemption, but PAL always sets this bit, 3561 * so let's also set it here for consistency. 3562 */ 3563 device->dispatch_initiator_task = 3564 device->dispatch_initiator | S_00B800_DISABLE_DISP_PREMPT_EN(1); 3565 3566 if (device->instance->debug_flags & RADV_DEBUG_HANG) { 3567 /* Enable GPU hang detection and dump logs if a GPU hang is 3568 * detected.
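 * This implies keeping shader info around and syncing after every draw/dispatch (RADV_DEBUG_SYNC_SHADERS below), which is what makes the option so expensive.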
3569 */ 3570 keep_shader_info = true; 3571 3572 if (!radv_init_trace(device)) 3573 goto fail; 3574 3575 fprintf(stderr, 3576 "*****************************************************************************\n"); 3577 fprintf(stderr, 3578 "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n"); 3579 fprintf(stderr, 3580 "*****************************************************************************\n"); 3581 3582 /* Wait for idle after every draw/dispatch to identify the 3583 * first bad call. 3584 */ 3585 device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS; 3586 3587 radv_dump_enabled_options(device, stderr); 3588 } 3589 3590 if (radv_thread_trace_enabled()) { 3591 if (device->physical_device->rad_info.gfx_level < GFX8 || 3592 device->physical_device->rad_info.gfx_level > GFX10_3) { 3593 fprintf(stderr, "GPU hardware not supported: refer to " 3594 "the RGP documentation for the list of " 3595 "supported GPUs!\n"); 3596 abort(); 3597 } 3598 3599 if (!radv_thread_trace_init(device)) 3600 goto fail; 3601 3602 fprintf(stderr, "radv: Thread trace support is enabled (initial buffer size: %u MiB, " 3603 "instruction timing: %s, cache counters: %s).\n", 3604 device->thread_trace.buffer_size / (1024 * 1024), 3605 radv_is_instruction_timing_enabled() ? "enabled" : "disabled", 3606 radv_spm_trace_enabled() ? "enabled" : "disabled"); 3607 3608 if (radv_spm_trace_enabled()) { 3609 if (device->physical_device->rad_info.gfx_level >= GFX10) { 3610 if (!radv_spm_init(device)) 3611 goto fail; 3612 } else { 3613 fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", 3614 device->physical_device->name); 3615 } 3616 } 3617 } 3618 3619 if (getenv("RADV_TRAP_HANDLER")) { 3620 /* TODO: Add support for more hardware. */ 3621 assert(device->physical_device->rad_info.gfx_level == GFX8); 3622 3623 fprintf(stderr, "**********************************************************************\n"); 3624 fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n"); 3625 fprintf(stderr, "**********************************************************************\n"); 3626 3627 /* To get the disassembly of the faulty shaders, we have to 3628 * keep some shader info around. 3629 */ 3630 keep_shader_info = true; 3631 3632 if (!radv_trap_handler_init(device)) 3633 goto fail; 3634 } 3635 3636 if (device->physical_device->rad_info.gfx_level >= GFX10_3) { 3637 if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) { 3638 const char *file = radv_get_force_vrs_config_file(); 3639 3640 device->force_vrs = radv_parse_force_vrs_config_file(file); 3641 3642 if (radv_device_init_notifier(device)) { 3643 device->force_vrs_enabled = true; 3644 } else { 3645 fprintf(stderr, "radv: Failed to initialize the notifier for RADV_FORCE_VRS_CONFIG_FILE!\n"); 3646 } 3647 } else if (getenv("RADV_FORCE_VRS")) { 3648 const char *vrs_rates = getenv("RADV_FORCE_VRS"); 3649 3650 device->force_vrs = radv_parse_vrs_rates(vrs_rates); 3651 device->force_vrs_enabled = device->force_vrs != RADV_FORCE_VRS_1x1; 3652 } 3653 } 3654 3655 /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */ 3656 device->load_grid_size_from_user_sgpr = device->physical_device->rad_info.gfx_level >= GFX10_3; 3657 3658 device->keep_shader_info = keep_shader_info; 3659 result = radv_device_init_meta(device); 3660 if (result != VK_SUCCESS) 3661 goto fail; 3662 3663 radv_device_init_msaa(device); 3664 3665 /* If the border color extension is enabled, let's create the buffer we need. 
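 * (A single device-wide, CPU-visible BO holds all custom border color values; samplers presumably refer to entries in it by slot index.)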
*/ 3666 if (custom_border_colors) { 3667 result = radv_device_init_border_color(device); 3668 if (result != VK_SUCCESS) 3669 goto fail; 3670 } 3671 3672 if (vs_prologs) { 3673 result = radv_device_init_vs_prologs(device); 3674 if (result != VK_SUCCESS) 3675 goto fail; 3676 } 3677 3678 if (device->physical_device->rad_info.gfx_level >= GFX7) 3679 cik_create_gfx_config(device); 3680 3681 VkPipelineCacheCreateInfo ci; 3682 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; 3683 ci.pNext = NULL; 3684 ci.flags = 0; 3685 ci.pInitialData = NULL; 3686 ci.initialDataSize = 0; 3687 VkPipelineCache pc; 3688 result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc); 3689 if (result != VK_SUCCESS) 3690 goto fail_meta; 3691 3692 device->mem_cache = radv_pipeline_cache_from_handle(pc); 3693 3694 device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1)); 3695 if (device->force_aniso >= 0) { 3696 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 3697 1 << util_logbase2(device->force_aniso)); 3698 } 3699 3700 if (use_perf_counters) { 3701 size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES; 3702 result = 3703 device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT, 3704 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, 3705 RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo); 3706 if (result != VK_SUCCESS) 3707 goto fail_cache; 3708 3709 device->perf_counter_lock_cs = 3710 calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES); 3711 if (!device->perf_counter_lock_cs) { 3712 result = VK_ERROR_OUT_OF_HOST_MEMORY; 3713 goto fail_cache; 3714 } 3715 3716 if (!device->physical_device->ac_perfcounters.blocks) { 3717 result = VK_ERROR_INITIALIZATION_FAILED; 3718 goto fail_cache; 3719 } 3720 } 3721 3722 *pDevice = radv_device_to_handle(device); 3723 return VK_SUCCESS; 3724 3725fail_cache: 3726 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL); 3727fail_meta: 3728 radv_device_finish_meta(device); 3729fail: 3730 radv_thread_trace_finish(device); 3731 3732 radv_spm_finish(device); 3733 3734 radv_trap_handler_finish(device); 3735 radv_finish_trace(device); 3736 3737 radv_device_finish_perf_counter_lock_cs(device); 3738 if (device->perf_counter_bo) 3739 device->ws->buffer_destroy(device->ws, device->perf_counter_bo); 3740 if (device->gfx_init) 3741 device->ws->buffer_destroy(device->ws, device->gfx_init); 3742 3743 radv_device_finish_notifier(device); 3744 radv_device_finish_vs_prologs(device); 3745 radv_device_finish_border_color(device); 3746 3747 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 3748 for (unsigned q = 0; q < device->queue_count[i]; q++) 3749 radv_queue_finish(&device->queues[i][q]); 3750 if (device->queue_count[i]) 3751 vk_free(&device->vk.alloc, device->queues[i]); 3752 } 3753 3754 for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) { 3755 if (device->hw_ctx[i]) 3756 device->ws->ctx_destroy(device->hw_ctx[i]); 3757 } 3758 3759 simple_mtx_destroy(&device->pstate_mtx); 3760 simple_mtx_destroy(&device->trace_mtx); 3761 mtx_destroy(&device->overallocation_mutex); 3762 3763 vk_device_finish(&device->vk); 3764 vk_free(&device->vk.alloc, device); 3765 return result; 3766} 3767 3768VKAPI_ATTR void VKAPI_CALL 3769radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) 3770{ 3771 RADV_FROM_HANDLE(radv_device, device, _device); 3772 3773 if (!device) 3774 return; 3775 3776 radv_device_finish_perf_counter_lock_cs(device); 3777 if 
(device->perf_counter_bo) 3778 device->ws->buffer_destroy(device->ws, device->perf_counter_bo); 3779 3780 if (device->gfx_init) 3781 device->ws->buffer_destroy(device->ws, device->gfx_init); 3782 3783 radv_device_finish_notifier(device); 3784 radv_device_finish_vs_prologs(device); 3785 radv_device_finish_border_color(device); 3786 radv_device_finish_vrs_image(device); 3787 3788 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 3789 for (unsigned q = 0; q < device->queue_count[i]; q++) 3790 radv_queue_finish(&device->queues[i][q]); 3791 if (device->queue_count[i]) 3792 vk_free(&device->vk.alloc, device->queues[i]); 3793 } 3794 if (device->private_sdma_queue != VK_NULL_HANDLE) { 3795 radv_queue_finish(device->private_sdma_queue); 3796 vk_free(&device->vk.alloc, device->private_sdma_queue); 3797 } 3798 3799 for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) { 3800 if (device->hw_ctx[i]) 3801 device->ws->ctx_destroy(device->hw_ctx[i]); 3802 } 3803 3804 mtx_destroy(&device->overallocation_mutex); 3805 simple_mtx_destroy(&device->pstate_mtx); 3806 simple_mtx_destroy(&device->trace_mtx); 3807 3808 radv_device_finish_meta(device); 3809 3810 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache); 3811 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL); 3812 3813 radv_trap_handler_finish(device); 3814 radv_finish_trace(device); 3815 3816 radv_destroy_shader_arenas(device); 3817 3818 radv_thread_trace_finish(device); 3819 3820 radv_spm_finish(device); 3821 3822 vk_device_finish(&device->vk); 3823 vk_free(&device->vk.alloc, device); 3824} 3825 3826VKAPI_ATTR VkResult VKAPI_CALL 3827radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties) 3828{ 3829 if (pProperties == NULL) { 3830 *pPropertyCount = 0; 3831 return VK_SUCCESS; 3832 } 3833 3834 /* None supported at this time */ 3835 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); 3836} 3837 3838VKAPI_ATTR VkResult VKAPI_CALL 3839radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, 3840 VkLayerProperties *pProperties) 3841{ 3842 if (pProperties == NULL) { 3843 *pPropertyCount = 0; 3844 return VK_SUCCESS; 3845 } 3846 3847 /* None supported at this time */ 3848 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); 3849} 3850 3851static void 3852radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions, 3853 uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo, 3854 uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, 3855 struct radeon_winsys_bo *tess_rings_bo, 3856 struct radeon_winsys_bo *task_rings_bo, 3857 struct radeon_winsys_bo *mesh_scratch_ring_bo) 3858{ 3859 uint32_t *desc = &map[4]; 3860 3861 if (esgs_ring_bo) { 3862 uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo); 3863 3864 /* stride 0, num records - size, add tid, swizzle, elsize4, 3865 index stride 64 */ 3866 desc[0] = esgs_va; 3867 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32); 3868 desc[2] = esgs_ring_size; 3869 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3870 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 3871 S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1); 3872 3873 if (device->physical_device->rad_info.gfx_level >= GFX11) 3874 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1); 3875 else 3876 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); 3877 3878 if (device->physical_device->rad_info.gfx_level >= GFX11) { 3879 desc[3] |= 
S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 3880 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 3881 } else if (device->physical_device->rad_info.gfx_level >= GFX10) { 3882 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 3883 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 3884 } else if (device->physical_device->rad_info.gfx_level >= GFX8) { 3885 /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ 3886 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 3887 S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); 3888 } else { 3889 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 3890 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1); 3891 } 3892 3893 /* GS entry for ES->GS ring */ 3894 /* stride 0, num records - size, elsize0, 3895 index stride 0 */ 3896 desc[4] = esgs_va; 3897 desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32); 3898 desc[6] = esgs_ring_size; 3899 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3900 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 3901 3902 if (device->physical_device->rad_info.gfx_level >= GFX11) { 3903 desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 3904 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 3905 } else if (device->physical_device->rad_info.gfx_level >= GFX10) { 3906 desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 3907 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 3908 } else { 3909 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 3910 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 3911 } 3912 } 3913 3914 desc += 8; 3915 3916 if (gsvs_ring_bo) { 3917 uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo); 3918 3919 /* VS entry for GS->VS ring */ 3920 /* stride 0, num records - size, elsize0, 3921 index stride 0 */ 3922 desc[0] = gsvs_va; 3923 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32); 3924 desc[2] = gsvs_ring_size; 3925 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3926 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 3927 3928 if (device->physical_device->rad_info.gfx_level >= GFX11) { 3929 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 3930 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 3931 } else if (device->physical_device->rad_info.gfx_level >= GFX10) { 3932 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 3933 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 3934 } else { 3935 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 3936 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 3937 } 3938 3939 /* stride gsvs_itemsize, num records 64 3940 elsize 4, index stride 16 */ 3941 /* shader will patch stride and desc[2] */ 3942 desc[4] = gsvs_va; 3943 desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32); 3944 desc[6] = 0; 3945 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3946 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 3947 S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true); 3948 3949 if (device->physical_device->rad_info.gfx_level >= GFX11) 3950 desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1); 3951 else 3952 desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); 3953 3954 if (device->physical_device->rad_info.gfx_level >= GFX11) { 3955 desc[7] |= 
S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 3956 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 3957 } else if (device->physical_device->rad_info.gfx_level >= GFX10) { 3958 desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 3959 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 3960 } else if (device->physical_device->rad_info.gfx_level >= GFX8) { 3961 /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ 3962 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 3963 S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); 3964 } else { 3965 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 3966 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1); 3967 } 3968 } 3969 3970 desc += 8; 3971 3972 if (tess_rings_bo) { 3973 uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); 3974 uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset; 3975 3976 desc[0] = tess_va; 3977 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32); 3978 desc[2] = device->physical_device->hs.tess_factor_ring_size; 3979 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3980 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 3981 3982 if (device->physical_device->rad_info.gfx_level >= GFX11) { 3983 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 3984 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); 3985 } else if (device->physical_device->rad_info.gfx_level >= GFX10) { 3986 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 3987 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); 3988 } else { 3989 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 3990 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 3991 } 3992 3993 desc[4] = tess_offchip_va; 3994 desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32); 3995 desc[6] = device->physical_device->hs.tess_offchip_ring_size; 3996 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3997 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 3998 3999 if (device->physical_device->rad_info.gfx_level >= GFX11) { 4000 desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 4001 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); 4002 } else if (device->physical_device->rad_info.gfx_level >= GFX10) { 4003 desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 4004 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); 4005 } else { 4006 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 4007 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 4008 } 4009 } 4010 4011 desc += 8; 4012 4013 if (task_rings_bo) { 4014 uint64_t task_va = radv_buffer_get_va(task_rings_bo); 4015 uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset; 4016 uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset; 4017 4018 desc[0] = task_draw_ring_va; 4019 desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32); 4020 desc[2] = device->physical_device->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES; 4021 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 4022 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 4023 4024 if (device->physical_device->rad_info.gfx_level >= GFX11) { 4025 desc[3] |= 
S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | 4026 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 4027 } else { 4028 assert(device->physical_device->rad_info.gfx_level >= GFX10_3); 4029 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | 4030 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 4031 } 4032 4033 desc[4] = task_payload_ring_va; 4034 desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32); 4035 desc[6] = device->physical_device->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES; 4036 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 4037 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 4038 4039 if (device->physical_device->rad_info.gfx_level >= GFX11) { 4040 desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | 4041 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 4042 } else { 4043 assert(device->physical_device->rad_info.gfx_level >= GFX10_3); 4044 desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | 4045 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 4046 } 4047 } 4048 4049 desc += 8; 4050 4051 if (mesh_scratch_ring_bo) { 4052 uint64_t va = radv_buffer_get_va(mesh_scratch_ring_bo); 4053 4054 desc[0] = va; 4055 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); 4056 desc[2] = RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES; 4057 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 4058 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 4059 4060 if (device->physical_device->rad_info.gfx_level >= GFX11) { 4061 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | 4062 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 4063 } else { 4064 assert(device->physical_device->rad_info.gfx_level >= GFX10_3); 4065 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | 4066 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 4067 } 4068 } 4069 4070 desc += 4; 4071 4072 if (add_sample_positions) { 4073 /* add sample positions after all rings */ 4074 memcpy(desc, device->sample_locations_1x, 8); 4075 desc += 2; 4076 memcpy(desc, device->sample_locations_2x, 16); 4077 desc += 4; 4078 memcpy(desc, device->sample_locations_4x, 32); 4079 desc += 8; 4080 memcpy(desc, device->sample_locations_8x, 64); 4081 } 4082} 4083 4084static void 4085radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, 4086 struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size, 4087 struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size) 4088{ 4089 if (!esgs_ring_bo && !gsvs_ring_bo) 4090 return; 4091 4092 if (esgs_ring_bo) 4093 radv_cs_add_buffer(device->ws, cs, esgs_ring_bo); 4094 4095 if (gsvs_ring_bo) 4096 radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo); 4097 4098 if (device->physical_device->rad_info.gfx_level >= GFX7) { 4099 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); 4100 radeon_emit(cs, esgs_ring_size >> 8); 4101 radeon_emit(cs, gsvs_ring_size >> 8); 4102 } else { 4103 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2); 4104 radeon_emit(cs, esgs_ring_size >> 8); 4105 radeon_emit(cs, gsvs_ring_size >> 8); 4106 } 4107} 4108 4109static void 4110radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, 4111 struct radeon_winsys_bo *tess_rings_bo) 4112{ 4113 uint64_t tf_va; 4114 uint32_t tf_ring_size; 4115 if (!tess_rings_bo) 4116 return; 4117 
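/* Note: VGT_TF_RING_SIZE is programmed in dwords while hs.tess_factor_ring_size is in bytes, hence the division by 4 below. */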
4118 tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4; 4119 tf_va = radv_buffer_get_va(tess_rings_bo); 4120 4121 radv_cs_add_buffer(device->ws, cs, tess_rings_bo); 4122 4123 if (device->physical_device->rad_info.gfx_level >= GFX7) { 4124 if (device->physical_device->rad_info.gfx_level >= GFX11) { 4125 /* TF_RING_SIZE is per SE on GFX11. */ 4126 tf_ring_size /= device->physical_device->rad_info.max_se; 4127 } 4128 4129 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size)); 4130 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8); 4131 4132 if (device->physical_device->rad_info.gfx_level >= GFX10) { 4133 radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI, 4134 S_030984_BASE_HI(tf_va >> 40)); 4135 } else if (device->physical_device->rad_info.gfx_level == GFX9) { 4136 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40)); 4137 } 4138 4139 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); 4140 } else { 4141 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size)); 4142 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8); 4143 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); 4144 } 4145} 4146 4147static VkResult 4148radv_initialise_task_control_buffer(struct radv_device *device, 4149 struct radeon_winsys_bo *task_rings_bo) 4150{ 4151 uint32_t *ptr = (uint32_t *)device->ws->buffer_map(task_rings_bo); 4152 if (!ptr) 4153 return VK_ERROR_OUT_OF_DEVICE_MEMORY; 4154 4155 const uint32_t num_entries = device->physical_device->task_info.num_entries; 4156 const uint64_t task_va = radv_buffer_get_va(task_rings_bo); 4157 const uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset; 4158 assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF)); 4159 4160 /* 64-bit write_ptr */ 4161 ptr[0] = num_entries; 4162 ptr[1] = 0; 4163 /* 64-bit read_ptr */ 4164 ptr[2] = num_entries; 4165 ptr[3] = 0; 4166 /* 64-bit dealloc_ptr */ 4167 ptr[4] = num_entries; 4168 ptr[5] = 0; 4169 /* num_entries */ 4170 ptr[6] = num_entries; 4171 /* 64-bit draw ring address */ 4172 ptr[7] = task_draw_ring_va; 4173 ptr[8] = task_draw_ring_va >> 32; 4174 4175 device->ws->buffer_unmap(task_rings_bo); 4176 return VK_SUCCESS; 4177} 4178 4179static void 4180radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs, 4181 struct radeon_winsys_bo *task_rings_bo, bool compute) 4182{ 4183 if (!task_rings_bo) 4184 return; 4185 4186 const uint64_t task_ctrlbuf_va = radv_buffer_get_va(task_rings_bo); 4187 assert(radv_is_aligned(task_ctrlbuf_va, 256)); 4188 radv_cs_add_buffer(device->ws, cs, task_rings_bo); 4189 4190 /* Tell the GPU where the task control buffer is. 
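 * DISPATCH_TASK_STATE_INIT carries bits [31:8] of the address in its first dword and the upper 32 bits in the second, which is why the control buffer must be 256-byte aligned (asserted above).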
*/ 4191 radeon_emit(cs, PKT3(PKT3_DISPATCH_TASK_STATE_INIT, 1, 0) | PKT3_SHADER_TYPE_S(!!compute)); 4192 /* bits [31:8]: control buffer address lo, bits[7:0]: reserved (set to zero) */ 4193 radeon_emit(cs, task_ctrlbuf_va & 0xFFFFFF00); 4194 /* bits [31:0]: control buffer address hi */ 4195 radeon_emit(cs, task_ctrlbuf_va >> 32); 4196} 4197 4198static void 4199radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, 4200 uint32_t size_per_wave, uint32_t waves, 4201 struct radeon_winsys_bo *scratch_bo) 4202{ 4203 struct radeon_info *info = &device->physical_device->rad_info; 4204 4205 if (!scratch_bo) 4206 return; 4207 4208 radv_cs_add_buffer(device->ws, cs, scratch_bo); 4209 4210 if (info->gfx_level >= GFX11) { 4211 uint64_t va = radv_buffer_get_va(scratch_bo); 4212 4213 /* WAVES is per SE for SPI_TMPRING_SIZE. */ 4214 waves /= info->num_se; 4215 4216 radeon_set_context_reg_seq(cs, R_0286E8_SPI_TMPRING_SIZE, 3); 4217 radeon_emit(cs, S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 256))); 4218 radeon_emit(cs, va >> 8); /* SPI_GFX_SCRATCH_BASE_LO */ 4219 radeon_emit(cs, va >> 40); /* SPI_GFX_SCRATCH_BASE_HI */ 4220 } else { 4221 radeon_set_context_reg( 4222 cs, R_0286E8_SPI_TMPRING_SIZE, 4223 S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024))); 4224 } 4225} 4226 4227static void 4228radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, 4229 uint32_t size_per_wave, uint32_t waves, 4230 struct radeon_winsys_bo *compute_scratch_bo) 4231{ 4232 struct radeon_info *info = &device->physical_device->rad_info; 4233 uint64_t scratch_va; 4234 uint32_t rsrc1; 4235 4236 if (!compute_scratch_bo) 4237 return; 4238 4239 scratch_va = radv_buffer_get_va(compute_scratch_bo); 4240 rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32); 4241 4242 if (device->physical_device->rad_info.gfx_level >= GFX11) 4243 rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1); 4244 else 4245 rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1); 4246 4247 radv_cs_add_buffer(device->ws, cs, compute_scratch_bo); 4248 4249 if (info->gfx_level >= GFX11) { 4250 radeon_set_sh_reg_seq(cs, R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, 2); 4251 radeon_emit(cs, scratch_va >> 8); 4252 radeon_emit(cs, scratch_va >> 40); 4253 4254 waves /= info->num_se; 4255 } 4256 4257 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2); 4258 radeon_emit(cs, scratch_va); 4259 radeon_emit(cs, rsrc1); 4260 4261 radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE, 4262 S_00B860_WAVES(waves) | 4263 S_00B860_WAVESIZE(round_up_u32(size_per_wave, info->gfx_level >= GFX11 ? 256 : 1024))); 4264} 4265 4266static void 4267radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, 4268 struct radeon_winsys_bo *descriptor_bo) 4269{ 4270 if (!descriptor_bo) 4271 return; 4272 4273 uint64_t va = radv_buffer_get_va(descriptor_bo); 4274 radv_cs_add_buffer(device->ws, cs, descriptor_bo); 4275 4276 /* Compute shader user data 0-1 have the scratch pointer (unlike GFX shaders), 4277 * so emit the descriptor pointer to user data 2-3 instead (task_ring_offsets arg). 
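 * (R_00B908_COMPUTE_USER_DATA_2 is 8 bytes past R_00B900_COMPUTE_USER_DATA_0, i.e. the SGPR pair for user data 2-3.)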
4278 */ 4279 radv_emit_shader_pointer(device, cs, R_00B908_COMPUTE_USER_DATA_2, va, true); 4280} 4281 4282static void 4283radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, 4284 struct radeon_winsys_bo *descriptor_bo) 4285{ 4286 uint64_t va; 4287 4288 if (!descriptor_bo) 4289 return; 4290 4291 va = radv_buffer_get_va(descriptor_bo); 4292 4293 radv_cs_add_buffer(device->ws, cs, descriptor_bo); 4294 4295 if (device->physical_device->rad_info.gfx_level >= GFX11) { 4296 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, 4297 R_00B420_SPI_SHADER_PGM_LO_HS, 4298 R_00B220_SPI_SHADER_PGM_LO_GS}; 4299 4300 for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 4301 radv_emit_shader_pointer(device, cs, regs[i], va, true); 4302 } 4303 } else if (device->physical_device->rad_info.gfx_level >= GFX10) { 4304 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, 4305 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, 4306 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; 4307 4308 for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 4309 radv_emit_shader_pointer(device, cs, regs[i], va, true); 4310 } 4311 } else if (device->physical_device->rad_info.gfx_level == GFX9) { 4312 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, 4313 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, 4314 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; 4315 4316 for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 4317 radv_emit_shader_pointer(device, cs, regs[i], va, true); 4318 } 4319 } else { 4320 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, 4321 R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0, 4322 R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0}; 4323 4324 for (int i = 0; i < ARRAY_SIZE(regs); ++i) { 4325 radv_emit_shader_pointer(device, cs, regs[i], va, true); 4326 } 4327 } 4328} 4329 4330static void 4331radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_device *device) 4332{ 4333 if (device->gfx_init) { 4334 uint64_t va = radv_buffer_get_va(device->gfx_init); 4335 4336 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 4337 radeon_emit(cs, va); 4338 radeon_emit(cs, va >> 32); 4339 radeon_emit(cs, device->gfx_init_size_dw & 0xffff); 4340 4341 radv_cs_add_buffer(device->ws, cs, device->gfx_init); 4342 } else { 4343 si_emit_graphics(device, cs); 4344 } 4345} 4346 4347static void 4348radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_device *device) 4349{ 4350 si_emit_compute(device, cs); 4351} 4352 4353static VkResult 4354radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device, 4355 const struct radv_queue_ring_info *needs) 4356{ 4357 struct radeon_winsys *ws = device->ws; 4358 struct radeon_winsys_bo *scratch_bo = queue->scratch_bo; 4359 struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo; 4360 struct radeon_winsys_bo *compute_scratch_bo = queue->compute_scratch_bo; 4361 struct radeon_winsys_bo *esgs_ring_bo = queue->esgs_ring_bo; 4362 struct radeon_winsys_bo *gsvs_ring_bo = queue->gsvs_ring_bo; 4363 struct radeon_winsys_bo *tess_rings_bo = queue->tess_rings_bo; 4364 struct radeon_winsys_bo *task_rings_bo = queue->task_rings_bo; 4365 struct radeon_winsys_bo *mesh_scratch_ring_bo = queue->mesh_scratch_ring_bo; 4366 struct radeon_winsys_bo *gds_bo = queue->gds_bo; 4367 struct radeon_winsys_bo *gds_oa_bo = queue->gds_oa_bo; 4368 struct radeon_cmdbuf *dest_cs[3] = {0}; 4369 const uint32_t ring_bo_flags = 
RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING; 4370 VkResult result = VK_SUCCESS; 4371 4372 const bool add_sample_positions = !queue->ring_info.sample_positions && needs->sample_positions; 4373 const uint32_t scratch_size = needs->scratch_size_per_wave * needs->scratch_waves; 4374 const uint32_t queue_scratch_size = 4375 queue->ring_info.scratch_size_per_wave * queue->ring_info.scratch_waves; 4376 4377 if (scratch_size > queue_scratch_size) { 4378 result = ws->buffer_create(ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, 4379 RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo); 4380 if (result != VK_SUCCESS) 4381 goto fail; 4382 } 4383 4384 const uint32_t compute_scratch_size = 4385 needs->compute_scratch_size_per_wave * needs->compute_scratch_waves; 4386 const uint32_t compute_queue_scratch_size = 4387 queue->ring_info.compute_scratch_size_per_wave * queue->ring_info.compute_scratch_waves; 4388 if (compute_scratch_size > compute_queue_scratch_size) { 4389 result = ws->buffer_create(ws, compute_scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, 4390 RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo); 4391 if (result != VK_SUCCESS) 4392 goto fail; 4393 } 4394 4395 if (needs->esgs_ring_size > queue->ring_info.esgs_ring_size) { 4396 result = ws->buffer_create(ws, needs->esgs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, 4397 RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo); 4398 if (result != VK_SUCCESS) 4399 goto fail; 4400 } 4401 4402 if (needs->gsvs_ring_size > queue->ring_info.gsvs_ring_size) { 4403 result = ws->buffer_create(ws, needs->gsvs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags, 4404 RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo); 4405 if (result != VK_SUCCESS) 4406 goto fail; 4407 } 4408 4409 if (!queue->ring_info.tess_rings && needs->tess_rings) { 4410 result = ws->buffer_create( 4411 ws, device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size, 256, 4412 RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo); 4413 if (result != VK_SUCCESS) 4414 goto fail; 4415 } 4416 4417 if (!queue->ring_info.task_rings && needs->task_rings) { 4418 assert(device->physical_device->rad_info.gfx_level >= GFX10_3); 4419 4420 /* We write the control buffer from the CPU, so need to grant CPU access to the BO. 4421 * The draw ring needs to be zero-initialized otherwise the ready bits will be incorrect. 4422 */ 4423 uint32_t task_rings_bo_flags = 4424 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM; 4425 4426 result = ws->buffer_create(ws, device->physical_device->task_info.bo_size_bytes, 256, 4427 RADEON_DOMAIN_VRAM, task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, 4428 0, &task_rings_bo); 4429 if (result != VK_SUCCESS) 4430 goto fail; 4431 4432 result = radv_initialise_task_control_buffer(device, task_rings_bo); 4433 if (result != VK_SUCCESS) 4434 goto fail; 4435 } 4436 4437 if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) { 4438 assert(device->physical_device->rad_info.gfx_level >= GFX10_3); 4439 result = 4440 ws->buffer_create(ws, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256, 4441 RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &mesh_scratch_ring_bo); 4442 4443 if (result != VK_SUCCESS) 4444 goto fail; 4445 } 4446 4447 if (!queue->ring_info.gds && needs->gds) { 4448 assert(device->physical_device->rad_info.gfx_level >= GFX10); 4449 4450 /* 4 streamout GDS counters. 
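 * (On GFX10+, NGG streamout keeps the buffer-filled sizes in GDS, one counter per streamout buffer.)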
       * We need 256B (64 dw) of GDS, otherwise streamout hangs.
       */
      result = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, ring_bo_flags,
                                 RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
      if (result != VK_SUCCESS)
         goto fail;
   }

   if (!queue->ring_info.gds_oa && needs->gds_oa) {
      assert(device->physical_device->rad_info.gfx_level >= GFX10);

      result = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
                                 RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
      if (result != VK_SUCCESS)
         goto fail;
   }

   /* Re-initialize the descriptor BO when any ring BOs changed.
    *
    * Additionally, make sure to create the descriptor BO for the compute queue
    * when it uses the task shader rings. The task rings BO is shared between the
    * GFX and compute queues and already initialized here.
    */
   if ((queue->qf == RADV_QUEUE_COMPUTE && !descriptor_bo && task_rings_bo) ||
       scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
       gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
       task_rings_bo != queue->task_rings_bo ||
       mesh_scratch_ring_bo != queue->mesh_scratch_ring_bo || add_sample_positions) {
      uint32_t size = 0;
      if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo ||
          mesh_scratch_ring_bo || add_sample_positions) {
         size = 160; /* 2 dwords + 2 padding dwords + 9 * 4 dwords = 40 dwords */
         if (add_sample_positions)
            size += 128; /* 1x/2x/4x/8x sample positions: 8+16+32+64 = 120 bytes, padded to 128 */
      } else if (scratch_bo) {
         size = 8; /* 2 dwords */
      }

      result = ws->buffer_create(
         ws, size, 4096, RADEON_DOMAIN_VRAM,
         RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
         RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
      if (result != VK_SUCCESS)
         goto fail;
   }

   if (descriptor_bo != queue->descriptor_bo) {
      uint32_t *map = (uint32_t *)ws->buffer_map(descriptor_bo);
      if (!map) {
         /* Don't fall through to the fail path with result still VK_SUCCESS. */
         result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
         goto fail;
      }

      if (scratch_bo) {
         uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
         uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);

         if (device->physical_device->rad_info.gfx_level >= GFX11)
            rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
         else
            rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);

         map[0] = scratch_va;
         map[1] = rsrc1;
      }

      if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo ||
          mesh_scratch_ring_bo || add_sample_positions)
         radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size,
                                esgs_ring_bo, needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo,
                                task_rings_bo, mesh_scratch_ring_bo);

      ws->buffer_unmap(descriptor_bo);
   }

   for (int i = 0; i < 3; ++i) {
      /* Don't create the continue preamble when it's not necessary. */
      if (i == 2) {
         /* We only need the continue preamble when we can't use indirect buffers. */
         if (!(device->instance->debug_flags & RADV_DEBUG_NO_IBS) &&
             device->physical_device->rad_info.gfx_level >= GFX7)
            continue;
         /* Continue preamble is unnecessary when no shader rings are used.
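          * (i.e. no scratch, no ESGS/GSVS/tess/task/mesh rings, no GDS and no sample positions)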
*/ 4530 if (!needs->scratch_size_per_wave && !needs->compute_scratch_size_per_wave && 4531 !needs->esgs_ring_size && !needs->gsvs_ring_size && !needs->tess_rings && 4532 !needs->task_rings && !needs->mesh_scratch_ring && !needs->gds && !needs->gds_oa && !needs->sample_positions) 4533 continue; 4534 } 4535 4536 enum rgp_flush_bits sqtt_flush_bits = 0; 4537 struct radeon_cmdbuf *cs = NULL; 4538 cs = ws->cs_create(ws, radv_queue_family_to_ring(device->physical_device, queue->qf)); 4539 if (!cs) { 4540 result = VK_ERROR_OUT_OF_HOST_MEMORY; 4541 goto fail; 4542 } 4543 4544 dest_cs[i] = cs; 4545 4546 if (scratch_bo) 4547 radv_cs_add_buffer(ws, cs, scratch_bo); 4548 4549 /* Emit initial configuration. */ 4550 switch (queue->qf) { 4551 case RADV_QUEUE_GENERAL: 4552 radv_init_graphics_state(cs, device); 4553 4554 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo) { 4555 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 4556 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 4557 4558 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 4559 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); 4560 } 4561 4562 radv_emit_gs_ring_sizes(device, cs, esgs_ring_bo, needs->esgs_ring_size, gsvs_ring_bo, 4563 needs->gsvs_ring_size); 4564 radv_emit_tess_factor_ring(device, cs, tess_rings_bo); 4565 radv_emit_task_rings(device, cs, task_rings_bo, false); 4566 radv_emit_graphics_shader_pointers(device, cs, descriptor_bo); 4567 radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, 4568 needs->compute_scratch_waves, compute_scratch_bo); 4569 radv_emit_graphics_scratch(device, cs, needs->scratch_size_per_wave, needs->scratch_waves, 4570 scratch_bo); 4571 break; 4572 case RADV_QUEUE_COMPUTE: 4573 radv_init_compute_state(cs, device); 4574 4575 if (task_rings_bo) { 4576 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 4577 radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); 4578 } 4579 4580 radv_emit_task_rings(device, cs, task_rings_bo, true); 4581 radv_emit_compute_shader_pointers(device, cs, descriptor_bo); 4582 radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave, 4583 needs->compute_scratch_waves, compute_scratch_bo); 4584 break; 4585 default: 4586 break; 4587 } 4588 4589 if (gds_bo) 4590 radv_cs_add_buffer(ws, cs, gds_bo); 4591 if (gds_oa_bo) 4592 radv_cs_add_buffer(ws, cs, gds_oa_bo); 4593 4594 if (i < 2) { 4595 /* The two initial preambles have a cache flush at the beginning. */ 4596 const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; 4597 const bool is_mec = queue->qf == RADV_QUEUE_COMPUTE && gfx_level >= GFX7; 4598 enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | 4599 RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 | 4600 RADV_CMD_FLAG_START_PIPELINE_STATS; 4601 4602 if (i == 0) { 4603 /* The full flush preamble should also wait for previous shader work to finish. 
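             * CS_PARTIAL_FLUSH waits for outstanding compute work; on the general
             * queue, PS_PARTIAL_FLUSH is added below to also wait for pixel work.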
*/ 4604 flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 4605 if (queue->qf == RADV_QUEUE_GENERAL) 4606 flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; 4607 } 4608 4609 si_cs_emit_cache_flush(cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0); 4610 } 4611 4612 result = ws->cs_finalize(cs); 4613 if (result != VK_SUCCESS) 4614 goto fail; 4615 } 4616 4617 if (queue->initial_full_flush_preamble_cs) 4618 ws->cs_destroy(queue->initial_full_flush_preamble_cs); 4619 4620 if (queue->initial_preamble_cs) 4621 ws->cs_destroy(queue->initial_preamble_cs); 4622 4623 if (queue->continue_preamble_cs) 4624 ws->cs_destroy(queue->continue_preamble_cs); 4625 4626 queue->initial_full_flush_preamble_cs = dest_cs[0]; 4627 queue->initial_preamble_cs = dest_cs[1]; 4628 queue->continue_preamble_cs = dest_cs[2]; 4629 4630 if (scratch_bo != queue->scratch_bo) { 4631 if (queue->scratch_bo) 4632 ws->buffer_destroy(ws, queue->scratch_bo); 4633 queue->scratch_bo = scratch_bo; 4634 } 4635 4636 if (compute_scratch_bo != queue->compute_scratch_bo) { 4637 if (queue->compute_scratch_bo) 4638 ws->buffer_destroy(ws, queue->compute_scratch_bo); 4639 queue->compute_scratch_bo = compute_scratch_bo; 4640 } 4641 4642 if (esgs_ring_bo != queue->esgs_ring_bo) { 4643 if (queue->esgs_ring_bo) 4644 ws->buffer_destroy(ws, queue->esgs_ring_bo); 4645 queue->esgs_ring_bo = esgs_ring_bo; 4646 } 4647 4648 if (gsvs_ring_bo != queue->gsvs_ring_bo) { 4649 if (queue->gsvs_ring_bo) 4650 ws->buffer_destroy(ws, queue->gsvs_ring_bo); 4651 queue->gsvs_ring_bo = gsvs_ring_bo; 4652 } 4653 4654 if (descriptor_bo != queue->descriptor_bo) { 4655 if (queue->descriptor_bo) 4656 ws->buffer_destroy(ws, queue->descriptor_bo); 4657 queue->descriptor_bo = descriptor_bo; 4658 } 4659 4660 queue->tess_rings_bo = tess_rings_bo; 4661 queue->task_rings_bo = task_rings_bo; 4662 queue->mesh_scratch_ring_bo = mesh_scratch_ring_bo; 4663 queue->gds_bo = gds_bo; 4664 queue->gds_oa_bo = gds_oa_bo; 4665 queue->ring_info = *needs; 4666 return VK_SUCCESS; 4667fail: 4668 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i) 4669 if (dest_cs[i]) 4670 ws->cs_destroy(dest_cs[i]); 4671 if (descriptor_bo && descriptor_bo != queue->descriptor_bo) 4672 ws->buffer_destroy(ws, descriptor_bo); 4673 if (scratch_bo && scratch_bo != queue->scratch_bo) 4674 ws->buffer_destroy(ws, scratch_bo); 4675 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo) 4676 ws->buffer_destroy(ws, compute_scratch_bo); 4677 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo) 4678 ws->buffer_destroy(ws, esgs_ring_bo); 4679 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo) 4680 ws->buffer_destroy(ws, gsvs_ring_bo); 4681 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo) 4682 ws->buffer_destroy(ws, tess_rings_bo); 4683 if (task_rings_bo && task_rings_bo != queue->task_rings_bo) 4684 ws->buffer_destroy(ws, task_rings_bo); 4685 if (gds_bo && gds_bo != queue->gds_bo) 4686 ws->buffer_destroy(ws, gds_bo); 4687 if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo) 4688 ws->buffer_destroy(ws, gds_oa_bo); 4689 4690 return vk_error(queue, result); 4691} 4692 4693static struct radeon_cmdbuf * 4694radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool unlock) 4695{ 4696 struct radeon_cmdbuf **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 
1 : 0)]; 4697 struct radeon_cmdbuf *cs; 4698 4699 if (*cs_ref) 4700 return *cs_ref; 4701 4702 cs = device->ws->cs_create(device->ws, AMD_IP_GFX); 4703 if (!cs) 4704 return NULL; 4705 4706 ASSERTED unsigned cdw = radeon_check_space(device->ws, cs, 21); 4707 4708 if (!unlock) { 4709 uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET; 4710 radeon_emit(cs, PKT3(PKT3_ATOMIC_MEM, 7, 0)); 4711 radeon_emit(cs, ATOMIC_OP(TC_OP_ATOMIC_CMPSWAP_32) | ATOMIC_COMMAND(ATOMIC_COMMAND_LOOP)); 4712 radeon_emit(cs, mutex_va); /* addr lo */ 4713 radeon_emit(cs, mutex_va >> 32); /* addr hi */ 4714 radeon_emit(cs, 1); /* data lo */ 4715 radeon_emit(cs, 0); /* data hi */ 4716 radeon_emit(cs, 0); /* compare data lo */ 4717 radeon_emit(cs, 0); /* compare data hi */ 4718 radeon_emit(cs, 10); /* loop interval */ 4719 } 4720 4721 uint64_t va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET; 4722 uint64_t unset_va = va + (unlock ? 8 * pass : 0); 4723 uint64_t set_va = va + (unlock ? 0 : 8 * pass); 4724 4725 radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 4726 radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 4727 COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM); 4728 radeon_emit(cs, 0); /* immediate */ 4729 radeon_emit(cs, 0); 4730 radeon_emit(cs, unset_va); 4731 radeon_emit(cs, unset_va >> 32); 4732 4733 radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 4734 radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 4735 COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM); 4736 radeon_emit(cs, 1); /* immediate */ 4737 radeon_emit(cs, 0); 4738 radeon_emit(cs, set_va); 4739 radeon_emit(cs, set_va >> 32); 4740 4741 if (unlock) { 4742 uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET; 4743 4744 radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 4745 radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 4746 COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM); 4747 radeon_emit(cs, 0); /* immediate */ 4748 radeon_emit(cs, 0); 4749 radeon_emit(cs, mutex_va); 4750 radeon_emit(cs, mutex_va >> 32); 4751 } 4752 4753 assert(cs->cdw <= cdw); 4754 4755 VkResult result = device->ws->cs_finalize(cs); 4756 if (result != VK_SUCCESS) { 4757 device->ws->cs_destroy(cs); 4758 return NULL; 4759 } 4760 4761 /* All the casts are to avoid MSVC errors around pointer truncation in a non-taken 4762 * alternative. 4763 */ 4764 if (p_atomic_cmpxchg((uintptr_t*)cs_ref, 0, (uintptr_t)cs) != 0) { 4765 device->ws->cs_destroy(cs); 4766 } 4767 4768 return *cs_ref; 4769} 4770 4771static VkResult 4772radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind) 4773{ 4774 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer); 4775 VkResult result; 4776 4777 for (uint32_t i = 0; i < bind->bindCount; ++i) { 4778 struct radv_device_memory *mem = NULL; 4779 4780 if (bind->pBinds[i].memory != VK_NULL_HANDLE) 4781 mem = radv_device_memory_from_handle(bind->pBinds[i].memory); 4782 4783 result = device->ws->buffer_virtual_bind(device->ws, buffer->bo, 4784 bind->pBinds[i].resourceOffset, bind->pBinds[i].size, 4785 mem ? 
mem->bo : NULL, bind->pBinds[i].memoryOffset); 4786 if (result != VK_SUCCESS) 4787 return result; 4788 } 4789 4790 return VK_SUCCESS; 4791} 4792 4793static VkResult 4794radv_sparse_image_opaque_bind_memory(struct radv_device *device, 4795 const VkSparseImageOpaqueMemoryBindInfo *bind) 4796{ 4797 RADV_FROM_HANDLE(radv_image, image, bind->image); 4798 VkResult result; 4799 4800 for (uint32_t i = 0; i < bind->bindCount; ++i) { 4801 struct radv_device_memory *mem = NULL; 4802 4803 if (bind->pBinds[i].memory != VK_NULL_HANDLE) 4804 mem = radv_device_memory_from_handle(bind->pBinds[i].memory); 4805 4806 result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, 4807 bind->pBinds[i].resourceOffset, bind->pBinds[i].size, 4808 mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset); 4809 if (result != VK_SUCCESS) 4810 return result; 4811 } 4812 4813 return VK_SUCCESS; 4814} 4815 4816static VkResult 4817radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind) 4818{ 4819 RADV_FROM_HANDLE(radv_image, image, bind->image); 4820 struct radeon_surf *surface = &image->planes[0].surface; 4821 uint32_t bs = vk_format_get_blocksize(image->vk.format); 4822 VkResult result; 4823 4824 for (uint32_t i = 0; i < bind->bindCount; ++i) { 4825 struct radv_device_memory *mem = NULL; 4826 uint32_t offset, pitch; 4827 uint32_t mem_offset = bind->pBinds[i].memoryOffset; 4828 const uint32_t layer = bind->pBinds[i].subresource.arrayLayer; 4829 const uint32_t level = bind->pBinds[i].subresource.mipLevel; 4830 4831 VkExtent3D bind_extent = bind->pBinds[i].extent; 4832 bind_extent.width = 4833 DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk.format)); 4834 bind_extent.height = 4835 DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk.format)); 4836 4837 VkOffset3D bind_offset = bind->pBinds[i].offset; 4838 bind_offset.x /= vk_format_get_blockwidth(image->vk.format); 4839 bind_offset.y /= vk_format_get_blockheight(image->vk.format); 4840 4841 if (bind->pBinds[i].memory != VK_NULL_HANDLE) 4842 mem = radv_device_memory_from_handle(bind->pBinds[i].memory); 4843 4844 if (device->physical_device->rad_info.gfx_level >= GFX9) { 4845 offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level]; 4846 pitch = surface->u.gfx9.prt_level_pitch[level]; 4847 } else { 4848 offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 + 4849 surface->u.legacy.level[level].slice_size_dw * 4 * layer; 4850 pitch = surface->u.legacy.level[level].nblk_x; 4851 } 4852 4853 offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs); 4854 4855 uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width); 4856 4857 bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch; 4858 4859 if (whole_subres) { 4860 uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height); 4861 4862 uint32_t size = aligned_extent_width * aligned_extent_height * bs; 4863 result = device->ws->buffer_virtual_bind(device->ws, image->bindings[0].bo, offset, size, 4864 mem ? 
mem->bo : NULL, mem_offset); 4865 if (result != VK_SUCCESS) 4866 return result; 4867 } else { 4868 uint32_t img_increment = pitch * bs; 4869 uint32_t mem_increment = aligned_extent_width * bs; 4870 uint32_t size = mem_increment * surface->prt_tile_height; 4871 for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) { 4872 result = device->ws->buffer_virtual_bind(device->ws, 4873 image->bindings[0].bo, offset + img_increment * y, size, mem ? mem->bo : NULL, 4874 mem_offset + mem_increment * y); 4875 if (result != VK_SUCCESS) 4876 return result; 4877 } 4878 } 4879 } 4880 4881 return VK_SUCCESS; 4882} 4883 4884static VkResult 4885radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device, 4886 struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, 4887 bool *uses_perf_counters) 4888{ 4889 if (queue->qf == RADV_QUEUE_TRANSFER) 4890 return VK_SUCCESS; 4891 4892 /* Figure out the needs of the current submission. 4893 * Start by copying the queue's current info. 4894 * This is done because we only allow two possible behaviours for these buffers: 4895 * - Grow when the newly needed amount is larger than what we had 4896 * - Allocate the max size and reuse it, but don't free it until the queue is destroyed 4897 */ 4898 struct radv_queue_ring_info needs = queue->ring_info; 4899 *uses_perf_counters = false; 4900 for (uint32_t j = 0; j < cmd_buffer_count; j++) { 4901 struct radv_cmd_buffer *cmd_buffer = container_of(cmd_buffers[j], struct radv_cmd_buffer, vk); 4902 4903 needs.scratch_size_per_wave = 4904 MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed); 4905 needs.scratch_waves = MAX2(needs.scratch_waves, cmd_buffer->scratch_waves_wanted); 4906 needs.compute_scratch_size_per_wave = MAX2(needs.compute_scratch_size_per_wave, 4907 cmd_buffer->compute_scratch_size_per_wave_needed); 4908 needs.compute_scratch_waves = 4909 MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted); 4910 needs.esgs_ring_size = MAX2(needs.esgs_ring_size, cmd_buffer->esgs_ring_size_needed); 4911 needs.gsvs_ring_size = MAX2(needs.gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed); 4912 needs.tess_rings |= cmd_buffer->tess_rings_needed; 4913 needs.task_rings |= cmd_buffer->task_rings_needed; 4914 needs.mesh_scratch_ring |= cmd_buffer->mesh_scratch_ring_needed; 4915 needs.gds |= cmd_buffer->gds_needed; 4916 needs.gds_oa |= cmd_buffer->gds_oa_needed; 4917 needs.sample_positions |= cmd_buffer->sample_positions_needed; 4918 *uses_perf_counters |= cmd_buffer->state.uses_perf_counters; 4919 } 4920 4921 /* Sanitize scratch size information. */ 4922 needs.scratch_waves = needs.scratch_size_per_wave 4923 ? MIN2(needs.scratch_waves, UINT32_MAX / needs.scratch_size_per_wave) 4924 : 0; 4925 needs.compute_scratch_waves = 4926 needs.compute_scratch_size_per_wave 4927 ? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave) 4928 : 0; 4929 4930 /* Return early if we already match these needs. 4931 * Note that it's not possible for any of the needed values to be less 4932 * than what the queue already had, because we only ever increase the allocated size. 
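 * (If the queue has no initial preamble CS yet, this is the very first
 * submission and we always need to build the preambles.)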
4933 */ 4934 if (queue->initial_full_flush_preamble_cs && 4935 queue->ring_info.scratch_size_per_wave == needs.scratch_size_per_wave && 4936 queue->ring_info.scratch_waves == needs.scratch_waves && 4937 queue->ring_info.compute_scratch_size_per_wave == needs.compute_scratch_size_per_wave && 4938 queue->ring_info.compute_scratch_waves == needs.compute_scratch_waves && 4939 queue->ring_info.esgs_ring_size == needs.esgs_ring_size && 4940 queue->ring_info.gsvs_ring_size == needs.gsvs_ring_size && 4941 queue->ring_info.tess_rings == needs.tess_rings && 4942 queue->ring_info.task_rings == needs.task_rings && 4943 queue->ring_info.mesh_scratch_ring == needs.mesh_scratch_ring && 4944 queue->ring_info.gds == needs.gds && 4945 queue->ring_info.gds_oa == needs.gds_oa && 4946 queue->ring_info.sample_positions == needs.sample_positions) 4947 return VK_SUCCESS; 4948 4949 return radv_update_preamble_cs(queue, device, &needs); 4950} 4951 4952static VkResult 4953radv_update_ace_preambles(struct radv_queue *queue) 4954{ 4955 if (!radv_queue_init_ace_internal_state(queue)) 4956 return VK_ERROR_OUT_OF_HOST_MEMORY; 4957 4958 /* Copy task rings state. 4959 * Task shaders that are submitted on the ACE queue need to share 4960 * their ring buffers with the mesh shaders on the GFX queue. 4961 */ 4962 queue->ace_internal_state->ring_info.task_rings = queue->state.ring_info.task_rings; 4963 queue->ace_internal_state->task_rings_bo = queue->state.task_rings_bo; 4964 4965 /* Copy some needed states from the parent queue state. 4966 * These can only increase so it's okay to copy them as-is without checking. 4967 * Note, task shaders use the scratch size from their graphics pipeline. 4968 */ 4969 struct radv_queue_ring_info needs = queue->ace_internal_state->ring_info; 4970 needs.compute_scratch_size_per_wave = queue->state.ring_info.scratch_size_per_wave; 4971 needs.compute_scratch_waves = queue->state.ring_info.scratch_waves; 4972 needs.task_rings = queue->state.ring_info.task_rings; 4973 4974 return radv_update_preamble_cs(queue->ace_internal_state, queue->device, &needs); 4975} 4976 4977static bool 4978radv_cmd_buffer_needs_ace(const struct radv_cmd_buffer *cmd_buffer) 4979{ 4980 return cmd_buffer->ace_internal.cs && cmd_buffer->task_rings_needed; 4981} 4982 4983struct radv_deferred_queue_submission { 4984 struct radv_queue *queue; 4985 VkCommandBuffer *cmd_buffers; 4986 uint32_t cmd_buffer_count; 4987 4988 /* Sparse bindings that happen on a queue. 
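 * These arrays mirror the bind infos of a vkQueueBindSparse submission.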
*/ 4989 VkSparseBufferMemoryBindInfo *buffer_binds; 4990 uint32_t buffer_bind_count; 4991 VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds; 4992 uint32_t image_opaque_bind_count; 4993 VkSparseImageMemoryBindInfo *image_binds; 4994 uint32_t image_bind_count; 4995 4996 bool flush_caches; 4997 VkPipelineStageFlags2 wait_dst_stage_mask; 4998 struct radv_semaphore_part **wait_semaphores; 4999 uint32_t wait_semaphore_count; 5000 struct radv_semaphore_part **signal_semaphores; 5001 uint32_t signal_semaphore_count; 5002 VkFence fence; 5003 5004 uint64_t *wait_values; 5005 uint64_t *signal_values; 5006 5007 struct radv_semaphore_part *temporary_semaphore_parts; 5008 uint32_t temporary_semaphore_part_count; 5009 5010 struct list_head queue_pending_list; 5011 uint32_t submission_wait_count; 5012 5013 struct list_head processing_list; 5014}; 5015 5016static VkResult 5017radv_queue_submit_bind_sparse_memory(struct radv_device *device, struct vk_queue_submit *submission) 5018{ 5019 for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) { 5020 VkResult result = radv_sparse_buffer_bind_memory(device, submission->buffer_binds + i); 5021 if (result != VK_SUCCESS) 5022 return result; 5023 } 5024 5025 for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) { 5026 VkResult result = 5027 radv_sparse_image_opaque_bind_memory(device, submission->image_opaque_binds + i); 5028 if (result != VK_SUCCESS) 5029 return result; 5030 } 5031 5032 for (uint32_t i = 0; i < submission->image_bind_count; ++i) { 5033 VkResult result = radv_sparse_image_bind_memory(device, submission->image_binds + i); 5034 if (result != VK_SUCCESS) 5035 return result; 5036 } 5037 5038 return VK_SUCCESS; 5039} 5040 5041static VkResult 5042radv_queue_submit_empty(struct radv_queue *queue, struct vk_queue_submit *submission) 5043{ 5044 struct radeon_winsys_ctx *ctx = queue->hw_ctx; 5045 struct radv_winsys_submit_info submit = { 5046 .ip_type = radv_queue_ring(queue), 5047 .queue_index = queue->vk.index_in_family, 5048 }; 5049 5050 return queue->device->ws->cs_submit(ctx, 1, &submit, submission->wait_count, submission->waits, 5051 submission->signal_count, submission->signals, false); 5052} 5053 5054static VkResult 5055radv_queue_submit_with_ace(struct radv_queue *queue, struct vk_queue_submit *submission, 5056 struct radeon_cmdbuf **cs_array, unsigned cs_count, unsigned cs_offset, 5057 bool can_patch) 5058{ 5059 /* Submits command buffers that may have an internal ACE cmdbuf 5060 * using scheduled dependencies. This guarantees that the GFX cmdbuf 5061 * is only scheduled after ACE. 5062 * 5063 * TODO: Unfortunately this is prone to a deadlock, so is considered a 5064 * temporary solution until gang submit is merged in the upstream kernel. 5065 */ 5066 struct radeon_winsys_ctx *ctx = queue->hw_ctx; 5067 const uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; 5068 const bool need_wait = submission->wait_count > 0; 5069 VkResult result = VK_SUCCESS; 5070 5071 struct radeon_cmdbuf **ace_cs_array = calloc(max_cs_submission, sizeof(struct radeon_cmdbuf *)); 5072 if (!ace_cs_array) { 5073 result = VK_ERROR_OUT_OF_HOST_MEMORY; 5074 goto finish; 5075 } 5076 5077 result = radv_update_ace_preambles(queue); 5078 if (result != VK_SUCCESS) 5079 goto finish; 5080 5081 struct radv_winsys_submit_info submit[2] = { 5082 { 5083 .ip_type = AMD_IP_COMPUTE, 5084 .cs_array = ace_cs_array, 5085 .cs_count = 0, 5086 .initial_preamble_cs = need_wait 5087 ? 
queue->ace_internal_state->initial_full_flush_preamble_cs 5088 : queue->ace_internal_state->initial_preamble_cs, 5089 }, 5090 { 5091 .ip_type = radv_queue_ring(queue), 5092 .queue_index = queue->vk.index_in_family, 5093 .cs_array = cs_array, 5094 .cs_count = 0, 5095 .initial_preamble_cs = need_wait ? queue->state.initial_full_flush_preamble_cs 5096 : queue->state.initial_preamble_cs, 5097 }}; 5098 5099 for (uint32_t advance, j = 0; j < cs_count; j += advance) { 5100 advance = MIN2(max_cs_submission, cs_count - j); 5101 bool last_submit = j + advance == cs_count; 5102 5103 if (queue->device->trace_bo) 5104 *queue->device->trace_id_ptr = 0; 5105 5106 for (unsigned c = 0; c < advance; ++c) { 5107 const struct radv_cmd_buffer *cmd_buffer = 5108 (struct radv_cmd_buffer *)submission->command_buffers[j + c + cs_offset]; 5109 if (!radv_cmd_buffer_needs_ace(cmd_buffer)) 5110 continue; 5111 5112 submit[0].cs_array[submit[0].cs_count++] = cmd_buffer->ace_internal.cs; 5113 } 5114 5115 const uint32_t submit_count = 1 + !!submit[0].cs_count; 5116 const struct radv_winsys_submit_info *submit_ptr = submit + !submit[0].cs_count; 5117 submit[1].cs_count = advance; 5118 5119 result = queue->device->ws->cs_submit( 5120 ctx, submit_count, submit_ptr, j == 0 ? submission->wait_count : 0, submission->waits, 5121 last_submit ? submission->signal_count : 0, submission->signals, can_patch); 5122 5123 if (result != VK_SUCCESS) 5124 goto finish; 5125 5126 if (queue->device->trace_bo) { 5127 radv_check_gpu_hangs(queue, cs_array[j]); 5128 } 5129 5130 if (queue->device->tma_bo) { 5131 radv_check_trap_handler(queue); 5132 } 5133 5134 submit[1].cs_array += submit[1].cs_count; 5135 submit[1].initial_preamble_cs = queue->state.initial_preamble_cs; 5136 submit[0].cs_count = 0; 5137 submit[0].initial_preamble_cs = queue->ace_internal_state->initial_preamble_cs; 5138 } 5139 5140finish: 5141 free(ace_cs_array); 5142 return result; 5143} 5144 5145static VkResult 5146radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submission) 5147{ 5148 struct radeon_winsys_ctx *ctx = queue->hw_ctx; 5149 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; 5150 bool can_patch = true; 5151 bool use_ace = false; 5152 uint32_t advance; 5153 VkResult result; 5154 bool uses_perf_counters = false; 5155 5156 result = radv_update_preambles(&queue->state, queue->device, submission->command_buffers, 5157 submission->command_buffer_count, &uses_perf_counters); 5158 if (result != VK_SUCCESS) 5159 return result; 5160 5161 if (queue->device->trace_bo) 5162 simple_mtx_lock(&queue->device->trace_mtx); 5163 5164 const unsigned cs_offset = uses_perf_counters ? 1 : 0; 5165 const unsigned cmd_buffer_count = 5166 submission->command_buffer_count + (uses_perf_counters ? 
2 : 0);

   struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) * cmd_buffer_count);
   if (!cs_array) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   for (uint32_t j = 0; j < submission->command_buffer_count; j++) {
      struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j];
      assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

      cs_array[j + cs_offset] = cmd_buffer->cs;
      if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
         can_patch = false;

      cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
      use_ace |= radv_cmd_buffer_needs_ace(cmd_buffer);
   }

   if (uses_perf_counters) {
      /* Wrap the submission with the perf counter lock/unlock command streams. */
      cs_array[0] =
         radv_create_perf_counter_lock_cs(queue->device, submission->perf_pass_index, false);
      cs_array[cmd_buffer_count - 1] =
         radv_create_perf_counter_lock_cs(queue->device, submission->perf_pass_index, true);
      can_patch = false;
      if (!cs_array[0] || !cs_array[cmd_buffer_count - 1]) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }
   }

   if (use_ace) {
      /* Note: "fail" is also the common cleanup path when the submission succeeds. */
      result = radv_queue_submit_with_ace(queue, submission, cs_array, cmd_buffer_count, cs_offset,
                                          can_patch);
      goto fail;
   }

   /* For fences on the same queue/vm amdgpu doesn't wait until all processing is
    * finished before starting the next cmdbuffer, so we need to do it here.
    */
   bool need_wait = submission->wait_count > 0;

   struct radv_winsys_submit_info submit = {
      .ip_type = radv_queue_ring(queue),
      .queue_index = queue->vk.index_in_family,
      .cs_array = cs_array,
      .cs_count = 0,
      .initial_preamble_cs =
         need_wait ? queue->state.initial_full_flush_preamble_cs : queue->state.initial_preamble_cs,
      .continue_preamble_cs = queue->state.continue_preamble_cs,
   };

   for (uint32_t j = 0; j < cmd_buffer_count; j += advance) {
      advance = MIN2(max_cs_submission, cmd_buffer_count - j);
      bool last_submit = j + advance == cmd_buffer_count;

      if (queue->device->trace_bo)
         *queue->device->trace_id_ptr = 0;

      submit.cs_count = advance;

      result = queue->device->ws->cs_submit(
         ctx, 1, &submit, j == 0 ? submission->wait_count : 0, submission->waits,
         last_submit ?
submission->signal_count : 0, submission->signals, can_patch); 5228 5229 if (result != VK_SUCCESS) 5230 goto fail; 5231 5232 if (queue->device->trace_bo) { 5233 radv_check_gpu_hangs(queue, cs_array[j]); 5234 } 5235 5236 if (queue->device->tma_bo) { 5237 radv_check_trap_handler(queue); 5238 } 5239 5240 submit.cs_array += advance; 5241 submit.initial_preamble_cs = queue->state.initial_preamble_cs; 5242 } 5243 5244fail: 5245 free(cs_array); 5246 if (queue->device->trace_bo) 5247 simple_mtx_unlock(&queue->device->trace_mtx); 5248 5249 return result; 5250} 5251 5252static VkResult 5253radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission) 5254{ 5255 struct radv_queue *queue = (struct radv_queue *)vqueue; 5256 VkResult result; 5257 5258 result = radv_queue_submit_bind_sparse_memory(queue->device, submission); 5259 if (result != VK_SUCCESS) 5260 goto fail; 5261 5262 if (!submission->command_buffer_count && !submission->wait_count && !submission->signal_count) 5263 return VK_SUCCESS; 5264 5265 if (!submission->command_buffer_count) { 5266 result = radv_queue_submit_empty(queue, submission); 5267 } else { 5268 result = radv_queue_submit_normal(queue, submission); 5269 } 5270 5271fail: 5272 if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) { 5273 /* When something bad happened during the submission, such as 5274 * an out of memory issue, it might be hard to recover from 5275 * this inconsistent state. To avoid this sort of problem, we 5276 * assume that we are in a really bad situation and return 5277 * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt 5278 * to submit the same job again to this device. 5279 */ 5280 result = vk_device_set_lost(&queue->device->vk, "vkQueueSubmit() failed"); 5281 } 5282 return result; 5283} 5284 5285bool 5286radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs) 5287{ 5288 struct radeon_winsys_ctx *ctx = queue->hw_ctx; 5289 struct radv_winsys_submit_info submit = { 5290 .ip_type = radv_queue_ring(queue), 5291 .queue_index = queue->vk.index_in_family, 5292 .cs_array = &cs, 5293 .cs_count = 1, 5294 }; 5295 5296 VkResult result = queue->device->ws->cs_submit(ctx, 1, &submit, 0, NULL, 0, NULL, false); 5297 if (result != VK_SUCCESS) 5298 return false; 5299 5300 return true; 5301} 5302 5303VKAPI_ATTR VkResult VKAPI_CALL 5304radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount, 5305 VkExtensionProperties *pProperties) 5306{ 5307 if (pLayerName) 5308 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); 5309 5310 return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported, 5311 pPropertyCount, pProperties); 5312} 5313 5314VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL 5315radv_GetInstanceProcAddr(VkInstance _instance, const char *pName) 5316{ 5317 RADV_FROM_HANDLE(radv_instance, instance, _instance); 5318 5319 /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly 5320 * when we have to return valid function pointers, NULL, or it's left 5321 * undefined. See the table for exact details. 
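 * In short: the global entrypoints handled explicitly below must work with a
 * NULL instance; everything else is resolved through the instance dispatch table.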
5322 */ 5323 if (pName == NULL) 5324 return NULL; 5325 5326#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \ 5327 if (strcmp(pName, "vk" #entrypoint) == 0) \ 5328 return (PFN_vkVoidFunction)radv_##entrypoint 5329 5330 LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties); 5331 LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties); 5332 LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion); 5333 LOOKUP_RADV_ENTRYPOINT(CreateInstance); 5334 5335 /* GetInstanceProcAddr() can also be called with a NULL instance. 5336 * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057 5337 */ 5338 LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr); 5339 5340#undef LOOKUP_RADV_ENTRYPOINT 5341 5342 if (instance == NULL) 5343 return NULL; 5344 5345 return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName); 5346} 5347 5348/* Windows will use a dll definition file to avoid build errors. */ 5349#ifdef _WIN32 5350#undef PUBLIC 5351#define PUBLIC 5352#endif 5353 5354/* The loader wants us to expose a second GetInstanceProcAddr function 5355 * to work around certain LD_PRELOAD issues seen in apps. 5356 */ 5357PUBLIC 5358VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL 5359vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) 5360{ 5361 return radv_GetInstanceProcAddr(instance, pName); 5362} 5363 5364PUBLIC 5365VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL 5366vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName) 5367{ 5368 RADV_FROM_HANDLE(radv_instance, instance, _instance); 5369 return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); 5370} 5371 5372bool 5373radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD) 5374{ 5375 /* Only set BO metadata for the first plane */ 5376 if (memory->image && memory->image->bindings[0].offset == 0) { 5377 struct radeon_bo_metadata metadata; 5378 radv_init_metadata(device, memory->image, &metadata); 5379 device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata); 5380 } 5381 5382 return device->ws->buffer_get_fd(device->ws, memory->bo, pFD); 5383} 5384 5385void 5386radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device, 5387 struct radeon_winsys_bo *bo) 5388{ 5389 memset(mem, 0, sizeof(*mem)); 5390 vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY); 5391 5392 mem->bo = bo; 5393} 5394 5395void 5396radv_device_memory_finish(struct radv_device_memory *mem) 5397{ 5398 vk_object_base_finish(&mem->base); 5399} 5400 5401void 5402radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator, 5403 struct radv_device_memory *mem) 5404{ 5405 if (mem == NULL) 5406 return; 5407 5408#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 5409 if (mem->android_hardware_buffer) 5410 AHardwareBuffer_release(mem->android_hardware_buffer); 5411#endif 5412 5413 if (mem->bo) { 5414 if (device->overallocation_disallowed) { 5415 mtx_lock(&device->overallocation_mutex); 5416 device->allocated_memory_size[mem->heap_index] -= mem->alloc_size; 5417 mtx_unlock(&device->overallocation_mutex); 5418 } 5419 5420 if (device->use_global_bo_list) 5421 device->ws->buffer_make_resident(device->ws, mem->bo, false); 5422 device->ws->buffer_destroy(device->ws, mem->bo); 5423 mem->bo = NULL; 5424 } 5425 5426 radv_device_memory_finish(mem); 5427 vk_free2(&device->vk.alloc, pAllocator, mem); 5428} 5429 5430static VkResult 5431radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo, 5432 const VkAllocationCallbacks *pAllocator, 
VkDeviceMemory *pMem)
{
   struct radv_device_memory *mem;
   VkResult result;
   enum radeon_bo_domain domain;
   uint32_t flags = 0;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   const VkImportMemoryFdInfoKHR *import_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   const VkMemoryDedicatedAllocateInfo *dedicate_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
   const VkExportMemoryAllocateInfo *export_info =
      vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
   const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
   const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);

   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);

   if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
       !(export_info && (export_info->handleTypes &
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
      /* Apparently, this is allowed: return a null handle without allocating anything. */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   mem =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_device_memory_init(mem, device, NULL);

   if (wsi_info) {
      if (wsi_info->implicit_sync)
         flags |= RADEON_FLAG_IMPLICIT_SYNC;

      /* With prime, the linear buffer would otherwise be allocated in the default
       * heap, which is VRAM. When the display is connected to the iGPU but rendering
       * happens on the dGPU, the DDX function amdgpu_present_check_flip() then fails
       * and presentation falls back to a blit instead of a flip. Setting
       * RADEON_FLAG_GTT_WC allows the kernel to allocate GTT memory that can be
       * scanned out directly on supported hardware. The wsi_info check ensures the
       * flag is only set for memory allocated by the driver itself.
       */
      flags |= RADEON_FLAG_GTT_WC;
   }

   if (dedicate_info) {
      mem->image = radv_image_from_handle(dedicate_info->image);
      mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
   } else {
      mem->image = NULL;
      mem->buffer = NULL;
   }

   if (wsi_info && wsi_info->implicit_sync && mem->buffer) {
      /* Mark the linear prime buffer (aka the destination of the prime blit)
       * as uncached.
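       * (Uncached access is presumably what keeps the blit result coherent for
       * the consuming device without extra cache maintenance.)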
5496 */ 5497 flags |= RADEON_FLAG_VA_UNCACHED; 5498 } 5499 5500 float priority_float = 0.5; 5501 const struct VkMemoryPriorityAllocateInfoEXT *priority_ext = 5502 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT); 5503 if (priority_ext) 5504 priority_float = priority_ext->priority; 5505 5506 uint64_t replay_address = 0; 5507 const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info = 5508 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO); 5509 if (replay_info && replay_info->opaqueCaptureAddress) 5510 replay_address = replay_info->opaqueCaptureAddress; 5511 5512 unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, 5513 (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX)); 5514 5515 mem->user_ptr = NULL; 5516 5517#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 5518 mem->android_hardware_buffer = NULL; 5519#endif 5520 5521 if (ahb_import_info) { 5522 result = radv_import_ahb_memory(device, mem, priority, ahb_import_info); 5523 if (result != VK_SUCCESS) 5524 goto fail; 5525 } else if (export_info && (export_info->handleTypes & 5526 VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) { 5527 result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo); 5528 if (result != VK_SUCCESS) 5529 goto fail; 5530 } else if (import_info) { 5531 assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || 5532 import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 5533 result = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, &mem->bo, NULL); 5534 if (result != VK_SUCCESS) { 5535 goto fail; 5536 } else { 5537 close(import_info->fd); 5538 } 5539 5540 if (mem->image && mem->image->plane_count == 1 && 5541 !vk_format_is_depth_or_stencil(mem->image->vk.format) && mem->image->info.samples == 1 && 5542 mem->image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { 5543 struct radeon_bo_metadata metadata; 5544 device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata); 5545 5546 struct radv_image_create_info create_info = {.no_metadata_planes = true, 5547 .bo_metadata = &metadata}; 5548 5549 /* This gives a basic ability to import radeonsi images 5550 * that don't have DCC. This is not guaranteed by any 5551 * spec and can be removed after we support modifiers. 
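          * radv_image_create_layout() below recomputes the layout from the
          * imported BO metadata.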
*/ 5552 result = radv_image_create_layout(device, create_info, NULL, mem->image); 5553 if (result != VK_SUCCESS) { 5554 device->ws->buffer_destroy(device->ws, mem->bo); 5555 goto fail; 5556 } 5557 } 5558 } else if (host_ptr_info) { 5559 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); 5560 result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, 5561 pAllocateInfo->allocationSize, priority, &mem->bo); 5562 if (result != VK_SUCCESS) { 5563 goto fail; 5564 } else { 5565 mem->user_ptr = host_ptr_info->pHostPointer; 5566 } 5567 } else { 5568 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); 5569 uint32_t heap_index; 5570 5571 heap_index = 5572 device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex] 5573 .heapIndex; 5574 domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex]; 5575 flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex]; 5576 5577 if (!import_info && (!export_info || !export_info->handleTypes)) { 5578 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; 5579 if (device->use_global_bo_list) { 5580 flags |= RADEON_FLAG_PREFER_LOCAL_BO; 5581 } 5582 } 5583 5584 const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO); 5585 if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT) 5586 flags |= RADEON_FLAG_REPLAYABLE; 5587 5588 if (device->instance->zero_vram) 5589 flags |= RADEON_FLAG_ZERO_VRAM; 5590 5591 if (device->overallocation_disallowed) { 5592 uint64_t total_size = 5593 device->physical_device->memory_properties.memoryHeaps[heap_index].size; 5594 5595 mtx_lock(&device->overallocation_mutex); 5596 if (device->allocated_memory_size[heap_index] + alloc_size > total_size) { 5597 mtx_unlock(&device->overallocation_mutex); 5598 result = VK_ERROR_OUT_OF_DEVICE_MEMORY; 5599 goto fail; 5600 } 5601 device->allocated_memory_size[heap_index] += alloc_size; 5602 mtx_unlock(&device->overallocation_mutex); 5603 } 5604 5605 result = device->ws->buffer_create(device->ws, alloc_size, 5606 device->physical_device->rad_info.max_alignment, domain, 5607 flags, priority, replay_address, &mem->bo); 5608 5609 if (result != VK_SUCCESS) { 5610 if (device->overallocation_disallowed) { 5611 mtx_lock(&device->overallocation_mutex); 5612 device->allocated_memory_size[heap_index] -= alloc_size; 5613 mtx_unlock(&device->overallocation_mutex); 5614 } 5615 goto fail; 5616 } 5617 5618 mem->heap_index = heap_index; 5619 mem->alloc_size = alloc_size; 5620 } 5621 5622 if (!wsi_info) { 5623 if (device->use_global_bo_list) { 5624 result = device->ws->buffer_make_resident(device->ws, mem->bo, true); 5625 if (result != VK_SUCCESS) 5626 goto fail; 5627 } 5628 } 5629 5630 *pMem = radv_device_memory_to_handle(mem); 5631 5632 return VK_SUCCESS; 5633 5634fail: 5635 radv_free_memory(device, pAllocator, mem); 5636 5637 return result; 5638} 5639 5640VKAPI_ATTR VkResult VKAPI_CALL 5641radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo, 5642 const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem) 5643{ 5644 RADV_FROM_HANDLE(radv_device, device, _device); 5645 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem); 5646} 5647 5648VKAPI_ATTR void VKAPI_CALL 5649radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator) 5650{ 5651 RADV_FROM_HANDLE(radv_device, device, _device); 5652 
RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

   radv_free_memory(device, pAllocator, mem);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
               VkMemoryMapFlags flags, void **ppData)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem->user_ptr)
      *ppData = mem->user_ptr;
   else
      *ppData = device->ws->buffer_map(mem->bo);

   if (*ppData) {
      *ppData = (uint8_t *)*ppData + offset;
      return VK_SUCCESS;
   }

   return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
}

VKAPI_ATTR void VKAPI_CALL
radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem->user_ptr == NULL)
      device->ws->buffer_unmap(mem->bo);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
                             const VkMappedMemoryRange *pMemoryRanges)
{
   /* All host-visible memory types are host-coherent, so flushes are no-ops. */
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
                                  const VkMappedMemoryRange *pMemoryRanges)
{
   /* See radv_FlushMappedMemoryRanges(). */
   return VK_SUCCESS;
}

static void
radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size,
                                    VkBufferCreateFlags flags, VkBufferUsageFlags usage,
                                    VkMemoryRequirements2 *pMemoryRequirements)
{
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
      ~device->physical_device->memory_types_32bit;

   /* Allow the 32-bit address space for DGC usage, as this buffer will contain
    * cmd buffer upload buffers, and those get passed to shaders through 32-bit
    * pointers.
    *
    * We only allow it with this usage set, to "protect" the 32-bit address space
    * from being overused. The actual requirement is done as part of
    * vkGetGeneratedCommandsMemoryRequirementsNV; we just have to make sure the
    * intersection of the two memory type masks is non-zero.
    */
   if ((usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) && device->uses_device_generated_commands)
      pMemoryRequirements->memoryRequirements.memoryTypeBits |=
         device->physical_device->memory_types_32bit;

   if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
      pMemoryRequirements->memoryRequirements.alignment = 4096;
   else
      pMemoryRequirements->memoryRequirements.alignment = 16;

   /* Top level acceleration structures need the bottom 6 bits to store
    * the root ids of instances. The hardware also needs bvh nodes to
    * be 64 byte aligned.
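    * (64-byte alignment guarantees the low 6 bits of a node address are zero,
    * which is what leaves them free for the root ids.)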
    */
   if (usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR)
      pMemoryRequirements->memoryRequirements.alignment =
         MAX2(pMemoryRequirements->memoryRequirements.alignment, 64);

   pMemoryRequirements->memoryRequirements.size =
      align64(size, pMemoryRequirements->memoryRequirements.alignment);

   vk_foreach_struct(ext, pMemoryRequirements->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
         req->requiresDedicatedAllocation = false;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
radv_GetBufferMemoryRequirements2(VkDevice _device, const VkBufferMemoryRequirementsInfo2 *pInfo,
                                  VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);

   radv_get_buffer_memory_requirements(device, buffer->vk.size, buffer->vk.create_flags,
                                       buffer->vk.usage, pMemoryRequirements);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceBufferMemoryRequirements(VkDevice _device,
                                       const VkDeviceBufferMemoryRequirements *pInfo,
                                       VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   radv_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pInfo->pCreateInfo->flags,
                                       pInfo->pCreateInfo->usage, pMemoryRequirements);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequirementsInfo2 *pInfo,
                                 VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image, image, pInfo->image);

   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
      ~device->physical_device->memory_types_32bit;

   pMemoryRequirements->memoryRequirements.size = image->size;
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;

   vk_foreach_struct(ext, pMemoryRequirements->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
         req->requiresDedicatedAllocation =
            image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceImageMemoryRequirements(VkDevice device,
                                      const VkDeviceImageMemoryRequirements *pInfo,
                                      VkMemoryRequirements2 *pMemoryRequirements)
{
   UNUSED VkResult result;
   VkImage image;

   /* Determining the image size/alignment requires creating a surface, which is
    * complicated without creating an image.
    * TODO: Avoid creating an image.
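    * (The temporary image below is created from the caller's pCreateInfo,
    * queried, then destroyed again.)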
5817 */ 5818 result = radv_CreateImage(device, pInfo->pCreateInfo, NULL, &image); 5819 assert(result == VK_SUCCESS); 5820 5821 VkImageMemoryRequirementsInfo2 info2 = { 5822 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, 5823 .image = image, 5824 }; 5825 5826 radv_GetImageMemoryRequirements2(device, &info2, pMemoryRequirements); 5827 5828 radv_DestroyImage(device, image, NULL); 5829} 5830 5831VKAPI_ATTR void VKAPI_CALL 5832radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, 5833 VkDeviceSize *pCommittedMemoryInBytes) 5834{ 5835 *pCommittedMemoryInBytes = 0; 5836} 5837 5838VKAPI_ATTR VkResult VKAPI_CALL 5839radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount, 5840 const VkBindBufferMemoryInfo *pBindInfos) 5841{ 5842 RADV_FROM_HANDLE(radv_device, device, _device); 5843 5844 for (uint32_t i = 0; i < bindInfoCount; ++i) { 5845 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); 5846 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer); 5847 5848 if (mem->alloc_size) { 5849 VkBufferMemoryRequirementsInfo2 info = { 5850 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, 5851 .buffer = pBindInfos[i].buffer, 5852 }; 5853 VkMemoryRequirements2 reqs = { 5854 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, 5855 }; 5856 5857 radv_GetBufferMemoryRequirements2(_device, &info, &reqs); 5858 5859 if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) { 5860 return vk_errorf(device, VK_ERROR_UNKNOWN, 5861 "Device memory object too small for the buffer.\n"); 5862 } 5863 } 5864 5865 buffer->bo = mem->bo; 5866 buffer->offset = pBindInfos[i].memoryOffset; 5867 } 5868 return VK_SUCCESS; 5869} 5870 5871VKAPI_ATTR VkResult VKAPI_CALL 5872radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, 5873 const VkBindImageMemoryInfo *pBindInfos) 5874{ 5875 RADV_FROM_HANDLE(radv_device, device, _device); 5876 5877 for (uint32_t i = 0; i < bindInfoCount; ++i) { 5878 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); 5879 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image); 5880 5881 if (mem->alloc_size) { 5882 VkImageMemoryRequirementsInfo2 info = { 5883 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, 5884 .image = pBindInfos[i].image, 5885 }; 5886 VkMemoryRequirements2 reqs = { 5887 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, 5888 }; 5889 5890 radv_GetImageMemoryRequirements2(_device, &info, &reqs); 5891 5892 if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) { 5893 return vk_errorf(device, VK_ERROR_UNKNOWN, 5894 "Device memory object too small for the image.\n"); 5895 } 5896 } 5897 5898 if (image->disjoint) { 5899 const VkBindImagePlaneMemoryInfo *plane_info = 5900 vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_PLANE_MEMORY_INFO); 5901 5902 switch (plane_info->planeAspect) { 5903 case VK_IMAGE_ASPECT_PLANE_0_BIT: 5904 image->bindings[0].bo = mem->bo; 5905 image->bindings[0].offset = pBindInfos[i].memoryOffset; 5906 break; 5907 case VK_IMAGE_ASPECT_PLANE_1_BIT: 5908 image->bindings[1].bo = mem->bo; 5909 image->bindings[1].offset = pBindInfos[i].memoryOffset; 5910 break; 5911 case VK_IMAGE_ASPECT_PLANE_2_BIT: 5912 image->bindings[2].bo = mem->bo; 5913 image->bindings[2].offset = pBindInfos[i].memoryOffset; 5914 break; 5915 default: 5916 break; 5917 } 5918 } else { 5919 image->bindings[0].bo = mem->bo; 5920 image->bindings[0].offset = pBindInfos[i].memoryOffset; 5921 } 5922 } 5923 return VK_SUCCESS; 5924} 5925 5926static void 
5927radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator, 5928 struct radv_event *event) 5929{ 5930 if (event->bo) 5931 device->ws->buffer_destroy(device->ws, event->bo); 5932 5933 vk_object_base_finish(&event->base); 5934 vk_free2(&device->vk.alloc, pAllocator, event); 5935} 5936 5937VKAPI_ATTR VkResult VKAPI_CALL 5938radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo, 5939 const VkAllocationCallbacks *pAllocator, VkEvent *pEvent) 5940{ 5941 RADV_FROM_HANDLE(radv_device, device, _device); 5942 enum radeon_bo_domain bo_domain; 5943 enum radeon_bo_flag bo_flags; 5944 struct radv_event *event; 5945 VkResult result; 5946 5947 event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8, 5948 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 5949 if (!event) 5950 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 5951 5952 vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT); 5953 5954 if (pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT) { 5955 bo_domain = RADEON_DOMAIN_VRAM; 5956 bo_flags = RADEON_FLAG_NO_CPU_ACCESS; 5957 } else { 5958 bo_domain = RADEON_DOMAIN_GTT; 5959 bo_flags = RADEON_FLAG_CPU_ACCESS; 5960 } 5961 5962 result = device->ws->buffer_create( 5963 device->ws, 8, 8, bo_domain, 5964 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_NO_INTERPROCESS_SHARING | bo_flags, 5965 RADV_BO_PRIORITY_FENCE, 0, &event->bo); 5966 if (result != VK_SUCCESS) { 5967 radv_destroy_event(device, pAllocator, event); 5968 return vk_error(device, result); 5969 } 5970 5971 if (!(pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT)) { 5972 event->map = (uint64_t *)device->ws->buffer_map(event->bo); 5973 if (!event->map) { 5974 radv_destroy_event(device, pAllocator, event); 5975 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 5976 } 5977 } 5978 5979 *pEvent = radv_event_to_handle(event); 5980 5981 return VK_SUCCESS; 5982} 5983 5984VKAPI_ATTR void VKAPI_CALL 5985radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator) 5986{ 5987 RADV_FROM_HANDLE(radv_device, device, _device); 5988 RADV_FROM_HANDLE(radv_event, event, _event); 5989 5990 if (!event) 5991 return; 5992 5993 radv_destroy_event(device, pAllocator, event); 5994} 5995 5996VKAPI_ATTR VkResult VKAPI_CALL 5997radv_GetEventStatus(VkDevice _device, VkEvent _event) 5998{ 5999 RADV_FROM_HANDLE(radv_device, device, _device); 6000 RADV_FROM_HANDLE(radv_event, event, _event); 6001 6002 if (vk_device_is_lost(&device->vk)) 6003 return VK_ERROR_DEVICE_LOST; 6004 6005 if (*event->map == 1) 6006 return VK_EVENT_SET; 6007 return VK_EVENT_RESET; 6008} 6009 6010VKAPI_ATTR VkResult VKAPI_CALL 6011radv_SetEvent(VkDevice _device, VkEvent _event) 6012{ 6013 RADV_FROM_HANDLE(radv_event, event, _event); 6014 *event->map = 1; 6015 6016 return VK_SUCCESS; 6017} 6018 6019VKAPI_ATTR VkResult VKAPI_CALL 6020radv_ResetEvent(VkDevice _device, VkEvent _event) 6021{ 6022 RADV_FROM_HANDLE(radv_event, event, _event); 6023 *event->map = 0; 6024 6025 return VK_SUCCESS; 6026} 6027 6028void 6029radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device, 6030 struct radeon_winsys_bo *bo, uint64_t size, 6031 uint64_t offset) 6032{ 6033 VkBufferCreateInfo createInfo = { 6034 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 6035 .size = size, 6036 }; 6037 6038 vk_buffer_init(&device->vk, &buffer->vk, &createInfo); 6039 6040 buffer->bo = bo; 6041 buffer->offset = offset; 6042} 6043 6044void 6045radv_buffer_finish(struct radv_buffer *buffer) 6046{ 6047 

void
radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
                 struct radeon_winsys_bo *bo, uint64_t size,
                 uint64_t offset)
{
   VkBufferCreateInfo createInfo = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
   };

   vk_buffer_init(&device->vk, &buffer->vk, &createInfo);

   buffer->bo = bo;
   buffer->offset = offset;
}

void
radv_buffer_finish(struct radv_buffer *buffer)
{
   vk_buffer_finish(&buffer->vk);
}

static void
radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                    struct radv_buffer *buffer)
{
   if ((buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
      device->ws->buffer_destroy(device->ws, buffer->bo);

   radv_buffer_finish(buffer);
   vk_free2(&device->vk.alloc, pAllocator, buffer);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

   buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_buffer_init(&device->vk, &buffer->vk, pCreateInfo);
   buffer->bo = NULL;
   buffer->offset = 0;

   if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
      enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL;
      if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
         flags |= RADEON_FLAG_REPLAYABLE;

      uint64_t replay_address = 0;
      const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =
         vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
      if (replay_info && replay_info->opaqueCaptureAddress)
         replay_address = replay_info->opaqueCaptureAddress;

      VkResult result =
         device->ws->buffer_create(device->ws, align64(buffer->vk.size, 4096), 4096, 0, flags,
                                   RADV_BO_PRIORITY_VIRTUAL, replay_address, &buffer->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_buffer(device, pAllocator, buffer);
         return vk_error(device, result);
      }
   }

   *pBuffer = radv_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

   if (!buffer)
      return;

   radv_destroy_buffer(device, pAllocator, buffer);
}

VKAPI_ATTR VkDeviceAddress VKAPI_CALL
radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
   return radv_buffer_get_va(buffer->bo) + buffer->offset;
}

VKAPI_ATTR uint64_t VKAPI_CALL
radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
   return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0;
}
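
/* Editor's note: a minimal capture/replay sketch (not part of the driver).
 * At capture time the opaque address of a sparse, replayable buffer is
 * queried; at replay time it is fed back so the buffer lands at the same
 * virtual address. "buffer" and "create_info" are hypothetical.
 *
 *    VkBufferDeviceAddressInfo addr_info = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
 *       .buffer = buffer,
 *    };
 *    uint64_t opaque = vkGetBufferOpaqueCaptureAddress(device, &addr_info);
 *
 *    VkBufferOpaqueCaptureAddressCreateInfo replay = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,
 *       .opaqueCaptureAddress = opaque,
 *    };
 *    create_info.pNext = &replay;   // with SPARSE_BINDING +
 *                                   // DEVICE_ADDRESS_CAPTURE_REPLAY flags
 *    vkCreateBuffer(device, &create_info, NULL, &replayed_buffer);
 */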

VKAPI_ATTR uint64_t VKAPI_CALL
radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
                                         const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
   return radv_buffer_get_va(mem->bo);
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static uint32_t
radv_surface_max_layer_count(struct radv_image_view *iview)
{
   return iview->vk.view_type == VK_IMAGE_VIEW_TYPE_3D
             ? iview->extent.depth
             : (iview->vk.base_array_layer + iview->vk.layer_count);
}

static unsigned
get_dcc_max_uncompressed_block_size(const struct radv_device *device,
                                    const struct radv_image_view *iview)
{
   if (device->physical_device->rad_info.gfx_level < GFX10 && iview->image->info.samples > 1) {
      if (iview->image->planes[0].surface.bpe == 1)
         return V_028C78_MAX_BLOCK_SIZE_64B;
      else if (iview->image->planes[0].surface.bpe == 2)
         return V_028C78_MAX_BLOCK_SIZE_128B;
   }

   return V_028C78_MAX_BLOCK_SIZE_256B;
}

static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device)
{
   if (!device->physical_device->rad_info.has_dedicated_vram) {
      /* amdvlk: [min-compressed-block-size] should be set to 32 for
       * dGPU and 64 for APU because all of our APUs to date use
       * DIMMs which have a request granularity size of 64B while all
       * other chips have a 32B request size.
       */
      return V_028C78_MIN_BLOCK_SIZE_64B;
   }

   return V_028C78_MIN_BLOCK_SIZE_32B;
}

static uint32_t
radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
{
   unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
   unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
   unsigned max_compressed_block_size;
   unsigned independent_128b_blocks;
   unsigned independent_64b_blocks;

   if (!radv_dcc_enabled(iview->image, iview->vk.base_mip_level))
      return 0;

   /* For GFX9+, ac_surface computes values for us (except min_compressed
    * and max_uncompressed). */
   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      max_compressed_block_size =
         iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
      independent_128b_blocks =
         iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
      independent_64b_blocks =
         iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
   } else {
      independent_128b_blocks = 0;

      if (iview->image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
         /* If this DCC image is potentially going to be used in texture
          * fetches, we need some special settings.
          */
         independent_64b_blocks = 1;
         max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
      } else {
         /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
          * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
          * big as possible for better compression state.
          */
         independent_64b_blocks = 0;
         max_compressed_block_size = max_uncompressed_block_size;
      }
   }

   uint32_t result = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
                     S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
                     S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
                     S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);

   if (device->physical_device->rad_info.gfx_level >= GFX11) {
      result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
                S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
                S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));
   } else {
      result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(independent_128b_blocks);
   }

   return result;
}

void
radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
                              struct radv_image_view *iview)
{
   const struct util_format_description *desc;
   unsigned ntype, format, swap, endian;
   unsigned blend_clamp = 0, blend_bypass = 0;
   uint64_t va;
   const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
   const struct radeon_surf *surf = &plane->surface;

   desc = vk_format_description(iview->vk.format);

   memset(cb, 0, sizeof(*cb));

   /* Intensity is implemented as Red, so treat it that way. */
   if (device->physical_device->rad_info.gfx_level >= GFX11)
      cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
   else
      cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);

   uint32_t plane_id = iview->image->disjoint ? iview->plane_id : 0;
   va = radv_buffer_get_va(iview->image->bindings[plane_id].bo) +
        iview->image->bindings[plane_id].offset;

   cb->cb_color_base = va >> 8;

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      if (device->physical_device->rad_info.gfx_level >= GFX11) {
         cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
                                 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
      } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
         cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
                                 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
                                 S_028EE0_CMASK_PIPE_ALIGNED(1) |
                                 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
      } else {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (surf->meta_offset)
            meta = surf->u.gfx9.color.dcc;

         cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
                                S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
                                S_028C74_RB_ALIGNED(meta.rb_aligned) |
                                S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
         cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
      }

      cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
      cb->cb_color_base |= surf->tile_swizzle;
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->vk.base_mip_level];
      unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

      cb->cb_color_base += level_info->offset_256B;
      if (level_info->mode == RADEON_SURF_MODE_2D)
         cb->cb_color_base |= surf->tile_swizzle;

      pitch_tile_max = level_info->nblk_x / 8 - 1;
      slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
      tile_mode_index = si_tile_mode_index(plane, iview->vk.base_mip_level, false);

      cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
      cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
      cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;

      cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

      if (radv_image_has_fmask(iview->image)) {
         if (device->physical_device->rad_info.gfx_level >= GFX7)
            cb->cb_color_pitch |=
               S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
      } else {
         /* This must be set for fast clear to work without FMASK. */
         if (device->physical_device->rad_info.gfx_level >= GFX7)
            cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
      }
   }

   /* CMASK variables */
   va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
   va += surf->cmask_offset;
   cb->cb_color_cmask = va >> 8;

   va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
   va += surf->meta_offset;

   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) &&
       device->physical_device->rad_info.gfx_level <= GFX8)
      va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;

   unsigned dcc_tile_swizzle = surf->tile_swizzle;
   dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;

   cb->cb_dcc_base = va >> 8;
   cb->cb_dcc_base |= dcc_tile_swizzle;

   /* GFX10 field has the same base shift as the GFX6 field. */
   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   cb->cb_color_view =
      S_028C6C_SLICE_START(iview->vk.base_array_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);

   if (iview->image->info.samples > 1) {
      unsigned log_samples = util_logbase2(iview->image->info.samples);

      if (device->physical_device->rad_info.gfx_level >= GFX11)
         cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
      else
         cb->cb_color_attrib |=
            S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
   }

   if (radv_image_has_fmask(iview->image)) {
      va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset +
           surf->fmask_offset;
      cb->cb_color_fmask = va >> 8;
      cb->cb_color_fmask |= surf->fmask_tile_swizzle;
   } else {
      cb->cb_color_fmask = cb->cb_color_base;
   }

   ntype = radv_translate_color_numformat(iview->vk.format, desc,
                                          vk_format_get_first_non_void_channel(iview->vk.format));
   format = radv_translate_colorformat(iview->vk.format);
   assert(format != V_028C70_COLOR_INVALID);

   swap = radv_translate_colorswap(iview->vk.format, false);
   endian = radv_colorformat_endian_swap(format);

   /* blend clamp should be set for all NORM/SRGB types */
   if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
       ntype == V_028C70_NUMBER_SRGB)
      blend_clamp = 1;

   /* set blend bypass according to docs if SINT/UINT or
      8/24 COLOR variants */
   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
       format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
       format == V_028C70_COLOR_X24_8_32_FLOAT) {
      blend_clamp = 0;
      blend_bypass = 1;
   }
#if 0
   if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
       (format == V_028C70_COLOR_8 ||
        format == V_028C70_COLOR_8_8 ||
        format == V_028C70_COLOR_8_8_8_8))
      ->color_is_int8 = true;
#endif
   cb->cb_color_info =
      S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
      S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
      S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
                          ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
                          format != V_028C70_COLOR_24_8) |
      S_028C70_NUMBER_TYPE(ntype);

   if (device->physical_device->rad_info.gfx_level >= GFX11)
      cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
   else
      cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);

   if (radv_image_has_fmask(iview->image)) {
      cb->cb_color_info |= S_028C70_COMPRESSION(1);
      if (device->physical_device->rad_info.gfx_level == GFX6) {
         unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
      }

      if (radv_image_is_tc_compat_cmask(iview->image)) {
         /* Allow the texture block to read FMASK directly
          * without decompressing it. This bit must be cleared
          * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
          * otherwise the operation doesn't happen.
          */
         cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);

         if (device->physical_device->rad_info.gfx_level == GFX8) {
            /* Set CMASK into a tiling format that allows
             * the texture block to read it.
             */
            cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
         }
      }
   }

   if (radv_image_has_cmask(iview->image) &&
       !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
      cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
       device->physical_device->rad_info.gfx_level < GFX11)
      cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

   cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

   /* This must be set for fast clear to work without FMASK. */
   if (!radv_image_has_fmask(iview->image) && device->physical_device->rad_info.gfx_level == GFX6) {
      unsigned bankh = util_logbase2(surf->u.legacy.bankh);
      cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
   }

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D
                               ? (iview->extent.depth - 1)
                               : (iview->image->info.array_size - 1);
      unsigned width =
         vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
      unsigned height =
         vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);

      if (device->physical_device->rad_info.gfx_level >= GFX10) {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->vk.base_mip_level);

         cb->cb_color_attrib3 |=
            S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
            S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1);
      } else {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
         cb->cb_color_attrib |=
            S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
      }

      cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
                             S_028C68_MAX_MIP(iview->image->info.levels - 1);
   }
}

static unsigned
radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
{
   unsigned max_zplanes = 0;

   assert(radv_image_is_tc_compat_htile(iview->image));

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      /* Default value for 32-bit depth surfaces. */
      max_zplanes = 4;

      if (iview->vk.format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
         max_zplanes = 2;

      /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
      if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
          radv_image_get_iterate256(device, iview->image) &&
          !radv_image_tile_stencil_disabled(device, iview->image) &&
          iview->image->info.samples == 4) {
         max_zplanes = 1;
      }

      max_zplanes = max_zplanes + 1;
   } else {
      if (iview->vk.format == VK_FORMAT_D16_UNORM) {
         /* Do not enable Z plane compression for 16-bit depth
          * surfaces because it isn't supported on GFX8. Only
          * 32-bit depth surfaces are supported by the hardware.
          * This allows us to maintain shader compatibility and to
          * reduce the number of depth decompressions.
          */
         max_zplanes = 1;
      } else {
         if (iview->image->info.samples <= 1)
            max_zplanes = 5;
         else if (iview->image->info.samples <= 4)
            max_zplanes = 3;
         else
            max_zplanes = 2;
      }
   }

   return max_zplanes;
}

void
radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
                            struct radv_ds_buffer_info *ds)
{
   const struct radeon_surf *surf = &image->planes[0].surface;

   assert(image->vk.format == VK_FORMAT_D16_UNORM);
   memset(ds, 0, sizeof(*ds));

   ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);

   ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) |
                   S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
                   S_028038_ZRANGE_PRECISION(1) |
                   S_028038_TILE_SURFACE_ENABLE(1);
   ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);

   ds->db_depth_size = S_02801C_X_MAX(image->info.width - 1) |
                       S_02801C_Y_MAX(image->info.height - 1);

   ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
   ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) |
                          S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
}

void
radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
                           struct radv_image_view *iview)
{
   unsigned level = iview->vk.base_mip_level;
   unsigned format, stencil_format;
   uint64_t va, s_offs, z_offs;
   bool stencil_only = iview->image->vk.format == VK_FORMAT_S8_UINT;
   const struct radv_image_plane *plane = &iview->image->planes[0];
   const struct radeon_surf *surf = &plane->surface;

   assert(vk_format_get_plane_count(iview->image->vk.format) == 1);

   memset(ds, 0, sizeof(*ds));
   if (!device->instance->absolute_depth_bias) {
      switch (iview->image->vk.format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
      case VK_FORMAT_X8_D24_UNORM_PACK32:
         ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
         break;
      case VK_FORMAT_D16_UNORM:
      case VK_FORMAT_D16_UNORM_S8_UINT:
         ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
         break;
      case VK_FORMAT_D32_SFLOAT:
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         ds->pa_su_poly_offset_db_fmt_cntl =
            S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
         break;
      default:
         break;
      }
   }

   format = radv_translate_dbformat(iview->image->vk.format);
   stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) |
                       S_028008_SLICE_MAX(max_slice);
   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      ds->db_depth_view |= S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) |
                           S_028008_SLICE_MAX_HI(max_slice >> 11);
   }

   ds->db_htile_data_base = 0;
   ds->db_htile_surface = 0;

   va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
   s_offs = z_offs = va;

   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      assert(surf->u.gfx9.surf_offset == 0);
      s_offs += surf->u.gfx9.zs.stencil_offset;

      ds->db_z_info = S_028038_FORMAT(format) |
                      S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
                      S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
                      S_028038_MAXMIP(iview->image->info.levels - 1) |
                      S_028038_ZRANGE_PRECISION(1) |
                      S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
      ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
                            S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
                            S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);

      if (device->physical_device->rad_info.gfx_level == GFX9) {
         ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
         ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
      }

      ds->db_depth_view |= S_028008_MIPID(level);
      ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
                          S_02801C_Y_MAX(iview->image->info.height - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);

            ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);

            if (device->physical_device->rad_info.gfx_level >= GFX10) {
               bool iterate256 = radv_image_get_iterate256(device, iview->image);

               ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
               ds->db_z_info |= S_028040_ITERATE_256(iterate256);
               ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
            } else {
               ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
            }
         }

         if (radv_image_tile_stencil_disabled(device, iview->image)) {
            ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
         }

         va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset +
              surf->meta_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);

         if (device->physical_device->rad_info.gfx_level == GFX9) {
            ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
         }

         if (radv_image_has_vrs_htile(device, iview->image)) {
            ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
         }
      }
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

      if (stencil_only)
         level_info = &surf->u.legacy.zs.stencil_level[level];

      z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
      s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;

      ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
      ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
      ds->db_stencil_info = S_028044_FORMAT(stencil_format);

      if (iview->image->info.samples > 1)
         ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

      if (device->physical_device->rad_info.gfx_level >= GFX7) {
         struct radeon_info *info = &device->physical_device->rad_info;
         unsigned tiling_index = surf->u.legacy.tiling_index[level];
         unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
         unsigned macro_index = surf->u.legacy.macro_tile_index;
         unsigned tile_mode = info->si_tile_mode_array[tiling_index];
         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

         if (stencil_only)
            tile_mode = stencil_tile_mode;

         ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
                              S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
                              S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
                              S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
                              S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
                              S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
         ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
         ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
      } else {
         unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
         ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
         tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
         ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
         if (stencil_only)
            ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
      }

      ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
                          S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
      ds->db_depth_slice =
         S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

         if (radv_image_tile_stencil_disabled(device, iview->image)) {
            ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
         }

         va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset +
              surf->meta_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);

            ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
            ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
         }
      }
   }

   ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
   ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}

static unsigned
radv_tex_wrap(VkSamplerAddressMode address_mode)
{
   switch (address_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return V_008F30_SQ_TEX_WRAP;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return V_008F30_SQ_TEX_MIRROR;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return V_008F30_SQ_TEX_CLAMP_BORDER;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
   default:
      unreachable("illegal tex wrap mode");
      break;
   }
}

static unsigned
radv_tex_compare(VkCompareOp op)
{
   switch (op) {
   case VK_COMPARE_OP_NEVER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   case VK_COMPARE_OP_LESS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
   case VK_COMPARE_OP_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
   case VK_COMPARE_OP_LESS_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
   case VK_COMPARE_OP_GREATER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
   case VK_COMPARE_OP_NOT_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
   case VK_COMPARE_OP_GREATER_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
   case VK_COMPARE_OP_ALWAYS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
   default:
      unreachable("illegal compare mode");
      break;
   }
}

static unsigned
radv_tex_filter(VkFilter filter, unsigned max_aniso)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
                            : V_008F38_SQ_TEX_XY_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
                            : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
   case VK_FILTER_CUBIC_EXT:
   default:
      fprintf(stderr, "illegal texture filter");
      return 0;
   }
}

static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
   switch (mode) {
   case VK_SAMPLER_MIPMAP_MODE_NEAREST:
      return V_008F38_SQ_TEX_Z_FILTER_POINT;
   case VK_SAMPLER_MIPMAP_MODE_LINEAR:
      return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
   default:
      return V_008F38_SQ_TEX_Z_FILTER_NONE;
   }
}

static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
   switch (bcolor) {
   case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
   case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
   case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
   case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
   case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
   case VK_BORDER_COLOR_INT_CUSTOM_EXT:
      return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
   default:
      break;
   }
   return 0;
}

static unsigned
radv_tex_aniso_filter(unsigned filter)
{
   return MIN2(util_logbase2(filter), 4);
}
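
/* Editor's note: a worked mapping for the helper above (editor-added; it
 * follows directly from MIN2(util_logbase2(filter), 4)):
 *
 *    maxAnisotropy   1x  2x  4x  8x  16x
 *    field value      0   1   2   3    4
 *
 * i.e. the hardware field stores log2 of the anisotropy ratio, clamped
 * to 16x.
 */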

static unsigned
radv_tex_filter_mode(VkSamplerReductionMode mode)
{
   switch (mode) {
   case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE:
      return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
   case VK_SAMPLER_REDUCTION_MODE_MIN:
      return V_008F30_SQ_IMG_FILTER_MODE_MIN;
   case VK_SAMPLER_REDUCTION_MODE_MAX:
      return V_008F30_SQ_IMG_FILTER_MODE_MAX;
   default:
      break;
   }
   return 0;
}

static uint32_t
radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
{
   if (device->force_aniso >= 0)
      return device->force_aniso;

   if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
      return (uint32_t)pCreateInfo->maxAnisotropy;

   return 0;
}

static uint32_t
radv_register_border_color(struct radv_device *device, VkClearColorValue value)
{
   uint32_t slot;

   mtx_lock(&device->border_color_data.mutex);

   for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
      if (!device->border_color_data.used[slot]) {
         /* Copy to the GPU wrt endian-ness. */
         util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
                                 sizeof(VkClearColorValue));

         device->border_color_data.used[slot] = true;
         break;
      }
   }

   mtx_unlock(&device->border_color_data.mutex);

   return slot;
}

static void
radv_unregister_border_color(struct radv_device *device, uint32_t slot)
{
   mtx_lock(&device->border_color_data.mutex);

   device->border_color_data.used[slot] = false;

   mtx_unlock(&device->border_color_data.mutex);
}

static void
radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
                  const VkSamplerCreateInfo *pCreateInfo)
{
   uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
   uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
   bool compat_mode = device->physical_device->rad_info.gfx_level == GFX8 ||
                      device->physical_device->rad_info.gfx_level == GFX9;
   unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
   unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   bool trunc_coord =
      pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
   bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
                            pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
                            pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
   VkBorderColor border_color =
      uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
   uint32_t border_color_ptr;
   bool disable_cube_wrap = pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;

   const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
   if (sampler_reduction)
      filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);

   if (pCreateInfo->compareEnable)
      depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);

   sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;

   if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
       border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
      const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
         vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);

      assert(custom_border_color);

      sampler->border_color_slot =
         radv_register_border_color(device, custom_border_color->customBorderColor);

      /* Did we fail to find a slot? */
      if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
         fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
         border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
      }
   }

   /* If we don't have a custom color, set the ptr to 0 */
   border_color_ptr =
      sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;

   sampler->state[0] =
      (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
       S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
       S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
       S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
       S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
       S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
       S_008F30_DISABLE_CUBE_WRAP(disable_cube_wrap) | S_008F30_COMPAT_MODE(compat_mode) |
       S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
   sampler->state[1] = (S_008F34_MIN_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
                        S_008F34_MAX_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
                        S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
   sampler->state[2] = (S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
                        S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
                        S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
                        S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)));
   sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color));

   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      sampler->state[2] |=
         S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level);
   } else {
      sampler->state[2] |=
         S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) |
         S_008F38_FILTER_PREC_FIX(1) |
         S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level &&
                                      device->physical_device->rad_info.gfx_level >= GFX8);
   }

   if (device->physical_device->rad_info.gfx_level >= GFX11) {
      sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr);
   } else {
      sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr);
   }
}
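
/* Editor's note: a minimal application-side sketch (not part of the driver)
 * of the custom border color path above, via VK_EXT_custom_border_color:
 *
 *    VkSamplerCustomBorderColorCreateInfoEXT custom = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
 *       .customBorderColor.float32 = {1.0f, 0.5f, 0.25f, 1.0f},
 *       .format = VK_FORMAT_R8G8B8A8_UNORM,
 *    };
 *    VkSamplerCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *       .pNext = &custom,
 *       .magFilter = VK_FILTER_LINEAR,
 *       .minFilter = VK_FILTER_LINEAR,
 *       .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *       .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *       .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *       .borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT,
 *    };
 *    vkCreateSampler(device, &info, NULL, &sampler);
 *
 * Each such sampler consumes one of the RADV_BORDER_COLOR_COUNT slots
 * registered above for the lifetime of the sampler.
 */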

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_sampler *sampler;

   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);

   radv_init_sampler(device, sampler, pCreateInfo);

   sampler->ycbcr_sampler =
      ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
                       : NULL;
   *pSampler = radv_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
      radv_unregister_border_color(device, sampler->border_color_slot);

   vk_object_base_finish(&sampler->base);
   vk_free2(&device->vk.alloc, pAllocator, sampler);
}

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    * - Loader interface v0 is incompatible with later versions. We don't
    *   support it.
    *
    * - In loader interface v1:
    *    - The first ICD entrypoint called by the loader is
    *      vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *      entrypoint.
    *    - The ICD must statically expose no other Vulkan symbol unless it is
    *      linked with -Bsymbolic.
    *    - Each dispatchable Vulkan handle created by the ICD must be
    *      a pointer to a struct whose first member is VK_LOADER_DATA. The
    *      ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *    - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *      vkDestroySurfaceKHR(). The ICD must be capable of working with
    *      such loader-managed surfaces.
    *
    * - Loader interface v2 differs from v1 in:
    *    - The first ICD entrypoint called by the loader is
    *      vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *      statically expose this entrypoint.
    *
    * - Loader interface v3 differs from v2 in:
    *    - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *      vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *      because the loader no longer does so.
    *
    * - Loader interface v4 differs from v3 in:
    *    - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    *
    * - Loader interface v5 differs from v4 in:
    *    - The ICD must support Vulkan API version 1.1 and must not return
    *      VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
    *      Vulkan Loader with interface v4 or smaller is being used and the
    *      application provides an API version that is greater than 1.0.
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
   return VK_SUCCESS;
}
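
/* Editor's note: a sketch of the negotiation from the loader's side
 * (editor-added, not part of the driver). The loader proposes the highest
 * version it speaks and the ICD lowers it to the common maximum:
 *
 *    uint32_t version = 6;        // hypothetical loader maximum
 *    negotiate(&version);         // the entrypoint above, resolved via dlsym()
 *    // version is now MIN(6, 5) == 5: both sides use interface v5.
 */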

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   bool ret = radv_get_memory_fd(device, memory, pFD);
   if (ret == false)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   return VK_SUCCESS;
}
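
/* Editor's note: a minimal export/import sketch (not part of the driver)
 * for the dma-buf path handled above and below; "memory" is a hypothetical
 * VkDeviceMemory allocated with a chained VkExportMemoryAllocateInfo:
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd;
 *    vkGetMemoryFdKHR(device, &get_fd, &fd);
 *
 *    VkMemoryFdPropertiesKHR props = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
 *    };
 *    vkGetMemoryFdPropertiesKHR(device, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *                               fd, &props);
 *    // props.memoryTypeBits comes from radv_compute_valid_memory_types() below.
 */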

static uint32_t
radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
                                        enum radeon_bo_domain domains, enum radeon_bo_flag flags,
                                        enum radeon_bo_flag ignore_flags)
{
   /* Don't count GTT/CPU as relevant:
    *
    * - We're not fully consistent between the two.
    * - Sometimes VRAM gets VRAM|GTT.
    */
   const enum radeon_bo_domain relevant_domains =
      RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
   uint32_t bits = 0;
   for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
      if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
         continue;

      if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
         continue;

      bits |= 1u << i;
   }

   return bits;
}

static uint32_t
radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
                                enum radeon_bo_flag flags)
{
   enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
   uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);

   if (!bits) {
      ignore_flags |= RADEON_FLAG_GTT_WC;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   if (!bits) {
      ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   /* Avoid 32-bit memory types for shared memory. */
   bits &= ~dev->memory_types_32bit;

   return bits;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
                              int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
      enum radeon_bo_domain domains;
      enum radeon_bo_flag flags;
      if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
         return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

      pMemoryFdProperties->memoryTypeBits =
         radv_compute_valid_memory_types(device->physical_device, domains, flags);
      return VK_SUCCESS;
   }
   default:
      /* The valid usage section for this function says:
       *
       *    "handleType must not be one of the handle types defined as
       *    opaque."
       *
       * So opaque handle types fall into the default "unsupported" case.
       */
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
                                      uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
                                      VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
{
   assert(localDeviceIndex == remoteDeviceIndex);

   *pPeerMemoryFeatures =
      VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
      VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

static const VkTimeDomainEXT radv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
                                                  uint32_t *pTimeDomainCount,
                                                  VkTimeDomainEXT *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
      {
         *i = radv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}
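
/* Editor's note: a minimal usage sketch (not part of the driver) of the
 * standard two-call enumeration pattern for the query above:
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(pdev, &count, NULL);
 *    VkTimeDomainEXT domains[3];   // at most 3 are reported here
 *    vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(pdev, &count, domains);
 *    // domains now holds DEVICE, CLOCK_MONOTONIC and, where available,
 *    // CLOCK_MONOTONIC_RAW.
 */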

#ifndef _WIN32
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
                                const VkCalibratedTimestampInfoEXT *pTimestampInfos,
                                uint64_t *pTimestamps, uint64_t *pMaxDeviation)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
         uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                       s                 e
    *             w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *   Raw       -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                        g
    *             0         1         2         3
    *   GPU       -----_____-----_____-----_____-----_____
    *
    *                                           m
    *                           x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *   Monotonic               -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *   Interval            <----------------->
    *   Deviation   <-------------------------->
    *
    *      s = read(raw)       2
    *      g = read(GPU)       1
    *      m = read(monotonic) 2
    *      e = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the sampling clock.
    */

   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}
#endif
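
/* Editor's note: a minimal usage sketch (not part of the driver) pairing a
 * GPU timestamp with CLOCK_MONOTONIC, e.g. to line up GPU profiling data
 * with CPU-side traces:
 *
 *    VkCalibratedTimestampInfoEXT infos[2] = {
 *       {.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
 *        .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT},
 *       {.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
 *        .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT},
 *    };
 *    uint64_t ts[2], max_deviation;
 *    vkGetCalibratedTimestampsEXT(device, 2, infos, ts, &max_deviation);
 *
 * Worked example of the deviation formula above (editor-added numbers):
 * if the two raw reads are 10000 ns apart and the slowest sampled clock
 * is the GPU counter with a 40 ns tick (assuming a 25 MHz crystal), then
 * *pMaxDeviation = (10000 + 1) + 40 = 10041 ns.
 */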

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
                                               VkSampleCountFlagBits samples,
                                               VkMultisamplePropertiesEXT *pMultisampleProperties)
{
   VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
                                             VK_SAMPLE_COUNT_8_BIT;

   if (samples & supported_samples) {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
   } else {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
                          pFragmentShadingRateCount);

#define append_rate(w, h, s)                                                                       \
   {                                                                                               \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                                              \
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,          \
         .sampleCounts = s,                                                                        \
         .fragmentSize = {.width = w, .height = h},                                                \
      };                                                                                           \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;         \
   }

   for (uint32_t x = 2; x >= 1; x--) {
      for (uint32_t y = 2; y >= 1; y--) {
         VkSampleCountFlagBits samples;

         if (x == 1 && y == 1) {
            samples = ~0;
         } else {
            samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT |
                      VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
         }

         append_rate(x, y, samples);
      }
   }
#undef append_rate

   return vk_outarray_status(&out);
}

static bool
radv_thread_trace_set_pstate(struct radv_device *device, bool enable)
{
   struct radeon_winsys *ws = device->ws;
   enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;

   if (device->physical_device->rad_info.has_stable_pstate) {
      /* pstate is per-device; setting it for one ctx is sufficient.
       * We pick the first initialized one below.
       */
      for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++)
         if (device->hw_ctx[i])
            return ws->ctx_set_pstate(device->hw_ctx[i], pstate) >= 0;
   }

   return true;
}

bool
radv_device_acquire_performance_counters(struct radv_device *device)
{
   bool result = true;
   simple_mtx_lock(&device->pstate_mtx);

   if (device->pstate_cnt == 0) {
      result = radv_thread_trace_set_pstate(device, true);
      if (result)
         ++device->pstate_cnt;
   }

   simple_mtx_unlock(&device->pstate_mtx);
   return result;
}

void
radv_device_release_performance_counters(struct radv_device *device)
{
   simple_mtx_lock(&device->pstate_mtx);

   if (--device->pstate_cnt == 0)
      radv_thread_trace_set_pstate(device, false);

   simple_mtx_unlock(&device->pstate_mtx);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_AcquireProfilingLockKHR(VkDevice _device, const VkAcquireProfilingLockInfoKHR *pInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   bool result = radv_device_acquire_performance_counters(device);
   return result ? VK_SUCCESS : VK_ERROR_UNKNOWN;
}

VKAPI_ATTR void VKAPI_CALL
radv_ReleaseProfilingLockKHR(VkDevice _device)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   radv_device_release_performance_counters(device);
}
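
/* Editor's note: a minimal usage sketch (not part of the driver) of the
 * VK_KHR_performance_query profiling lock handled above. While the lock is
 * held, RADV keeps the context at the peak stable pstate (see
 * radv_thread_trace_set_pstate) so counter results are comparable:
 *
 *    VkAcquireProfilingLockInfoKHR lock_info = {
 *       .sType = VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
 *       .timeout = UINT64_MAX,
 *    };
 *    if (vkAcquireProfilingLockKHR(device, &lock_info) == VK_SUCCESS) {
 *       // ... record and submit work using performance query pools ...
 *       vkReleaseProfilingLockKHR(device);
 *    }
 */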