/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include <fcntl.h>
#include <poll.h>
#include <sys/sysinfo.h>

#include "git_sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"

#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_query.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
     defined(VK_USE_PLATFORM_XCB_KHR) || \
     defined(VK_USE_PLATFORM_XLIB_KHR) || \
     defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define TU_HAS_SURFACE 1
#else
#define TU_HAS_SURFACE 0
#endif

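/* Derive the pipeline cache UUID from the driver build (via the disk-cache
 * function identifier), the GPU family, and the debug flags that affect
 * generated code, so that cached shaders are invalidated whenever any of
 * these change.
 */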
static int
tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
    * initialized until after compiler creation so we have to add it to the
    * shader hash instead, since the compiler is only created with the logical
    * device.
    */
   uint64_t driver_flags = device->instance->debug_flags & TU_DEBUG_NOMULTIPOS;
   uint16_t family = fd_dev_gpu_id(&device->dev_id);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(tu_device_get_cache_uuid, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

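/* Advertise Vulkan 1.2; the patch component tracks the Vulkan header
 * version we were built against.
 */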
#define TU_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = TU_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table tu_instance_extensions_supported = {
   .KHR_device_group_creation           = true,
   .KHR_external_fence_capabilities     = true,
   .KHR_external_memory_capabilities    = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .KHR_surface                         = TU_HAS_SURFACE,
   .KHR_get_surface_capabilities2       = TU_HAS_SURFACE,
   .EXT_debug_report                    = true,
   .EXT_debug_utils                     = true,
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                 = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                     = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                    = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display            = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                         = true,
   .KHR_get_display_properties2         = true,
   .EXT_direct_mode_display             = true,
   .EXT_display_surface_counter         = true,
   .EXT_acquire_drm_display             = true,
#endif
};

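/* Device extensions are reported per physical device: most are
 * unconditional, while a few are gated on hardware features (e.g.
 * storage_16bit) or on debug flags (e.g. TU_DEBUG_PERFC for
 * KHR_performance_query).
 */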
static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_imageless_framebuffer = true,
      .KHR_incremental_present = TU_HAS_SURFACE,
      .KHR_image_format_list = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_performance_query = device->instance->debug_flags & TU_DEBUG_PERFC,
      .KHR_pipeline_executable_properties = true,
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_swapchain = TU_HAS_SURFACE,
      .KHR_swapchain_mutable_format = TU_HAS_SURFACE,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_driver_properties = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_buffer_device_address = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_synchronization2 = true,
      .KHR_dynamic_rendering = true,
#ifndef TU_USE_KGSL
      .KHR_timeline_semaphore = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_external_memory_dma_buf = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = true,
      .EXT_transform_feedback = true,
      .EXT_4444_formats = true,
      .EXT_border_color_swizzle = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_indexing = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_shader_module_identifier = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_provoking_vertex = true,
      .EXT_line_rasterization = true,
      .EXT_subgroup_size_control = true,
      .EXT_image_robustness = true,
      .EXT_primitives_generated_query = true,
      .EXT_image_view_min_lod = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_creation_cache_control = true,
#ifndef TU_USE_KGSL
      .EXT_physical_device_drm = true,
#endif
      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
      .ARM_rasterization_order_attachment_access = true,
#ifdef ANDROID
      .ANDROID_native_buffer = true,
#endif
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .VALVE_mutable_descriptor_type = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_color_write_enable = true,
      .EXT_load_store_op_none = true,
   };
}

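/* NULL-terminated list of pipeline cache object types that the common
 * pipeline cache code may deserialize when importing cache data for this
 * driver.
 */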
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
   &tu_shaders_ops,
   NULL,
};

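/* Set up the bulk of a tu_physical_device: the device name, per-GPU info
 * and CCU layout, UUIDs, supported extensions, WSI, and the on-disk shader
 * cache.  dev_id, gmem_size, and the device fds are expected to have been
 * filled in by the enumeration code before this is called.
 */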
VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance)
{
   VkResult result = VK_SUCCESS;

   const char *fd_name = fd_dev_name(&device->dev_id);
   if (strncmp(fd_name, "FD", 2) == 0) {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "Turnip Adreno (TM) %s", &fd_name[2]);
   } else {
      device->name = vk_strdup(&instance->vk.alloc, fd_name,
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   }
   if (!device->name) {
      return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                               "device name alloc fail");
   }

   const struct fd_dev_info *info = fd_dev_info(&device->dev_id);
   if (!info) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   switch (fd_dev_gen(&device->dev_id)) {
   case 6:
      device->info = info;
      device->ccu_offset_bypass = device->info->num_ccu * A6XX_CCU_DEPTH_SIZE;
      device->ccu_offset_gmem = (device->gmem_size -
         device->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
      break;
   default:
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "cannot generate UUID");
      goto fail_free_name;
   }

   fd_get_driver_uuid(device->driver_uuid);
   fd_get_device_uuid(device->device_uuid, &device->dev_id);

   struct vk_device_extension_table supported_extensions;
   get_device_extensions(device, &supported_extensions);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    &supported_extensions,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail_free_name;

   device->vk.supported_sync_types = device->sync_types;

#if TU_HAS_SURFACE
   result = tu_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(instance, result, "WSI init failure");
      vk_physical_device_finish(&device->vk);
      goto fail_free_name;
   }
#endif

   /* The gpu id is already embedded in the uuid, so we just pass the device
    * name when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->vk.disk_cache = disk_cache_create(device->name, buf, 0);

   device->vk.pipeline_cache_import_ops = cache_import_ops;

   return VK_SUCCESS;

fail_free_name:
   vk_free(&instance->vk.alloc, (void *)device->name);
   return result;
}

static void
tu_physical_device_finish(struct tu_physical_device *device)
{
#if TU_HAS_SURFACE
   tu_wsi_finish(device);
#endif

   close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);

   vk_free(&device->instance->vk.alloc, (void *)device->name);

   vk_physical_device_finish(&device->vk);
}

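/* Debug flags are parsed from the comma-separated TU_DEBUG environment
 * variable, e.g. (illustrative):
 *
 *    TU_DEBUG=startup,sysmem,perfc vulkaninfo
 */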
static const struct debug_control tu_debug_options[] = {
   { "startup", TU_DEBUG_STARTUP },
   { "nir", TU_DEBUG_NIR },
   { "nobin", TU_DEBUG_NOBIN },
   { "sysmem", TU_DEBUG_SYSMEM },
   { "gmem", TU_DEBUG_GMEM },
   { "forcebin", TU_DEBUG_FORCEBIN },
   { "layout", TU_DEBUG_LAYOUT },
   { "noubwc", TU_DEBUG_NOUBWC },
   { "nomultipos", TU_DEBUG_NOMULTIPOS },
   { "nolrz", TU_DEBUG_NOLRZ },
   { "nolrzfc", TU_DEBUG_NOLRZFC },
   { "perf", TU_DEBUG_PERF },
   { "perfc", TU_DEBUG_PERFC },
   { "flushall", TU_DEBUG_FLUSHALL },
   { "syncdraw", TU_DEBUG_SYNCDRAW },
   { "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD },
   { "rast_order", TU_DEBUG_RAST_ORDER },
   { "unaligned_store", TU_DEBUG_UNALIGNED_STORE },
   { "log_skip_gmem_ops", TU_DEBUG_LOG_SKIP_GMEM_OPS },
   { "dynamic", TU_DEBUG_DYNAMIC },
   { NULL, 0 }
};

const char *
tu_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(tu_debug_options) - 1);
   return tu_debug_options[id].string;
}

static const driOptionDescription tu_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
   DRI_CONF_SECTION_END
};

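/* Parse driconf options for the "turnip" driver, matched against the
 * application/engine name and version from VkApplicationInfo, and fold any
 * options we act on directly into the instance debug flags.
 */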
static void
tu_init_dri_options(struct tu_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
                      ARRAY_SIZE(tu_dri_options));
   driParseConfigFiles(&instance->dri_options,
                       &instance->available_dri_options, 0, "turnip", NULL,
                       NULL, instance->vk.app_info.app_name,
                       instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name,
                       instance->vk.app_info.engine_version);

   if (driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load"))
      instance->debug_flags |= TU_DEBUG_DONT_CARE_AS_LOAD;
}

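/* vkCreateInstance: allocate the instance, wire up the dispatch tables
 * (driver entrypoints first, then WSI fallbacks), and initialize the debug
 * flags and driconf state.  Physical devices are not enumerated here; that
 * happens lazily on the first vkEnumeratePhysicalDevices() call.
 */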
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkInstance *pInstance)
{
   struct tu_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk,
                             &tu_instance_extensions_supported,
                             &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   instance->physical_device_count = -1;

   instance->debug_flags =
      parse_debug_string(os_get_option("TU_DEBUG"), tu_debug_options);

#ifdef DEBUG
   /* Enable startup debugging by default on debug drivers.  You almost always
    * want to see your startup failures in that case, and it's hard to set
    * this env var on android.
    */
   instance->debug_flags |= TU_DEBUG_STARTUP;
#endif

   if (instance->debug_flags & TU_DEBUG_STARTUP)
      mesa_logi("Created an instance");

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   tu_init_dri_options(instance);

   *pInstance = tu_instance_to_handle(instance);

#ifdef HAVE_PERFETTO
   tu_perfetto_init();
#endif

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyInstance(VkInstance _instance,
                   const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);

   if (!instance)
      return;

   for (int i = 0; i < instance->physical_device_count; ++i) {
      tu_physical_device_finish(instance->physical_devices + i);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

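/* Physical devices are enumerated lazily on the first query.  Applications
 * use the standard two-call idiom; a minimal sketch (illustrative, error
 * handling omitted):
 *
 *    uint32_t count = 0;
 *    vkEnumeratePhysicalDevices(instance, &count, NULL);
 *    VkPhysicalDevice *devs = malloc(count * sizeof(*devs));
 *    vkEnumeratePhysicalDevices(instance, &count, devs);
 */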
VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDevices(VkInstance _instance,
                            uint32_t *pPhysicalDeviceCount,
                            VkPhysicalDevice *pPhysicalDevices)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out,
                          pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result;

   if (instance->physical_device_count < 0) {
      result = tu_enumerate_devices(instance);
      if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
         return result;
   }

   for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
      vk_outarray_append_typed(VkPhysicalDevice, &out, p)
      {
         *p = tu_physical_device_to_handle(instance->physical_devices + i);
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDeviceGroups(
   VkInstance _instance,
   uint32_t *pPhysicalDeviceGroupCount,
   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
                          pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);
   VkResult result;

   if (instance->physical_device_count < 0) {
      result = tu_enumerate_devices(instance);
      if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
         return result;
   }

   for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         p->physicalDevices[0] =
            tu_physical_device_to_handle(instance->physical_devices + i);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

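/* Core 1.1/1.2/1.3 features are filled into the VkPhysicalDeviceVulkanNN
 * structs below; the vk_get_physical_device_core_*_feature_ext() helpers
 * then answer queries made through the older per-extension feature structs
 * from the same data.
 */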
static void
tu_get_physical_device_features_1_1(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan11Features *features)
{
   features->storageBuffer16BitAccess            = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess  = false;
   features->storagePushConstant16               = false;
   features->storageInputOutput16                = false;
   features->multiview                           = true;
   features->multiviewGeometryShader             = false;
   features->multiviewTessellationShader         = false;
   features->variablePointersStorageBuffer       = true;
   features->variablePointers                    = true;
   features->protectedMemory                     = false;
   features->samplerYcbcrConversion              = true;
   features->shaderDrawParameters                = true;
}

static void
tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan12Features *features)
{
   features->samplerMirrorClampToEdge            = true;
   features->drawIndirectCount                   = true;
   features->storageBuffer8BitAccess             = false;
   features->uniformAndStorageBuffer8BitAccess   = false;
   features->storagePushConstant8                = false;
   features->shaderBufferInt64Atomics            = false;
   features->shaderSharedInt64Atomics            = false;
   features->shaderFloat16                       = true;
   features->shaderInt8                          = false;

   features->descriptorIndexing                                 = true;
   features->shaderInputAttachmentArrayDynamicIndexing          = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing       = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing       = true;
   features->shaderUniformBufferArrayNonUniformIndexing         = true;
   features->shaderSampledImageArrayNonUniformIndexing          = true;
   features->shaderStorageBufferArrayNonUniformIndexing         = true;
   features->shaderStorageImageArrayNonUniformIndexing          = true;
   features->shaderInputAttachmentArrayNonUniformIndexing       = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing    = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing    = true;
   features->descriptorBindingUniformBufferUpdateAfterBind      = true;
   features->descriptorBindingSampledImageUpdateAfterBind       = true;
   features->descriptorBindingStorageImageUpdateAfterBind       = true;
   features->descriptorBindingStorageBufferUpdateAfterBind      = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending          = true;
   features->descriptorBindingPartiallyBound                    = true;
   features->descriptorBindingVariableDescriptorCount           = true;
   features->runtimeDescriptorArray                             = true;

   features->samplerFilterMinmax                 = true;
   features->scalarBlockLayout                   = true;
   features->imagelessFramebuffer                = true;
   features->uniformBufferStandardLayout         = true;
   features->shaderSubgroupExtendedTypes         = true;
   features->separateDepthStencilLayouts         = true;
   features->hostQueryReset                      = true;
   features->timelineSemaphore                   = true;
   features->bufferDeviceAddress                 = true;
   features->bufferDeviceAddressCaptureReplay    = false;
   features->bufferDeviceAddressMultiDevice      = false;
   features->vulkanMemoryModel                   = true;
   features->vulkanMemoryModelDeviceScope        = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex           = true;
   features->shaderOutputLayer                   = true;
   features->subgroupBroadcastDynamicId          = true;
}

static void
tu_get_physical_device_features_1_3(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan13Features *features)
{
   features->robustImageAccess                   = true;
   features->inlineUniformBlock                  = false;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
   features->pipelineCreationCacheControl        = true;
   features->privateData                         = true;
   features->shaderDemoteToHelperInvocation      = true;
   features->shaderTerminateInvocation           = true;
   features->subgroupSizeControl                 = true;
   features->computeFullSubgroups                = true;
   features->synchronization2                    = true;
   features->textureCompressionASTC_HDR          = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering                    = true;
   features->shaderIntegerDotProduct             = true;
   features->maintenance4                        = true;
}

void
tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                              VkPhysicalDeviceFeatures2 *pFeatures)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   pFeatures->features = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = false,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = true,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = false,
      .shaderInt64 = false,
      .shaderInt16 = true,
      .sparseBinding = false,
      .variableMultisampleRate = true,
      .inheritedQueries = true,
   };

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   tu_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   tu_get_physical_device_features_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Features core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   };
   tu_get_physical_device_features_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *) ext;
         features->transformFeedback = true;
         features->geometryStreams = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features = (void *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
         VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features = (void *)ext;
         features->borderColorSwizzle = true;
         features->borderColorSwizzleFromImage = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = (void *) ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext;
         features->extendedDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = true;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         feature->performanceCounterQueryPools = true;
         feature->performanceCounterMultipleQueryPools = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
         VkPhysicalDeviceShaderFloat16Int8Features *features =
            (VkPhysicalDeviceShaderFloat16Int8Features *) ext;
         features->shaderFloat16 = true;
         features->shaderInt8 = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features = (void *)ext;
         features->scalarBlockLayout = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
         VkPhysicalDeviceTimelineSemaphoreFeatures *features =
            (VkPhysicalDeviceTimelineSemaphoreFeatures *) ext;
         features->timelineSemaphore = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
         VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
            (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
         features->mutableDescriptorType = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = true;
         features->bresenhamLines = true;
         features->smoothLines = false;
         features->stippledRectangularLines = false;
         features->stippledBresenhamLines = false;
         features->stippledSmoothLines = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
         VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
            (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
         features->primitiveTopologyListRestart = true;
         features->primitiveTopologyPatchListRestart = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM: {
         VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *features =
            (VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *)ext;
         features->rasterizationOrderColorAttachmentAccess = true;
         features->rasterizationOrderDepthAttachmentAccess = true;
         features->rasterizationOrderStencilAttachmentAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
         features->depthClipControl = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
         VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
            (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
         features->primitivesGeneratedQuery = true;
         features->primitivesGeneratedQueryWithRasterizerDiscard = false;
         features->primitivesGeneratedQueryWithNonZeroStreams = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
         VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
            (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
         features->minLod = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
         VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
            (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
         features->image2DViewOf3D = true;
         features->sampler2DViewOf3D = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
            (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
         features->shaderModuleIdentifier = true;
         break;
      }

      default:
         break;
      }
   }
}

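/* As with features, core 1.1/1.2/1.3 properties are filled into the
 * VkPhysicalDeviceVulkanNN structs and the common helpers fan them out to
 * the per-extension property structs in the pNext chain.
 */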
static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = 128;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Make sure everything is addressable by a signed 32-bit int, and
    * our largest descriptors are 96 bytes.
    */
   p->maxPerSetDescriptors = (1ull << 31) / 96;
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;
}

/* I have no idea what the maximum size is, but the hardware supports very
 * large numbers of descriptors (at least 2^16). This limit is based on
 * CP_LOAD_STATE6, which has a 28-bit field for the DWORD offset, so that
 * we don't have to think about what to do if that overflows, but really
 * nothing is likely to get close to this.
 */
static const size_t max_descriptor_set_size = (1 << 28) / A6XX_TEX_CONST_DWORDS;
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 2,
      .subminor = 7,
      .patch = 1,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16         = true;
   p->shaderDenormPreserveFloat16            = false;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = false;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = false;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = false;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = false;
   p->shaderDenormPreserveFloat64            = false;
   p->shaderRoundingModeRTEFloat64           = false;
   p->shaderRoundingModeRTZFloat64           = false;
   p->shaderSignedZeroInfNanPreserveFloat64  = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative   = true;
   p->shaderSampledImageArrayNonUniformIndexingNative    = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = false;
   p->quadDivergentImplicitLod                           = false;

   p->maxUpdateAfterBindDescriptorsInAllPools            = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources                = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_RTS;

   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone  = false;
   p->independentResolve      = false;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan13Properties *p)
{
   /* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
   p->minSubgroupSize = 64; /* threadsize_base */
   p->maxSubgroupSize = 128; /* threadsize_base * 2 */
   p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   /* VK_EXT_inline_uniform_block is not implemented */
   p->maxInlineUniformBlockSize = 0;
   p->maxPerStageDescriptorInlineUniformBlocks = 0;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 0;
   p->maxDescriptorSetInlineUniformBlocks = 0;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 0;
   p->maxInlineUniformTotalSize = 0;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = false;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = false;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

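/* vkGetPhysicalDeviceProperties2: fixed device limits first, then the core
 * 1.1/1.2/1.3 property structs, then any extension-specific structs in the
 * pNext chain.
 */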
1111VKAPI_ATTR void VKAPI_CALL
1112tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
1113                                VkPhysicalDeviceProperties2 *pProperties)
1114{
1115   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1116
1117   VkPhysicalDeviceLimits limits = {
1118      .maxImageDimension1D = (1 << 14),
1119      .maxImageDimension2D = (1 << 14),
1120      .maxImageDimension3D = (1 << 11),
1121      .maxImageDimensionCube = (1 << 14),
1122      .maxImageArrayLayers = (1 << 11),
1123      .maxTexelBufferElements = 128 * 1024 * 1024,
1124      .maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE,
1125      .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
1126      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1127      .maxMemoryAllocationCount = UINT32_MAX,
1128      .maxSamplerAllocationCount = 64 * 1024,
1129      .bufferImageGranularity = 64,          /* A cache line */
1130      .sparseAddressSpaceSize = 0,
1131      .maxBoundDescriptorSets = MAX_SETS,
1132      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1133      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1134      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1135      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1136      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1137      .maxPerStageDescriptorInputAttachments = MAX_RTS,
1138      .maxPerStageResources = max_descriptor_set_size,
1139      .maxDescriptorSetSamplers = max_descriptor_set_size,
1140      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1141      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1142      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1143      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1144      .maxDescriptorSetSampledImages = max_descriptor_set_size,
1145      .maxDescriptorSetStorageImages = max_descriptor_set_size,
1146      .maxDescriptorSetInputAttachments = MAX_RTS,
1147      .maxVertexInputAttributes = 32,
1148      .maxVertexInputBindings = 32,
1149      .maxVertexInputAttributeOffset = 4095,
1150      .maxVertexInputBindingStride = 2048,
1151      .maxVertexOutputComponents = 128,
1152      .maxTessellationGenerationLevel = 64,
1153      .maxTessellationPatchSize = 32,
1154      .maxTessellationControlPerVertexInputComponents = 128,
1155      .maxTessellationControlPerVertexOutputComponents = 128,
1156      .maxTessellationControlPerPatchOutputComponents = 120,
1157      .maxTessellationControlTotalOutputComponents = 4096,
1158      .maxTessellationEvaluationInputComponents = 128,
1159      .maxTessellationEvaluationOutputComponents = 128,
1160      .maxGeometryShaderInvocations = 32,
1161      .maxGeometryInputComponents = 64,
1162      .maxGeometryOutputComponents = 128,
1163      .maxGeometryOutputVertices = 256,
1164      .maxGeometryTotalOutputComponents = 1024,
1165      .maxFragmentInputComponents = 124,
1166      .maxFragmentOutputAttachments = 8,
1167      .maxFragmentDualSrcAttachments = 1,
1168      .maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2,
1169      .maxComputeSharedMemorySize = 32768,
1170      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
1171      .maxComputeWorkGroupInvocations = 2048,
1172      .maxComputeWorkGroupSize = { 1024, 1024, 1024 },
1173      .subPixelPrecisionBits = 8,
1174      .subTexelPrecisionBits = 8,
1175      .mipmapPrecisionBits = 8,
1176      .maxDrawIndexedIndexValue = UINT32_MAX,
1177      .maxDrawIndirectCount = UINT32_MAX,
1178      .maxSamplerLodBias = 4095.0 / 256.0, /* [-16, 15.99609375] */
1179      .maxSamplerAnisotropy = 16,
1180      .maxViewports = MAX_VIEWPORTS,
1181      .maxViewportDimensions = { MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
1182      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
1183      .viewportSubPixelBits = 8,
1184      .minMemoryMapAlignment = 4096, /* A page */
1185      .minTexelBufferOffsetAlignment = 64,
1186      .minUniformBufferOffsetAlignment = 64,
1187      .minStorageBufferOffsetAlignment = 64,
1188      .minTexelOffset = -16,
1189      .maxTexelOffset = 15,
1190      .minTexelGatherOffset = -32,
1191      .maxTexelGatherOffset = 31,
1192      .minInterpolationOffset = -0.5,
1193      .maxInterpolationOffset = 0.4375,
1194      .subPixelInterpolationOffsetBits = 4,
1195      .maxFramebufferWidth = (1 << 14),
1196      .maxFramebufferHeight = (1 << 14),
1197      .maxFramebufferLayers = (1 << 10),
1198      .framebufferColorSampleCounts = sample_counts,
1199      .framebufferDepthSampleCounts = sample_counts,
1200      .framebufferStencilSampleCounts = sample_counts,
1201      .framebufferNoAttachmentsSampleCounts = sample_counts,
1202      .maxColorAttachments = MAX_RTS,
1203      .sampledImageColorSampleCounts = sample_counts,
1204      .sampledImageIntegerSampleCounts = sample_counts,
1205      .sampledImageDepthSampleCounts = sample_counts,
1206      .sampledImageStencilSampleCounts = sample_counts,
1207      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1208      .maxSampleMaskWords = 1,
1209      .timestampComputeAndGraphics = true,
1210      .timestampPeriod = 1000000000.0 / 19200000.0, /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
1211      .maxClipDistances = 8,
1212      .maxCullDistances = 8,
1213      .maxCombinedClipAndCullDistances = 8,
1214      .discreteQueuePriorities = 2,
1215      .pointSizeRange = { 1, 4092 },
1216      .lineWidthRange = { 1.0, 1.0 },
1217      .pointSizeGranularity = 	0.0625,
1218      .lineWidthGranularity = 0.0,
1219      .strictLines = true,
1220      .standardSampleLocations = true,
1221      .optimalBufferCopyOffsetAlignment = 128,
1222      .optimalBufferCopyRowPitchAlignment = 128,
1223      .nonCoherentAtomSize = 64,
1224   };
1225
1226   pProperties->properties = (VkPhysicalDeviceProperties) {
1227      .apiVersion = TU_API_VERSION,
1228      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x5143,
      .deviceID = pdevice->dev_id.chip_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = { 0 },
   };

   strcpy(pProperties->properties.deviceName, pdevice->name);
   memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   tu_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   tu_get_physical_device_properties_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Properties core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
   };
   tu_get_physical_device_properties_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pProperties->pNext)
   {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;

         properties->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = 512;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = true;
         properties->transformFeedbackStreamsLinesTriangles = true;
         properties->transformFeedbackRasterizationStreamSelect = true;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
         properties->sampleLocationSampleCounts = 0;
         if (pdevice->vk.supported_extensions.EXT_sample_locations) {
            properties->sampleLocationSampleCounts =
               VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
         }
         properties->maxSampleLocationGridSize = (VkExtent2D) { 1, 1 };
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         props->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (void *)ext;
         props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
         VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
            (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
         properties->allowCommandBufferQueryCopies = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *props = (void *)ext;
         /* see write_buffer_descriptor() */
         props->robustStorageBufferAccessSizeAlignment = 4;
         /* see write_ubo_descriptor() */
         props->robustUniformBufferAccessSizeAlignment = 16;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         properties->provokingVertexModePerPipeline = true;
         properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props =
            (VkPhysicalDeviceDrmPropertiesEXT *)ext;
         props->hasPrimary = pdevice->has_master;
         props->primaryMajor = pdevice->master_major;
         props->primaryMinor = pdevice->master_minor;

         props->hasRender = pdevice->has_local;
         props->renderMajor = pdevice->local_major;
         props->renderMinor = pdevice->local_minor;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
            (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
         STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                       sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         memcpy(props->shaderModuleIdentifierAlgorithmUUID,
                vk_shaderModuleIdentifierAlgorithmUUID,
                sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         break;
      }
      default:
         break;
      }
   }
}

static const VkQueueFamilyProperties tu_queue_family_properties = {
   .queueFlags =
      VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
   .queueCount = 1,
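   /* The always-on counter backing timestamps appears to be 48 bits wide,
    * which at 19.2 MHz takes ~170 days to wrap.
    */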
   .timestampValidBits = 48,
   .minImageTransferGranularity = { 1, 1, 1 },
};

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
   {
      p->queueFamilyProperties = tu_queue_family_properties;
   }
}

uint64_t
tu_get_system_heap_size(void)
{
   struct sysinfo info;
   sysinfo(&info);

   uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit;

   /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
    * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
    */
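   /* E.g. a device with 16GiB of ram gets a 12GiB heap, while one with
    * 4GiB gets a 2GiB heap.
    */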
   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   return available_ram;
}

static VkDeviceSize
tu_get_budget_memory(struct tu_physical_device *physical_device)
{
   uint64_t heap_size = physical_device->heap.size;
   uint64_t heap_used = physical_device->heap.used;
   uint64_t sys_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   /*
    * Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
   uint64_t heap_available = sys_available * 9 / 10;
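   /* The budget is what we already use plus a slice of what is still free,
    * but never more than the advertised heap size.
    */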
   return MIN2(heap_size, heap_used + heap_available);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
                                      VkPhysicalDeviceMemoryProperties2 *props2)
{
   TU_FROM_HANDLE(tu_physical_device, physical_device, pdev);

   VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
   props->memoryHeapCount = 1;
   props->memoryHeaps[0].size = physical_device->heap.size;
   props->memoryHeaps[0].flags = physical_device->heap.flags;

   props->memoryTypeCount = 1;
   props->memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   props->memoryTypes[0].heapIndex = 0;

   vk_foreach_struct(ext, props2->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
         memory_budget_props->heapUsage[0] = physical_device->heap.used;
         memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);

         /* The heapBudget and heapUsage values must be zero for array elements
          * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
          */
         for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            memory_budget_props->heapBudget[i] = 0u;
            memory_budget_props->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         break;
      }
   }
}

static VkResult
tu_queue_init(struct tu_device *device,
              struct tu_queue *queue,
              int idx,
              const VkDeviceQueueCreateInfo *create_info)
{
   /* Match the default priority of fd_context_init.  We ignore
    * pQueuePriorities because the spec says
    *
    *   An implementation may allow a higher-priority queue to starve a
    *   lower-priority queue on the same VkDevice until the higher-priority
    *   queue has no further commands to execute. The relationship of queue
    *   priorities must not cause queues on one VkDevice to starve queues on
    *   another VkDevice.
    *
    * We cannot let one VkDevice starve another.
    */
   const int priority = 1;

   VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   queue->device = device;
#ifndef TU_USE_KGSL
   queue->vk.driver_submit = tu_queue_submit;
#endif

   int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
   if (ret)
      return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
                               "submitqueue create failed");

   queue->fence = -1;

   return VK_SUCCESS;
}

static void
tu_queue_finish(struct tu_queue *queue)
{
   vk_queue_finish(&queue->vk);
   if (queue->fence >= 0)
      close(queue->fence);
   tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
}

uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
{
   /* This is based on the 19.2MHz always-on rbbm timer.
    *
    * TODO: we should probably query this value from the kernel.
    */
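   /* Note that the integer quotient truncates 52.08 to 52 ns per tick, so
    * converted times come out ~0.16% short.
    */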
   return ts * (1000000000 / 19200000);
}

static void*
tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);

   struct tu_bo *bo;
   tu_bo_init_new(device, &bo, size, TU_BO_ALLOC_NO_FLAGS);

   return bo;
}

static void
tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_bo *bo = timestamps;

   tu_bo_finish(device, bo);
}

static void
tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
                   unsigned idx, bool end_of_pipe)
{
   struct tu_bo *bo = timestamps;
   struct tu_cs *ts_cs = cs;

   unsigned ts_offset = idx * sizeof(uint64_t);
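   /* CP_EVENT_WRITE with the TIMESTAMP bit set has the GPU write the 64-bit
    * always-on counter value to bo->iova + ts_offset once the RB_DONE_TS
    * event fires; the final dword is the value field, which is unused for
    * timestamp writes.
    */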
   tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
   tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
   tu_cs_emit(ts_cs, 0x00000000);
}

static uint64_t
tu_trace_read_ts(struct u_trace_context *utctx,
                 void *timestamps, unsigned idx, void *flush_data)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_bo *bo = timestamps;
   struct tu_u_trace_submission_data *submission_data = flush_data;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      tu_device_wait_u_trace(device, submission_data->syncobj);
   }

   if (tu_bo_map(device, bo) != VK_SUCCESS) {
      return U_TRACE_NO_TIMESTAMP;
   }

   uint64_t *ts = bo->map;

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return tu_device_ticks_to_ns(device, ts[idx]);
}

static void
tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_u_trace_submission_data *submission_data = flush_data;

   tu_u_trace_submission_data_finish(device, submission_data);
}

void
tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
                         void *ts_from, uint32_t from_offset,
                         void *ts_to, uint32_t to_offset,
                         uint32_t count)
{
   struct tu_cs *cs = cmdstream;
   struct tu_bo *bo_from = ts_from;
   struct tu_bo *bo_to = ts_to;

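   /* CP_MEMCPY's five payload dwords are the copy length in dwords followed
    * by the 64-bit source and destination iovas; each timestamp is a
    * uint64_t, hence the sizeof scaling below.
    */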
   tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
   tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
   tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
   tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
}

/* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
 * that ignore tracepoints at the beginning/end that are part of a
 * suspend/resume chain.
 */
static struct u_trace_iterator
tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
{
   switch (cmdbuf->state.suspend_resume) {
   case SR_IN_PRE_CHAIN:
      return cmdbuf->trace_renderpass_end;
   case SR_AFTER_PRE_CHAIN:
   case SR_IN_CHAIN_AFTER_PRE_CHAIN:
      return cmdbuf->pre_chain.trace_renderpass_end;
   default:
      return u_trace_begin_iterator(&cmdbuf->trace);
   }
}

static struct u_trace_iterator
tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
{
   switch (cmdbuf->state.suspend_resume) {
   case SR_IN_PRE_CHAIN:
      return cmdbuf->trace_renderpass_end;
   case SR_IN_CHAIN:
   case SR_IN_CHAIN_AFTER_PRE_CHAIN:
      return cmdbuf->trace_renderpass_start;
   default:
      return u_trace_end_iterator(&cmdbuf->trace);
   }
}

VkResult
tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs **cs,
                            struct u_trace **trace_copy)
{
   *cs = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct tu_cs), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*cs == NULL) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

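   /* Size the CS for roughly one 6-dword CP_MEMCPY (pkt7 header plus five
    * payload dwords) per trace chunk, plus 3 dwords for the WFI and
    * WAIT_FOR_ME packets around the copies.
    */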
   tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
              list_length(&cmdbuf->trace.trace_chunks) * 6 + 3);

   tu_cs_begin(*cs);

   tu_cs_emit_wfi(*cs);
   tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);

   *trace_copy = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*trace_copy == NULL) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   u_trace_init(*trace_copy, cmdbuf->trace.utctx);
   u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
                        tu_cmd_end_iterator(cmdbuf),
                        *trace_copy, *cs,
                        tu_copy_timestamp_buffer);

   tu_cs_emit_wfi(*cs);

   tu_cs_end(*cs);

   return VK_SUCCESS;
}

VkResult
tu_u_trace_submission_data_create(
   struct tu_device *device,
   struct tu_cmd_buffer **cmd_buffers,
   uint32_t cmd_buffer_count,
   struct tu_u_trace_submission_data **submission_data)
{
   *submission_data =
      vk_zalloc(&device->vk.alloc,
                sizeof(struct tu_u_trace_submission_data), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!(*submission_data)) {
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   struct tu_u_trace_submission_data *data = *submission_data;

   data->cmd_trace_data =
      vk_zalloc(&device->vk.alloc,
                cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!data->cmd_trace_data) {
      goto fail;
   }

   data->cmd_buffer_count = cmd_buffer_count;
   data->last_buffer_with_tracepoints = -1;

   for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
      struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];

      if (!u_trace_has_points(&cmdbuf->trace))
         continue;

      data->last_buffer_with_tracepoints = i;

      if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
         /* A single command buffer could be submitted several times, but we
          * already baked the timestamp iova addresses and trace points are
          * single-use. Therefore we have to copy the trace points and create
          * a new timestamp buffer on every submit of a reusable command
          * buffer.
          */
         if (tu_create_copy_timestamp_cs(cmdbuf,
               &data->cmd_trace_data[i].timestamp_copy_cs,
               &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
            goto fail;
         }

         assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
      } else {
         data->cmd_trace_data[i].trace = &cmdbuf->trace;
      }
   }

   assert(data->last_buffer_with_tracepoints != -1);

   return VK_SUCCESS;

fail:
   tu_u_trace_submission_data_finish(device, data);
   *submission_data = NULL;

   return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}

void
tu_u_trace_submission_data_finish(
   struct tu_device *device,
   struct tu_u_trace_submission_data *submission_data)
{
   for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
      /* Only free the trace if we had to create a copy of it. */
      struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
      if (cmd_data->timestamp_copy_cs) {
         tu_cs_finish(cmd_data->timestamp_copy_cs);
         vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);

         u_trace_fini(cmd_data->trace);
         vk_free(&device->vk.alloc, cmd_data->trace);
      }
   }

   vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
   vk_free(&device->vk.alloc, submission_data->syncobj);
   vk_free(&device->vk.alloc, submission_data);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDevice(VkPhysicalDevice physicalDevice,
                const VkDeviceCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkDevice *pDevice)
{
   TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
   VkResult result;
   struct tu_device *device;
   bool custom_border_colors = false;
   bool perf_query_pools = false;
   bool robust_buffer_access2 = false;
   bool border_color_without_format = false;

   vk_foreach_struct_const(ext, pCreateInfo->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
         custom_border_colors = border_color_features->customBorderColors;
         border_color_without_format =
            border_color_features->customBorderColorWithoutFormat;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         const VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         perf_query_pools = feature->performanceCounterQueryPools;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         robust_buffer_access2 = features->robustBufferAccess2;
         break;
      }
      default:
         break;
      }
   }

   device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
                       sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");

   struct vk_device_dispatch_table dispatch_table;
   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_device_entrypoints, true);
   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_device_entrypoints, false);

   result = vk_device_init(&device->vk, &physical_device->vk,
                           &dispatch_table, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, device);
      return vk_startup_errorf(physical_device->instance, result,
                               "vk_device_init failed");
   }

   device->instance = physical_device->instance;
   device->physical_device = physical_device;
   device->fd = physical_device->local_fd;
   device->vk.check_status = tu_device_check_status;

   mtx_init(&device->bo_mutex, mtx_plain);
   mtx_init(&device->pipeline_mutex, mtx_plain);
   mtx_init(&device->autotune_mutex, mtx_plain);
   u_rwlock_init(&device->dma_bo_lock);
   pthread_mutex_init(&device->submit_mutex, NULL);

#ifndef TU_USE_KGSL
   vk_device_set_drm_fd(&device->vk, device->fd);
#endif

   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create =
         &pCreateInfo->pQueueCreateInfos[i];
      uint32_t qfi = queue_create->queueFamilyIndex;
      device->queues[qfi] = vk_alloc(
         &device->vk.alloc, queue_create->queueCount * sizeof(struct tu_queue),
         8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!device->queues[qfi]) {
         result = vk_startup_errorf(physical_device->instance,
                                    VK_ERROR_OUT_OF_HOST_MEMORY,
                                    "OOM");
         goto fail_queues;
      }

      memset(device->queues[qfi], 0,
             queue_create->queueCount * sizeof(struct tu_queue));

      device->queue_count[qfi] = queue_create->queueCount;

      for (unsigned q = 0; q < queue_create->queueCount; q++) {
         result = tu_queue_init(device, &device->queues[qfi][q], q,
                                queue_create);
         if (result != VK_SUCCESS)
            goto fail_queues;
      }
   }

   device->compiler =
      ir3_compiler_create(NULL, &physical_device->dev_id,
                          &(struct ir3_compiler_options) {
                              .robust_buffer_access2 = robust_buffer_access2,
                              .push_ubo_with_preamble = true,
                              .disable_cache = true,
                           });
   if (!device->compiler) {
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "failed to initialize ir3 compiler");
      goto fail_queues;
   }

   /* Initialize sparse array for refcounting imported BOs */
   util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);

   /* Initial sizes; these will be increased if there is overflow. */
   device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
   device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;

   uint32_t global_size = sizeof(struct tu6_global);
   if (custom_border_colors)
      global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);

   tu_bo_suballocator_init(&device->pipeline_suballoc, device,
                           128 * 1024, TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
   tu_bo_suballocator_init(&device->autotune_suballoc, device,
                           128 * 1024, 0);

   result = tu_bo_init_new(device, &device->global_bo, global_size,
                           TU_BO_ALLOC_ALLOW_DUMP);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "BO init");
      goto fail_global_bo;
   }

   result = tu_bo_map(device, device->global_bo);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "BO map");
      goto fail_global_bo_map;
   }

   struct tu6_global *global = device->global_bo->map;
   tu_init_clear_blit_shaders(device);
   global->predicate = 0;
   global->vtx_stats_query_not_running = 1;
   global->dbg_one = (uint32_t)-1;
   global->dbg_gmem_total_loads = 0;
   global->dbg_gmem_taken_loads = 0;
   global->dbg_gmem_total_stores = 0;
   global->dbg_gmem_taken_stores = 0;
   for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
      VkClearColorValue border_color = vk_border_color_value(i);
      tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
                            vk_border_color_is_int(i));
   }

   /* initialize to ones so ffs can be used to find unused slots */
   BITSET_ONES(device->custom_border_color);

   result = tu_init_dynamic_rendering(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "dynamic rendering");
      goto fail_dynamic_rendering;
   }

   struct vk_pipeline_cache_create_info pcc_info = { };
   device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
                                                false);
   if (!device->mem_cache) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      vk_startup_errorf(device->instance, result, "create pipeline cache failed");
      goto fail_pipeline_cache;
   }

   if (perf_query_pools) {
      /* Prepare one command stream per possible pass index, each setting
       * PERF_CNTRS_REG to a value from 0 to 31. The right one is picked up
       * at cmd submit time when the perf query is executed.
       */
      struct tu_cs *cs;

      if (!(device->perfcntrs_pass_cs = calloc(1, sizeof(struct tu_cs)))) {
         result = vk_startup_errorf(device->instance,
               VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
         goto fail_perfcntrs_pass_alloc;
      }

      device->perfcntrs_pass_cs_entries = calloc(32, sizeof(struct tu_cs_entry));
      if (!device->perfcntrs_pass_cs_entries) {
         result = vk_startup_errorf(device->instance,
               VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
         goto fail_perfcntrs_pass_entries_alloc;
      }

      cs = device->perfcntrs_pass_cs;
      tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96);

      for (unsigned i = 0; i < 32; i++) {
         struct tu_cs sub_cs;

         result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
         if (result != VK_SUCCESS) {
            vk_startup_errorf(device->instance, result,
                  "failed to allocate command streams");
            goto fail_prepare_perfcntrs_pass_cs;
         }

         tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
         tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);

         device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
      }
   }

   /* Initialize a condition variable for timeline semaphore */
   pthread_condattr_t condattr;
   if (pthread_condattr_init(&condattr) != 0) {
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread condattr init");
      goto fail_timeline_cond;
   }
   if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread condattr clock setup");
      goto fail_timeline_cond;
   }
   if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread cond init");
      goto fail_timeline_cond;
   }
   pthread_condattr_destroy(&condattr);

   result = tu_autotune_init(&device->autotune, device);
   if (result != VK_SUCCESS) {
      goto fail_timeline_cond;
   }

   for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
      mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);

   mtx_init(&device->mutex, mtx_plain);

   device->use_z24uint_s8uint =
      physical_device->info->a6xx.has_z24uint_s8uint &&
      !border_color_without_format;

   tu_gpu_tracepoint_config_variable();

   device->submit_count = 0;
   u_trace_context_init(&device->trace_context, device,
                        tu_trace_create_ts_buffer,
                        tu_trace_destroy_ts_buffer,
                        tu_trace_record_ts,
                        tu_trace_read_ts,
                        tu_trace_delete_flush_data);

   tu_breadcrumbs_init(device);

   *pDevice = tu_device_to_handle(device);
   return VK_SUCCESS;

fail_timeline_cond:
fail_prepare_perfcntrs_pass_cs:
   free(device->perfcntrs_pass_cs_entries);
   tu_cs_finish(device->perfcntrs_pass_cs);
fail_perfcntrs_pass_entries_alloc:
   free(device->perfcntrs_pass_cs);
fail_perfcntrs_pass_alloc:
   vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
fail_pipeline_cache:
   tu_destroy_dynamic_rendering(device);
fail_dynamic_rendering:
   tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
   tu_bo_finish(device, device->global_bo);
   vk_free(&device->vk.alloc, device->bo_list);
fail_global_bo:
   ir3_compiler_destroy(device->compiler);
   util_sparse_array_finish(&device->bo_map);

fail_queues:
   for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         tu_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   u_rwlock_destroy(&device->dma_bo_lock);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
   return result;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (!device)
      return;

   tu_breadcrumbs_finish(device);

   u_trace_context_fini(&device->trace_context);

   for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         tu_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
      if (device->scratch_bos[i].initialized)
         tu_bo_finish(device, device->scratch_bos[i].bo);
   }

   tu_destroy_clear_blit_shaders(device);

   tu_destroy_dynamic_rendering(device);

   ir3_compiler_destroy(device->compiler);

   vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);

   if (device->perfcntrs_pass_cs) {
      free(device->perfcntrs_pass_cs_entries);
      tu_cs_finish(device->perfcntrs_pass_cs);
      free(device->perfcntrs_pass_cs);
   }

   tu_autotune_fini(&device->autotune, device);

   tu_bo_suballocator_finish(&device->pipeline_suballoc);
   tu_bo_suballocator_finish(&device->autotune_suballoc);

   util_sparse_array_finish(&device->bo_map);
   u_rwlock_destroy(&device->dma_bo_lock);

   pthread_cond_destroy(&device->timeline_cond);
   vk_free(&device->vk.alloc, device->bo_list);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
{
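   /* Scratch BOs are kept in power-of-two size buckets: e.g. a 100KiB
    * request rounds up to the 128KiB bucket, and any already-allocated
    * bucket at least that large can satisfy it.
    */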
   unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
   unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
   assert(index < ARRAY_SIZE(dev->scratch_bos));

   for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
      if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
         /* Fast path: just return the already-allocated BO. */
         *bo = dev->scratch_bos[i].bo;
         return VK_SUCCESS;
      }
   }

   /* Slow path: actually allocate the BO. We take a per-size lock so that
    * concurrent callers wait for the allocation to finish and then reuse it,
    * instead of racing to allocate the same BO twice.
    */
   mtx_lock(&dev->scratch_bos[index].construct_mtx);

   /* Another thread may have allocated it already while we were waiting on
    * the lock. We need to check this in order to avoid double-allocating.
    */
   if (dev->scratch_bos[index].initialized) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      *bo = dev->scratch_bos[index].bo;
      return VK_SUCCESS;
   }

   unsigned bo_size = 1ull << size_log2;
   VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size,
                                    TU_BO_ALLOC_NO_FLAGS);
   if (result != VK_SUCCESS) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      return result;
   }

   p_atomic_set(&dev->scratch_bos[index].initialized, true);

   mtx_unlock(&dev->scratch_bos[index].construct_mtx);

   *bo = dev->scratch_bos[index].bo;
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                    VkLayerProperties *pProperties)
{
   *pPropertyCount = 0;
   return VK_SUCCESS;
}

/* Only used for kgsl since drm started using the common implementation */
#ifdef TU_USE_KGSL
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueWaitIdle(VkQueue _queue)
{
   TU_FROM_HANDLE(tu_queue, queue, _queue);

   if (vk_device_is_lost(&queue->device->vk))
      return VK_ERROR_DEVICE_LOST;

   if (queue->fence < 0)
      return VK_SUCCESS;

   struct pollfd fds = { .fd = queue->fence, .events = POLLIN };
   int ret;
   do {
      ret = poll(&fds, 1, -1);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   /* TODO: otherwise set device lost? */
   assert(ret == 1 && !(fds.revents & (POLLERR | POLLNVAL)));

   close(queue->fence);
   queue->fence = -1;
   return VK_SUCCESS;
}
#endif

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                        uint32_t *pPropertyCount,
                                        VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &tu_instance_extensions_supported, pPropertyCount, pProperties);
}

VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &tu_instance_entrypoints,
                                    pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return tu_GetInstanceProcAddr(instance, pName);
}

/* With version 4+ of the loader interface the ICD should expose
 * vk_icdGetPhysicalDeviceProcAddr().
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
                                const char *pName);

PFN_vkVoidFunction
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
                                const char *pName)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);

   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_AllocateMemory(VkDevice _device,
                  const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkDeviceMemory *pMem)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   if (pAllocateInfo->allocationSize == 0) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   struct tu_memory_heap *mem_heap = &device->physical_device->heap;
   uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
   if (mem_heap_used > mem_heap->size)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   const VkImportMemoryFdInfoKHR *fd_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   if (fd_info && !fd_info->handleType)
      fd_info = NULL;

   if (fd_info) {
      assert(fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      /*
       * TODO: Importing the same fd twice gives us the same handle without
       * reference counting. We need to maintain a per-instance handle-to-bo
       * table and add a reference count to tu_bo.
       */
      result = tu_bo_init_dmabuf(device, &mem->bo,
                                 pAllocateInfo->allocationSize, fd_info->fd);
      if (result == VK_SUCCESS) {
         /* take ownership and close the fd */
         close(fd_info->fd);
      }
   } else {
      result =
         tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize,
                        TU_BO_ALLOC_NO_FLAGS);
   }
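   /* Charge the BO to the heap; if that pushes usage past the heap size,
    * back the allocation out and report OOM rather than overcommit.
    */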
   if (result == VK_SUCCESS) {
      mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
      if (mem_heap_used > mem_heap->size) {
         p_atomic_add(&mem_heap->used, -mem->bo->size);
         tu_bo_finish(device, mem->bo);
         result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                            "Out of heap memory");
      }
   }

   if (result != VK_SUCCESS) {
      vk_object_free(&device->vk, pAllocator, mem);
      return result;
   }

   /* Track in the device whether our BO list contains any implicit-sync BOs, so
    * we can suppress implicit sync on non-WSI usage.
    */
   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
   if (wsi_info && wsi_info->implicit_sync) {
      mtx_lock(&device->bo_mutex);
      if (!mem->bo->implicit_sync) {
         mem->bo->implicit_sync = true;
         device->implicit_sync_bo_count++;
      }
      mtx_unlock(&device->bo_mutex);
   }

   *pMem = tu_device_memory_to_handle(mem);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_FreeMemory(VkDevice _device,
              VkDeviceMemory _mem,
              const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, mem, _mem);

   if (mem == NULL)
      return;

   p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
   tu_bo_finish(device, mem->bo);
   vk_object_free(&device->vk, pAllocator, mem);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_MapMemory(VkDevice _device,
             VkDeviceMemory _memory,
             VkDeviceSize offset,
             VkDeviceSize size,
             VkMemoryMapFlags flags,
             void **ppData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, mem, _memory);
   VkResult result;

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (!mem->bo->map) {
      result = tu_bo_map(device, mem->bo);
      if (result != VK_SUCCESS)
         return result;
   }

   *ppData = mem->bo->map + offset;
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   /* TODO: unmap here instead of waiting for FreeMemory */
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_FlushMappedMemoryRanges(VkDevice _device,
                           uint32_t memoryRangeCount,
                           const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_InvalidateMappedMemoryRanges(VkDevice _device,
                                uint32_t memoryRangeCount,
                                const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

static void
tu_get_buffer_memory_requirements(uint64_t size,
                                  VkMemoryRequirements2 *pMemoryRequirements)
{
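   /* align64() would wrap to 0 for a size within 64 bytes of UINT64_MAX,
    * so presumably the MAX2 with the original size guards against that
    * overflow.
    */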
   pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
      .memoryTypeBits = 1,
      .alignment = 64,
      .size = MAX2(align64(size, 64), size),
   };

   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req =
            (VkMemoryDedicatedRequirements *) ext;
         req->requiresDedicatedAllocation = false;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
tu_GetBufferMemoryRequirements2(
   VkDevice device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);

   tu_get_buffer_memory_requirements(buffer->size, pMemoryRequirements);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceBufferMemoryRequirements(
   VkDevice device,
   const VkDeviceBufferMemoryRequirements *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   tu_get_buffer_memory_requirements(pInfo->pCreateInfo->size, pMemoryRequirements);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceMemoryCommitment(VkDevice device,
                             VkDeviceMemory memory,
                             VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_BindBufferMemory2(VkDevice device,
                     uint32_t bindInfoCount,
                     const VkBindBufferMemoryInfo *pBindInfos)
{
   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
      TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);

      if (mem) {
         buffer->bo = mem->bo;
         buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
      } else {
         buffer->bo = NULL;
      }
   }
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_BindImageMemory2(VkDevice device,
                    uint32_t bindInfoCount,
                    const VkBindImageMemoryInfo *pBindInfos)
{
   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
      TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);

      if (mem) {
         image->bo = mem->bo;
         image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
      } else {
         image->bo = NULL;
         image->iova = 0;
      }
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueBindSparse(VkQueue _queue,
                   uint32_t bindInfoCount,
                   const VkBindSparseInfo *pBindInfo,
                   VkFence _fence)
{
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateEvent(VkDevice _device,
               const VkEventCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *pAllocator,
               VkEvent *pEvent)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   struct tu_event *event =
         vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
                         VK_OBJECT_TYPE_EVENT);
   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

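   /* Back the event with its own page-sized BO; the first 64 bits hold the
    * set/reset state, which lets command streams write it as well as the
    * CPU.
    */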
   VkResult result = tu_bo_init_new(device, &event->bo, 0x1000,
                                    TU_BO_ALLOC_NO_FLAGS);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = tu_bo_map(device, event->bo);
   if (result != VK_SUCCESS)
      goto fail_map;

   *pEvent = tu_event_to_handle(event);

   return VK_SUCCESS;

fail_map:
   tu_bo_finish(device, event->bo);
fail_alloc:
   vk_object_free(&device->vk, pAllocator, event);
   return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyEvent(VkDevice _device,
                VkEvent _event,
                const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_event, event, _event);

   if (!event)
      return;

   tu_bo_finish(device, event->bo);
   vk_object_free(&device->vk, pAllocator, event);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetEventStatus(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_event, event, _event);

   if (*(uint64_t*) event->bo->map == 1)
      return VK_EVENT_SET;
   return VK_EVENT_RESET;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_SetEvent(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_event, event, _event);
   *(uint64_t*) event->bo->map = 1;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetEvent(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_event, event, _event);
   *(uint64_t*) event->bo->map = 0;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateBuffer(VkDevice _device,
                const VkBufferCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkBuffer *pBuffer)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

   buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
                            VK_OBJECT_TYPE_BUFFER);
   if (buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->size = pCreateInfo->size;
   buffer->usage = pCreateInfo->usage;
   buffer->flags = pCreateInfo->flags;

   *pBuffer = tu_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyBuffer(VkDevice _device,
                 VkBuffer _buffer,
                 const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);

   if (!buffer)
      return;

   vk_object_free(&device->vk, pAllocator, buffer);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,
                     const VkFramebufferCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkFramebuffer *pFramebuffer)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
      return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
                                         pFramebuffer);

   TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
   struct tu_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;

   size_t size = sizeof(*framebuffer);
   if (!imageless)
      size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
   framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
                                 VK_OBJECT_TYPE_FRAMEBUFFER);
   if (framebuffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   if (!imageless) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct tu_image_view *iview = tu_image_view_from_handle(_iview);
         framebuffer->attachments[i].attachment = iview;
      }
   }

   tu_framebuffer_tiling_config(framebuffer, device, pass);

   *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

void
tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *pRenderingInfo)
{
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;

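   /* With dynamic rendering there is no VkFramebufferCreateInfo to take
    * dimensions from, so derive them from the render area: everything up
    * to offset + extent has to be covered.
    */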
   framebuffer->attachment_count = pass->attachment_count;
   framebuffer->width = pRenderingInfo->renderArea.offset.x +
      pRenderingInfo->renderArea.extent.width;
   framebuffer->height = pRenderingInfo->renderArea.offset.y +
      pRenderingInfo->renderArea.extent.height;
   framebuffer->layers = pRenderingInfo->layerCount;

   tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,
                      VkFramebuffer _fb,
                      const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
      vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
      return;
   }

   TU_FROM_HANDLE(tu_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_object_free(&device->vk, pAllocator, fb);
}

static void
tu_init_sampler(struct tu_device *device,
                struct tu_sampler *sampler,
                const VkSamplerCreateInfo *pCreateInfo)
{
   const struct VkSamplerReductionModeCreateInfo *reduction =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
   const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
   /* For non-custom border colors, the VK enum is translated directly to an
    * offset in the border color buffer. Custom border colors are located
    * immediately after the builtin colors, and thus an offset of
    * TU_BORDER_COLOR_BUILTIN is added.
    */
   uint32_t border_color = (unsigned) pCreateInfo->borderColor;
   if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
       pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
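      /* Grab the first free slot from the bitset; it is initialized to all
       * ones at device creation, so a set bit means the slot is unused.
       */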
      mtx_lock(&device->mutex);
      border_color = BITSET_FFS(device->custom_border_color) - 1;
      assert(border_color < TU_BORDER_COLOR_COUNT);
      BITSET_CLEAR(device->custom_border_color, border_color);
      mtx_unlock(&device->mutex);

      VkClearColorValue color = custom_border_color->customBorderColor;
      if (custom_border_color->format == VK_FORMAT_D24_UNORM_S8_UINT &&
          pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT &&
          device->use_z24uint_s8uint) {
         /* When sampling stencil using the special Z24UINT_S8UINT format, the
          * border color is in the second component. Note: if
          * customBorderColorWithoutFormat is enabled, we may miss doing this
          * here if the format isn't specified, which is why we don't use that
          * format.
          */
         color.uint32[1] = color.uint32[0];
      }

      tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]),
                            &color,
                            pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
      border_color += TU_BORDER_COLOR_BUILTIN;
   }

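   /* Map maxAnisotropy onto the hw's log2 encoding: 16 -> 4, 8 -> 3,
    * 2 -> 1, and 0 when anisotropy is disabled.
    */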
   unsigned aniso = pCreateInfo->anisotropyEnable ?
      util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
   bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
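   /* The hw LOD fields appear to be unsigned 4.8 fixed point, so the
    * largest representable LOD is 4095/256 (just under 16.0).
    */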
2768   float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f);
2769   float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f);
2770
   sampler->descriptor[0] =
      COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
      A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
      A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
      A6XX_TEX_SAMP_0_ANISO(aniso) |
      A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
      A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
      A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
      A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
   sampler->descriptor[1] =
      /* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */
      COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
      A6XX_TEX_SAMP_1_MIN_LOD(min_lod) |
      A6XX_TEX_SAMP_1_MAX_LOD(max_lod) |
      COND(pCreateInfo->compareEnable,
           A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
   sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color);
   sampler->descriptor[3] = 0;

   if (reduction) {
      sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(
         tu6_reduction_mode(reduction->reductionMode));
   }

   sampler->ycbcr_sampler = ycbcr_conversion ?
      tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;

   if (sampler->ycbcr_sampler &&
       sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
      sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
   }

   /* TODO:
    * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but Vulkan has
    * no "none" mipmap mode to map it to.
    */
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateSampler(VkDevice _device,
                 const VkSamplerCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkSampler *pSampler)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
                             VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   tu_init_sampler(device, sampler, pCreateInfo);
   *pSampler = tu_sampler_to_handle(sampler);

   return VK_SUCCESS;
}
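
/* A minimal sketch of how an application reaches this entrypoint through the
 * loader (illustrative only, not part of the driver):
 *
 *    VkSamplerCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *       .magFilter = VK_FILTER_LINEAR,
 *       .minFilter = VK_FILTER_LINEAR,
 *       .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .maxLod = VK_LOD_CLAMP_NONE,
 *    };
 *    VkSampler sampler;
 *    VkResult result = vkCreateSampler(device, &info, NULL, &sampler);
 *    ...
 *    vkDestroySampler(device, sampler, NULL);
 */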

VKAPI_ATTR void VKAPI_CALL
tu_DestroySampler(VkDevice _device,
                  VkSampler _sampler,
                  const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
   uint32_t border_color;

   if (!sampler)
      return;

   border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >>
                  A6XX_TEX_SAMP_2_BCOLOR__SHIFT;
   if (border_color >= TU_BORDER_COLOR_BUILTIN) {
      border_color -= TU_BORDER_COLOR_BUILTIN;
      /* if the sampler had a custom border color, free it. TODO: no lock */
      mtx_lock(&device->mutex);
      assert(!BITSET_TEST(device->custom_border_color, border_color));
      BITSET_SET(device->custom_border_color, border_color);
      mtx_unlock(&device->mutex);
   }

   vk_object_free(&device->vk, pAllocator, sampler);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it
    *         is linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to
    *         ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    *
    *    - Loader interface v5 differs from v4 in:
    *        - The ICD must support Vulkan API version 1.1 and must not return
    *          VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
    *          Vulkan Loader with interface v4 or smaller is being used and the
    *          application provides an API version that is greater than 1.0.
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdKHR(VkDevice _device,
                  const VkMemoryGetFdInfoKHR *pGetFdInfo,
                  int *pFd)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
   if (prime_fd < 0)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   *pFd = prime_fd;
   return VK_SUCCESS;
}
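
/* A minimal sketch of the application-side export path that lands here
 * (illustrative only, not part of the driver):
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd;
 *    VkResult result = vkGetMemoryFdKHR(device, &get_fd, &fd);
 *
 * Per the spec, ownership of the returned fd transfers to the caller, which
 * must eventually close() it.
 */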

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdPropertiesKHR(VkDevice _device,
                            VkExternalMemoryHandleTypeFlagBits handleType,
                            int fd,
                            VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
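   /* Imported dma-bufs may only be placed in memory type 0, hence bit 0. */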
   pMemoryFdProperties->memoryTypeBits = 1;
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceExternalFenceProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
   VkExternalFenceProperties *pExternalFenceProperties)
{
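   /* No external fence handle types are supported, so report everything as
    * zero.
    */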
   pExternalFenceProperties->exportFromImportedHandleTypes = 0;
   pExternalFenceProperties->compatibleHandleTypes = 0;
   pExternalFenceProperties->externalFenceFeatures = 0;
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceGroupPeerMemoryFeatures(
   VkDevice device,
   uint32_t heapIndex,
   uint32_t localDeviceIndex,
   uint32_t remoteDeviceIndex,
   VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
{
   assert(localDeviceIndex == remoteDeviceIndex);

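   /* With a single-device group, "peer" memory is just local memory, so all
    * peer memory features are trivially supported.
    */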
   *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
                          VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
                          VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
                          VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMultisamplePropertiesEXT(
   VkPhysicalDevice                            physicalDevice,
   VkSampleCountFlagBits                       samples,
   VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

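   /* Programmable sample locations are supported for 1, 2 and 4 samples, with
    * a 1x1 pixel grid; a 0x0 grid means the sample count is unsupported.
    */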
   if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
   else
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
}

VKAPI_ATTR VkDeviceAddress VKAPI_CALL
tu_GetBufferDeviceAddress(VkDevice _device,
                          const VkBufferDeviceAddressInfo* pInfo)
{
   TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);

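   /* The device address is simply the buffer's GPU virtual address (iova),
    * which stays fixed for the lifetime of the underlying BO.
    */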
   return buffer->iova;
}

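/* Opaque capture addresses (used for capture/replay of buffer device
 * addresses) are not implemented yet; both entrypoints below are stubs.
 */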
VKAPI_ATTR uint64_t VKAPI_CALL
tu_GetBufferOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfo*            pInfo)
{
   tu_stub();
   return 0;
}

VKAPI_ATTR uint64_t VKAPI_CALL
tu_GetDeviceMemoryOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
{
   tu_stub();
   return 0;
}
