/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "hwdef/rogue_hw_utils.h"
#include "pipe/p_defines.h"
#include "pvr_bo.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_nop_usc.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "rogue/rogue_compiler.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
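/* A worked example, assuming the threshold is applied as
 * free_list_size * threshold / 100: for the 2 MiB initial size an exact
 * 12.5% watermark would be 256 KiB, while the rounded 13% gives ~266 KiB,
 * so growth triggers slightly earlier than the intended 12.5%.
 */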

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM
#endif

#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
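/* E.g. if vulkan.h ships VK_HEADER_VERSION 231, this advertises 1.0.231: a
 * Vulkan 1.0 implementation whose patch level simply tracks the header
 * version.
 */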

#define DEF_DRIVER(str_name)                        \
   {                                                \
      .name = str_name, .len = sizeof(str_name) - 1 \
   }
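/* E.g. DEF_DRIVER("ti,am62-gpu") expands to
 * { .name = "ti,am62-gpu", .len = 11 }: sizeof counts the NUL terminator,
 * so subtracting 1 yields strlen() at compile time.
 */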

struct pvr_drm_device_info {
   const char *name;
   size_t len;
};

/* This is the list of supported DRM display drivers. */
static const struct pvr_drm_device_info pvr_display_devices[] = {
   DEF_DRIVER("mediatek-drm"),
   DEF_DRIVER("ti,am65x-dss"),
};

/* This is the list of supported DRM render drivers. */
static const struct pvr_drm_device_info pvr_render_devices[] = {
   DEF_DRIVER("mediatek,mt8173-gpu"),
   DEF_DRIVER("ti,am62-gpu"),
};

#undef DEF_DRIVER

static const struct vk_instance_extension_table pvr_instance_extensions = {
#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
   .KHR_display = true,
#endif
   .KHR_external_memory_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
#if defined(PVR_USE_WSI_PLATFORM)
   .KHR_surface = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   const struct pvr_physical_device *pdevice,
   struct vk_device_extension_table *extensions)
{
   /* clang-format off */
   *extensions = (struct vk_device_extension_table){
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
#if defined(PVR_USE_WSI_PLATFORM)
      .KHR_swapchain = true,
#endif
      .EXT_external_memory_dma_buf = true,
      .EXT_private_data = true,
   };
   /* clang-format on */
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   pvr_process_debug_variable();

   instance->physical_devices_count = -1;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

static void pvr_physical_device_finish(struct pvr_physical_device *pdevice)
{
   /* Be careful here. The device might not have been initialized, since
    * initialization is done in vkEnumeratePhysicalDevices() but finish is
    * done in vkDestroyInstance(). Either check for NULL before freeing, or
    * only use freeing functions that accept NULL pointers.
    */

   if (pdevice->compiler)
      rogue_compiler_destroy(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   free(pdevice->name);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   if (pdevice->master_fd >= 0) {
      vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);
      close(pdevice->master_fd);
   }

   if (pdevice->render_fd >= 0) {
      vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
      close(pdevice->render_fd);
   }
   vk_physical_device_finish(&pdevice->vk);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   pvr_physical_device_finish(&instance->physical_device);

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice)
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(pdevice,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(pdevice,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Build-id too short. It needs to be at least 20 bytes (SHA-1)");
   }

   bvnc = pvr_get_packed_bvnc(&pdevice->dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   return VK_SUCCESS;
}
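/* Net effect of the above: the pipeline cache UUID is the first VK_UUID_SIZE
 * (16) bytes of SHA1(driver build-id || packed BVNC), so caches are
 * invalidated whenever either the driver binary or the target GPU core
 * changes.
 */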

static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total RAM from the system. */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much RAM with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}
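/* E.g. a 2 GiB system gets a 1 GiB heap (half), while an 8 GiB system gets a
 * 6 GiB heap (three quarters).
 */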

static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_primary_device)
{
   const char *path = drm_render_device->nodes[DRM_NODE_RENDER];
   struct vk_device_extension_table supported_extensions;
   struct vk_physical_device_dispatch_table dispatch_table;
   const char *primary_path;
   VkResult result;
   int ret;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      return vk_errorf(instance,
                       VK_ERROR_INCOMPATIBLE_DRIVER,
                       "WARNING: powervr is not a conformant Vulkan "
                       "implementation. Pass "
                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                       "what you're doing.");
   }

   pvr_physical_device_get_supported_extensions(pdevice, &supported_extensions);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      return result;

   pdevice->instance = instance;

   pdevice->render_fd = open(path, O_RDWR | O_CLOEXEC);
   if (pdevice->render_fd < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Failed to open device %s",
                         path);
      goto err_vk_physical_device_finish;
   }

   pdevice->render_path = vk_strdup(&pdevice->vk.instance->alloc,
                                    path,
                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice->render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_close_render_fd;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];

      pdevice->master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
   } else {
      pdevice->master_fd = -1;
   }

   if (pdevice->master_fd >= 0) {
      pdevice->master_path = vk_strdup(&pdevice->vk.instance->alloc,
                                       primary_path,
                                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!pdevice->master_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_close_master_fd;
      }
   } else {
      pdevice->master_path = NULL;
   }

   pdevice->ws = pvr_winsys_create(pdevice->master_fd,
                                   pdevice->render_fd,
                                   &pdevice->vk.instance->alloc);
   if (!pdevice->ws) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_vk_free_master_path;
   }

   pdevice->vk.supported_sync_types = pdevice->ws->sync_types;

   ret = pdevice->ws->ops->device_info_init(pdevice->ws,
                                            &pdevice->dev_info,
                                            &pdevice->dev_runtime_info);
   if (ret) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_pvr_winsys_destroy;
   }

   result = pvr_physical_device_init_uuids(pdevice);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   if (asprintf(&pdevice->name,
                "Imagination PowerVR %s %s",
                pdevice->dev_info.ident.series_name,
                pdevice->dev_info.ident.public_name) < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_OUT_OF_HOST_MEMORY,
                         "Unable to allocate memory to store device name");
      goto err_pvr_winsys_destroy;
   }

   /* Setup available memory heaps and types */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;
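   /* A single heap with a single device-local, host-visible, host-coherent
    * type models these parts as UMA: the heap above is carved out of system
    * RAM, so no separate staging-only pool is advertised.
    */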

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_free_name;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_free_name:
   free(pdevice->name);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(pdevice->ws);

err_vk_free_master_path:
   vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);

err_close_master_fd:
   if (pdevice->master_fd >= 0)
      close(pdevice->master_fd);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);

err_close_render_fd:
   close(pdevice->render_fd);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

   return result;
}

static bool pvr_drm_device_is_supported(drmDevicePtr drm_dev, int node_type)
{
   char **compat = drm_dev->deviceinfo.platform->compatible;

   if (!(drm_dev->available_nodes & BITFIELD_BIT(node_type))) {
      assert(node_type == DRM_NODE_RENDER || node_type == DRM_NODE_PRIMARY);
      return false;
   }

   if (node_type == DRM_NODE_RENDER) {
      while (*compat) {
         for (size_t i = 0U; i < ARRAY_SIZE(pvr_render_devices); i++) {
            const char *const name = pvr_render_devices[i].name;
            const size_t len = pvr_render_devices[i].len;

            if (strncmp(*compat, name, len) == 0)
               return true;
         }

         compat++;
      }

      return false;
   } else if (node_type == DRM_NODE_PRIMARY) {
      while (*compat) {
         for (size_t i = 0U; i < ARRAY_SIZE(pvr_display_devices); i++) {
            const char *const name = pvr_display_devices[i].name;
            const size_t len = pvr_display_devices[i].len;

            if (strncmp(*compat, name, len) == 0)
               return true;
         }

         compat++;
      }

      return false;
   }

   unreachable("Incorrect node_type.");
}

static VkResult pvr_enumerate_devices(struct pvr_instance *instance)
{
   /* FIXME: It should be possible to query the number of devices via
    * drmGetDevices2 by passing in NULL for the 'devices' parameter. However,
    * this was broken by libdrm commit
    * 8cb12a2528d795c45bba5f03b3486b4040fb0f45, so, until this is fixed in
    * upstream, hard-code the maximum number of devices.
    */
   drmDevicePtr drm_primary_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   drmDevicePtr drm_devices[8];
   int max_drm_devices;
   VkResult result;

   instance->physical_devices_count = 0;

   max_drm_devices = drmGetDevices2(0, drm_devices, ARRAY_SIZE(drm_devices));
   if (max_drm_devices < 1)
      return VK_SUCCESS;

   for (unsigned i = 0; i < (unsigned)max_drm_devices; i++) {
      if (drm_devices[i]->bustype != DRM_BUS_PLATFORM)
         continue;

      if (pvr_drm_device_is_supported(drm_devices[i], DRM_NODE_RENDER)) {
         drm_render_device = drm_devices[i];

         mesa_logd("Found compatible render device '%s'.",
                   drm_render_device->nodes[DRM_NODE_RENDER]);
      } else if (pvr_drm_device_is_supported(drm_devices[i],
                                             DRM_NODE_PRIMARY)) {
         drm_primary_device = drm_devices[i];

         mesa_logd("Found compatible primary device '%s'.",
                   drm_primary_device->nodes[DRM_NODE_PRIMARY]);
      }
   }

   if (drm_render_device && drm_primary_device) {
      result = pvr_physical_device_init(&instance->physical_device,
                                        instance,
                                        drm_render_device,
                                        drm_primary_device);
      if (result == VK_SUCCESS)
         instance->physical_devices_count = 1;
      else if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;
   } else {
      result = VK_SUCCESS;
   }

   drmFreeDevices(drm_devices, max_drm_devices);

   return result;
}

VkResult pvr_EnumeratePhysicalDevices(VkInstance _instance,
                                      uint32_t *pPhysicalDeviceCount,
                                      VkPhysicalDevice *pPhysicalDevices)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice,
                          out,
                          pPhysicalDevices,
                          pPhysicalDeviceCount);
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   VkResult result;

   if (instance->physical_devices_count < 0) {
      result = pvr_enumerate_devices(instance);
      if (result != VK_SUCCESS)
         return result;
   }

   if (instance->physical_devices_count == 0)
      return VK_SUCCESS;

   assert(instance->physical_devices_count == 1);
   vk_outarray_append_typed (VkPhysicalDevice, &out, p) {
      *p = pvr_physical_device_to_handle(&instance->physical_device);
   }

   return vk_outarray_status(&out);
}

void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                    VkPhysicalDeviceFeatures2 *pFeatures)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pFeatures->features = (VkPhysicalDeviceFeatures){
      .robustBufferAccess =
         PVR_HAS_FEATURE(&pdevice->dev_info, robust_buffer_access),
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = false,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = PVR_HAS_FEATURE(&pdevice->dev_info, astc),
      .textureCompressionBC = false,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,
   };

   vk_foreach_struct (ext, pFeatures->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

/* TODO: See if this function can be improved once fully implemented. */
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_physical_device *pdevice,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const struct pvr_device_runtime_info *dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &pdevice->dev_info;
   uint32_t max_tiles_in_flight;
   uint32_t num_allocs;

   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
   } else {
      uint32_t min_cluster_per_phantom = 0;

      if (dev_runtime_info->num_phantoms > 1) {
         pvr_finishme("Unimplemented path!!");
      } else {
         min_cluster_per_phantom =
            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
      }

      if (dev_runtime_info->num_phantoms > 2)
         pvr_finishme("Unimplemented path!!");

      if (dev_runtime_info->num_phantoms > 3)
         pvr_finishme("Unimplemented path!!");

      if (min_cluster_per_phantom >= 4)
         num_allocs = 1;
      else if (min_cluster_per_phantom == 2)
         num_allocs = 2;
      else
         num_allocs = 4;
   }

   max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);

   if (fs_common_size == UINT_MAX) {
      uint32_t max_common_size;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      max_common_size =
         dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   } else if (fs_common_size == 0) {
      return max_tiles_in_flight;
   }
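   /* Worked example for the fs_common_size == UINT_MAX query above, with
    * hypothetical core values: num_clusters = 2 gives num_allocs = 2; with
    * min_tiles_in_flight = 1 that stays 2, and without ERN 38748 it becomes
    * 3. If reserved_shared_size - max_coeffs were, say, 1536, the
    * fragmentation-halved per-allocation budget would be 1536 / (3 * 2) =
    * 256, rounded down to the USC shared-size unit.
    */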

   pvr_finishme("Unimplemented path!!");

   return 0;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid range Rogue XE cores. */
      CS2048, /* Low end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

   static const struct pvr_descriptor_limits descriptor_limits[] = {
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(pdevice, -1, 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1536) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                      VkPhysicalDeviceProperties2 *pProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(pdevice);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(&pdevice->dev_info, simple_internal_parameter_format)
         ? 4U
         : 8U;

   const uint32_t max_render_size =
      rogue_get_render_size_max(&pdevice->dev_info);

   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
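   /* E.g. max_multisample = 4 gives (4 << 1) - 1 = 0x7, i.e. the
    * VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT
    * mask: every power-of-two count up to and including the maximum.
    */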

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* Workgroup invocations are limited by the compute-barrier case: each slot
    * has a fixed number of invocations, so the whole workgroup may need to
    * span multiple slots. As each slot will WAIT at the barrier until the
    * last invocation completes, all of them have to be schedulable at the
    * same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    *    .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info,
                            max_instances_per_pds_task,
                            32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
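   /* E.g. 16 slots * 32 instances = 512 invocations reports the full 512,
    * while a reduced 14-slot core (14 * 32 = 448) reports 384.
    */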

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = 2U * 1024U,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = 2U * 1024U,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,

      /* Maximum number of descriptor sets that can be bound at the same time.
       */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,

      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,

      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic = 8U,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic = 8U,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = 64U,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,
   };

   pProperties->properties = (VkPhysicalDeviceProperties){
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = pdevice->dev_info.ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = { 0 },
   };

   snprintf(pProperties->properties.deviceName,
            sizeof(pProperties->properties.deviceName),
            "%s",
            pdevice->name);

   memcpy(pProperties->properties.pipelineCacheUUID,
          pdevice->pipeline_cache_uuid,
          VK_UUID_SIZE);

   vk_foreach_struct (ext, pProperties->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

void pvr_GetPhysicalDeviceQueueFamilyProperties(
   VkPhysicalDevice physicalDevice,
   uint32_t *pCount,
   VkQueueFamilyProperties *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties,
                          out,
                          pQueueFamilyProperties,
                          pCount);

   vk_outarray_append_typed (VkQueueFamilyProperties, &out, p) {
      *p = pvr_queue_family_properties;
   }
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         pvr_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

/* With version 4+ of the loader interface the ICD should expose
 * vk_icdGetPhysicalDeviceProcAddr().
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   struct pvr_pds_compute_shader_program program = { 0U };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
                 ARRAY_SIZE(program.work_group_input_regs));
   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
                 ARRAY_SIZE(program.global_input_regs));

   /* Initialize PDS structure. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(program.local_input_regs); i++) {
      program.local_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
      program.work_group_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
      program.global_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
   }

   program.barrier_coefficient = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;

   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_set_sizes_compute_shader(&program, dev_info);

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      program.data_size * sizeof(uint32_t) + program.code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer = pvr_pds_generate_compute_shader_data_segment(&program,
                                                              data_buffer,
                                                              dev_info);
   pvr_pds_generate_compute_shader_code_segment(&program,
                                                code_buffer,
                                                dev_info);
   result = pvr_gpu_upload_pds(device,
                               data_buffer,
                               program.data_size,
                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
                               code_buffer,
                               program.code_size / sizeof(uint32_t),
                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
                               cache_line_size,
                               &device->pds_compute_fence_program);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size =
      (program.code_size + program.data_size) * sizeof(*staging_buffer);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
    * is bigger so we handle it first (if needed) and realloc() for a smaller
    * size.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         (program.code_size + program.data_size) * sizeof(*staging_buffer);

      staging_buffer = vk_realloc(&device->vk.alloc,
                                  staging_buffer,
                                  staging_buffer_size,
                                  8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!staging_buffer) {
         pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo);

         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
      pvr_pds_vertex_shader_sa(&program,
                               staging_buffer,
                               PDS_GENERATE_DATA_SEGMENT,
                               dev_info);
      pvr_pds_vertex_shader_sa(&program,
                               &staging_buffer[program.data_size],
                               PDS_GENERATE_CODE_SEGMENT,
                               dev_info);
   } else {
      *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
         .pvr_bo = NULL,
      };
   }

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, staging_buffer);
      pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo);

      return result;
   }

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
{
   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
   uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
   const struct rogue_shader_binary *usc_program;
   struct pvr_texture_state_info tex_info;
   uint32_t *dword_ptr;
   uint32_t usc_shareds;
   uint32_t usc_temps;
   VkResult result;

   pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
                                    &usc_program,
                                    &usc_shareds,
                                    &usc_temps);

   device->idfwdf_state.usc_shareds = usc_shareds;

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_usc(device,
                               usc_program->data,
                               usc_program->size,
                               16,
                               &device->idfwdf_state.usc);
   if (result != VK_SUCCESS)
      return result;

   /* TODO: Get the store buffer size from the compiler? */
   /* TODO: How was the size derived here? */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         4 * sizeof(float) * 4 * 2,
                         4,
                         0,
                         &device->idfwdf_state.store_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_program;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         usc_shareds * ROGUE_REG_SIZE_BYTES,
                         ROGUE_REG_SIZE_BYTES,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &device->idfwdf_state.shareds_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_buffer;

   /* Pack state words. */

   pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);

   tex_info = (struct pvr_texture_state_info){
      .format = VK_FORMAT_R32G32B32A32_SFLOAT,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      /* TODO: Is this correct? Is it 2D, 3D, or 2D_ARRAY? */
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .extent = { .width = 4, .height = 2, .depth = 0 },
      .mip_levels = 1,
      .sample_count = 1,
      .stride = 4,
      .swizzle = { PIPE_SWIZZLE_X,
                   PIPE_SWIZZLE_Y,
                   PIPE_SWIZZLE_Z,
                   PIPE_SWIZZLE_W },
      .addr = device->idfwdf_state.store_bo->vma->dev_addr,
   };

   result = pvr_pack_tex_state(device, &tex_info, image_state);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   /* Fill the shareds buffer. */

   dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;

#define HIGH_32(val) ((uint32_t)((val) >> 32U))
#define LOW_32(val) ((uint32_t)(val))
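/* E.g. val = 0x0011223344556677 splits into HIGH_32(val) = 0x00112233 and
 * LOW_32(val) = 0x44556677; these are used below to write 64-bit device
 * addresses and state words into the dword-sized shareds.
 */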
1402
1403   /* TODO: Should we use compiler info to setup the shareds data instead of
1404    * assuming there's always 12 and this is how they should be setup?
1405    */
1406
   dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
   dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);

   /* Pad the shareds as the texture/sampler state words are 128-bit
    * aligned.
    */
   dword_ptr[2] = 0U;
   dword_ptr[3] = 0U;

   dword_ptr[4] = LOW_32(image_state[0]);
   dword_ptr[5] = HIGH_32(image_state[0]);
   dword_ptr[6] = LOW_32(image_state[1]);
   dword_ptr[7] = HIGH_32(image_state[1]);

   dword_ptr[8] = LOW_32(sampler_state[0]);
   dword_ptr[9] = HIGH_32(sampler_state[0]);
   dword_ptr[10] = LOW_32(sampler_state[1]);
   dword_ptr[11] = HIGH_32(sampler_state[1]);
   assert(11 + 1 == usc_shareds);

#undef HIGH_32
#undef LOW_32

   pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
   dword_ptr = NULL;

   /* Generate and upload PDS programs. */
   result = pvr_pds_idfwdf_programs_create_and_upload(
      device,
      device->idfwdf_state.usc->vma->dev_addr,
      usc_shareds,
      usc_temps,
      device->idfwdf_state.shareds_bo->vma->dev_addr,
      &device->idfwdf_state.pds,
      &device->idfwdf_state.sw_compute_barrier_pds);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   return VK_SUCCESS;

err_free_shareds_buffer:
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);

err_free_store_buffer:
   pvr_bo_free(device, device->idfwdf_state.store_bo);

err_free_usc_program:
   pvr_bo_free(device, device->idfwdf_state.usc);

   return result;
}

static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
{
   pvr_bo_free(device, device->idfwdf_state.pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);
   pvr_bo_free(device, device->idfwdf_state.store_bo);
   pvr_bo_free(device, device->idfwdf_state.usc);
}

/* FIXME: We should be calculating the size when we upload the code in
 * pvr_srv_setup_static_pixel_event_program().
 */
static void pvr_device_get_pixel_event_pds_program_data_size(
   const struct pvr_device_info *dev_info,
   uint32_t *const data_size_in_dwords_out)
{
   struct pvr_pds_event_program program = {
      /* No data to DMA, just a DOUTU needed. */
      .num_emit_word_pairs = 0,
   };

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   *data_size_in_dwords_out = program.data_size;
}

static VkResult pvr_device_init_nop_program(struct pvr_device *device)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_pds_kickusc_program program = { 0 };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   result = pvr_gpu_upload_usc(device,
                               pvr_nop_usc_code,
                               sizeof(pvr_nop_usc_code),
                               cache_line_size,
                               &device->nop_program.usc);
   if (result != VK_SUCCESS)
      return result;

   /* Set up a PDS program that kicks the static USC program. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       device->nop_program.usc->vma->dev_addr.addr,
                       0U,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_set_sizes_pixel_shader(&program);

   staging_buffer_size =
      (program.code_size + program.data_size) * sizeof(*staging_buffer);

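   /* The staging buffer holds the PDS data segment immediately followed by
    * the code segment, both sized in dwords.
    */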
   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_nop_usc_bo;
   }

   pvr_pds_generate_pixel_shader_program(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               staging_buffer,
                               program.data_size,
                               16U,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16U,
                               16U,
                               &device->nop_program.pds);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_free_nop_usc_bo:
   pvr_bo_free(device, device->nop_program.usc);

   return result;
}

static void pvr_device_init_default_sampler_state(struct pvr_device *device)
{
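   /* Point-sampled, edge-clamped, non-normalised state; used when reading
    * input attachments.
    */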
   pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
      sampler.non_normalized_coords = true;
   }
}

VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                          const VkDeviceCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkDevice *pDevice)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   struct pvr_instance *instance = pdevice->instance;
   struct vk_device_dispatch_table dispatch_table;
   struct pvr_device *device;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   device = vk_alloc2(&pdevice->vk.instance->alloc,
                      pAllocator,
                      sizeof(*device),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &pvr_device_entrypoints,
                                             true);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints,
                                             false);

   result = vk_device_init(&device->vk,
                           &pdevice->vk,
                           &dispatch_table,
                           pCreateInfo,
                           pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_device;

   device->render_fd = open(pdevice->render_path, O_RDWR | O_CLOEXEC);
   if (device->render_fd < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to open device %s",
                         pdevice->render_path);
      goto err_vk_device_finish;
   }

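   /* The master (display) node is optional; when absent, master_fd stays -1
    * and the winsys is expected to cope with that.
    */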
   if (pdevice->master_path)
      device->master_fd = open(pdevice->master_path, O_RDWR | O_CLOEXEC);
   else
      device->master_fd = -1;

   vk_device_set_drm_fd(&device->vk, device->render_fd);

   device->instance = instance;
   device->pdevice = pdevice;

   device->ws = pvr_winsys_create(device->master_fd,
                                  device->render_fd,
                                  &device->vk.alloc);
   if (!device->ws) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_close_master_fd;
   }

   device->ws->ops->get_heaps_info(device->ws, &device->heaps);

   result = pvr_free_list_create(device,
                                 PVR_GLOBAL_FREE_LIST_INITIAL_SIZE,
                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
                                 NULL /* parent_free_list */,
                                 &device->global_free_list);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   result = pvr_device_init_nop_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_list_destroy;

   result = pvr_device_init_compute_fence_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_nop_program;

   result = pvr_device_init_compute_idfwdf_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_fence;

   result = pvr_queues_create(device, pCreateInfo);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_compute_idfwdf;

   pvr_device_init_default_sampler_state(device);

   if (pCreateInfo->pEnabledFeatures)
      memcpy(&device->features,
             pCreateInfo->pEnabledFeatures,
             sizeof(device->features));

   /* FIXME: Move this to a later stage and possibly somewhere other than
    * pvr_device. The purpose of this is so that we don't have to get the size
    * on each kick.
    */
   pvr_device_get_pixel_event_pds_program_data_size(
      &pdevice->dev_info,
      &device->pixel_event_data_size_in_dwords);

   device->global_queue_job_count = 0;
   device->global_queue_present_count = 0;

   *pDevice = pvr_device_to_handle(device);

   return VK_SUCCESS;

err_pvr_finish_compute_idfwdf:
   pvr_device_finish_compute_idfwdf_state(device);

err_pvr_free_compute_fence:
   pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);

err_pvr_free_nop_program:
   pvr_bo_free(device, device->nop_program.pds.pvr_bo);
   pvr_bo_free(device, device->nop_program.usc);

err_pvr_free_list_destroy:
   pvr_free_list_destroy(device->global_free_list);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(device->ws);

err_close_master_fd:
   if (device->master_fd >= 0)
      close(device->master_fd);

   close(device->render_fd);

err_vk_device_finish:
   vk_device_finish(&device->vk);

err_free_device:
   vk_free(&device->vk.alloc, device);

   return result;
}

void pvr_DestroyDevice(VkDevice _device,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   pvr_queues_destroy(device);
   pvr_device_finish_compute_idfwdf_state(device);
   pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);
   pvr_bo_free(device, device->nop_program.pds.pvr_bo);
   pvr_bo_free(device, device->nop_program.usc);
   pvr_free_list_destroy(device->global_free_list);
   pvr_winsys_destroy(device->ws);
   close(device->render_fd);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                              VkLayerProperties *pProperties)
{
   if (!pProperties) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult pvr_AllocateMemory(VkDevice _device,
                            const VkMemoryAllocateInfo *pAllocateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkDeviceMemory *pMem)
{
   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
   struct pvr_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   assert(pAllocateInfo->allocationSize > 0);

   mem = vk_object_alloc(&device->vk,
                         pAllocator,
                         sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (!mem)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
      switch ((unsigned)ext->sType) {
      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         type = PVR_WINSYS_BO_TYPE_DISPLAY;
         break;
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;
      default:
         pvr_debug_ignored_stype(ext->sType);
         break;
      }
   }

   if (fd_info && fd_info->handleType) {
      VkDeviceSize aligned_alloc_size =
         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);

      assert(
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = device->ws->ops->buffer_create_from_fd(device->ws,
                                                      fd_info->fd,
                                                      &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (aligned_alloc_size > mem->bo->size) {
         result = vk_errorf(device,
                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "Aligned requested size too large for the given fd "
                            "%" PRIu64 "B > %" PRIu64 "B",
                            aligned_alloc_size,
                            mem->bo->size);
         device->ws->ops->buffer_destroy(mem->bo);
         goto err_vk_object_free_mem;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
   } else {
      /* Align physical allocations to the page size of the heap that will be
       * used when binding device memory (see pvr_bind_memory()) to ensure the
       * entire allocation can be mapped.
       */
      const uint64_t alignment = device->heaps.general_heap->page_size;

      /* FIXME: Need to determine the flags based on
       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
       *
       * The alternative would be to store the flags alongside the memory
       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
       * that they can be looked up.
       */
      result = device->ws->ops->buffer_create(device->ws,
                                              pAllocateInfo->allocationSize,
                                              alignment,
                                              type,
                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                                              &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;
   }

   *pMem = pvr_device_memory_to_handle(mem);

   return VK_SUCCESS;

err_vk_object_free_mem:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}

VkResult pvr_GetMemoryFdKHR(VkDevice _device,
                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
}

VkResult
pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* FIXME: This should only allow memory types that have the
       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as a dma-buf should be
       * imported using cacheable memory types, given that the exporter's mmap
       * will always map it as cacheable.
       * Ref:
       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
       */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->pdevice->memory.memoryTypeCount) - 1;
      return VK_SUCCESS;
   default:
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

void pvr_FreeMemory(VkDevice _device,
                    VkDeviceMemory _mem,
                    const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);

   if (!mem)
      return;

   device->ws->ops->buffer_destroy(mem->bo);

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult pvr_MapMemory(VkDevice _device,
                       VkDeviceMemory _memory,
                       VkDeviceSize offset,
                       VkDeviceSize size,
                       VkMemoryMapFlags flags,
                       void **ppData)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
   void *map;

   if (!mem) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */

   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* Check if already mapped. */
   if (mem->bo->map) {
      *ppData = mem->bo->map + offset;
      return VK_SUCCESS;
   }

   /* Map it all at once. */
   map = device->ws->ops->buffer_map(mem->bo);
   if (!map)
      return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);

   *ppData = map + offset;

   return VK_SUCCESS;
}

void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (!mem || !mem->bo->map)
      return;

   device->ws->ops->buffer_unmap(mem->bo);
}

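/* Note: vkFlushMappedMemoryRanges() and vkInvalidateMappedMemoryRanges() are
 * currently no-ops; this presumes that CPU mappings of device memory are
 * cache-coherent with the GPU.
 */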
VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
                                     uint32_t memoryRangeCount,
                                     const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
pvr_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

void pvr_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

void pvr_GetDeviceMemoryCommitment(VkDevice device,
                                   VkDeviceMemory memory,
                                   VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out)
{
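   /* The virtual reservation also covers the offset's remainder within its
    * page, since mappings are page-granular.
    */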
   VkDeviceSize virt_size =
      size + (offset & (device->heaps.general_heap->page_size - 1));
   struct pvr_winsys_vma *vma;
   pvr_dev_addr_t dev_addr;

   /* Valid usage:
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetBufferMemoryRequirements with buffer"
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetImageMemoryRequirements with image"
    */
   assert(offset % alignment == 0);
   assert(offset < mem->bo->size);

   vma = device->ws->ops->heap_alloc(device->heaps.general_heap,
                                     virt_size,
                                     alignment);
   if (!vma)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   dev_addr = device->ws->ops->vma_map(vma, mem->bo, offset, size);
   if (!dev_addr.addr) {
      device->ws->ops->heap_free(vma);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *dev_addr_out = dev_addr;
   *vma_out = vma;

   return VK_SUCCESS;
}

void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
{
   device->ws->ops->vma_unmap(vma);
   device->ws->ops->heap_free(vma);
}

VkResult pvr_BindBufferMemory2(VkDevice _device,
                               uint32_t bindInfoCount,
                               const VkBindBufferMemoryInfo *pBindInfos)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t i;

   for (i = 0; i < bindInfoCount; i++) {
      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);

      VkResult result = pvr_bind_memory(device,
                                        mem,
                                        pBindInfos[i].memoryOffset,
                                        buffer->vk.size,
                                        buffer->alignment,
                                        &buffer->vma,
                                        &buffer->dev_addr);
      if (result != VK_SUCCESS) {
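         /* Unwind: unbind everything bound in earlier iterations. */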
         while (i--) {
            PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
            pvr_unbind_memory(device, buffer->vma);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

VkResult pvr_QueueBindSparse(VkQueue _queue,
                             uint32_t bindInfoCount,
                             const VkBindSparseInfo *pBindInfo,
                             VkFence fence)
{
   return VK_SUCCESS;
}

/* Event functions. */

VkResult pvr_CreateEvent(VkDevice _device,
                         const VkEventCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkEvent *pEvent)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

void pvr_DestroyEvent(VkDevice _device,
                      VkEvent _event,
                      const VkAllocationCallbacks *pAllocator)
{
   assert(!"Unimplemented");
}

VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

/* Buffer functions. */

VkResult pvr_CreateBuffer(VkDevice _device,
                          const VkBufferCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkBuffer *pBuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t alignment = 4096;
   struct pvr_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   assert(pCreateInfo->usage != 0);

   /* We check against (ULONG_MAX - alignment) to prevent overflow when the
    * size is later aligned up.
    */
   if (pCreateInfo->size >= ULONG_MAX - alignment)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   buffer =
      vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (!buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->alignment = alignment;

   *pBuffer = pvr_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void pvr_DestroyBuffer(VkDevice _device,
                       VkBuffer _buffer,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);

   if (!buffer)
      return;

   if (buffer->vma)
      pvr_unbind_memory(device, buffer->vma);

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_bo **const pvr_bo_out)
{
   struct pvr_bo *pvr_bo = NULL;
   VkResult result;

   assert(size > 0);

   result = pvr_bo_alloc(device,
                         heap,
                         size,
                         alignment,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map, data, size);
   pvr_bo_cpu_unmap(device, pvr_bo);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_bo **const pvr_bo_out)
{
   struct pvr_bo *pvr_bo = NULL;
   VkResult result;

   assert(code_size > 0);

   /* The USC will prefetch the next instruction, so over-allocate by one
    * instruction to prevent reading off the end of a page into a potentially
    * unallocated page.
    */
   result = pvr_bo_alloc(device,
                         device->heaps.usc_heap,
                         code_size + ROGUE_MAX_INSTR_BYTES,
                         code_alignment,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map, code, code_size);
   pvr_bo_cpu_unmap(device, pvr_bo);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

/**
 * \brief Upload PDS program data and code segments from host memory to device
 * memory.
 *
 * \param[in] device            Logical device pointer.
 * \param[in] data              Pointer to PDS data segment to upload.
 * \param[in] data_size_dwords  Size of PDS data segment in dwords.
 * \param[in] data_alignment    Required alignment of the PDS data segment in
 *                              bytes. Must be a power of two.
 * \param[in] code              Pointer to PDS code segment to upload.
 * \param[in] code_size_dwords  Size of PDS code segment in dwords.
 * \param[in] code_alignment    Required alignment of the PDS code segment in
 *                              bytes. Must be a power of two.
 * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
 *                              program in bytes.
 * \param[out] pds_upload_out   On success will be initialized based on the
 *                              uploaded PDS program.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out)
{
   /* All alignments and sizes below are in bytes. */
   const size_t data_size = data_size_dwords * sizeof(*data);
   const size_t code_size = code_size_dwords * sizeof(*code);
   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
   const uint64_t bo_size = code ? (code_offset + code_aligned_size)
                                 : data_aligned_size;
   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_CPU_MAPPED |
                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
   VkResult result;

   assert(code || data);
   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
   assert(!data || (data_size_dwords != 0 && data_alignment != 0));

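   /* Single-bo layout: [data segment][padding to code_alignment][code
    * segment].
    */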
   result = pvr_bo_alloc(device,
                         device->heaps.pds_heap,
                         bo_size,
                         bo_alignment,
                         bo_flags,
                         &pds_upload_out->pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   if (data) {
      memcpy(pds_upload_out->pvr_bo->bo->map, data, data_size);

      pds_upload_out->data_offset = pds_upload_out->pvr_bo->vma->dev_addr.addr -
                                    device->heaps.pds_heap->base_addr.addr;

      /* Store data size in dwords. */
      assert(data_aligned_size % 4 == 0);
      pds_upload_out->data_size = data_aligned_size / 4;
   } else {
      pds_upload_out->data_offset = 0;
      pds_upload_out->data_size = 0;
   }

   if (code) {
      memcpy((uint8_t *)pds_upload_out->pvr_bo->bo->map + code_offset,
             code,
             code_size);

      pds_upload_out->code_offset =
         (pds_upload_out->pvr_bo->vma->dev_addr.addr + code_offset) -
         device->heaps.pds_heap->base_addr.addr;

      /* Store code size in dwords. */
      assert(code_aligned_size % 4 == 0);
      pds_upload_out->code_size = code_aligned_size / 4;
   } else {
      pds_upload_out->code_offset = 0;
      pds_upload_out->code_size = 0;
   }

   pvr_bo_cpu_unmap(device, pds_upload_out->pvr_bo);

   return VK_SUCCESS;
}

static VkResult
pvr_framebuffer_create_ppp_state(struct pvr_device *device,
                                 struct pvr_framebuffer *framebuffer)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   uint32_t ppp_state[3];
   VkResult result;

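   /* ppp_state[] = { header, terminate0, terminate1 }. */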
   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
      header.pres_terminate = true;
   }

   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
      term0.clip_right =
         DIV_ROUND_UP(
            framebuffer->width,
            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
         1;
      term0.clip_bottom =
         DIV_ROUND_UP(
            framebuffer->height,
            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
         1;
   }

   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
      term1.render_target = 0;
      term1.clip_left = 0;
   }

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           ppp_state,
                           sizeof(ppp_state),
                           cache_line_size,
                           &framebuffer->ppp_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Calculate the size of PPP state in dwords. */
   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);

   return VK_SUCCESS;
}

static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   uint32_t i;

   for (i = 0; i < render_targets_count; i++) {
      if (pthread_mutex_init(&render_targets[i].mutex, NULL))
         goto err_mutex_destroy;
   }

   return true;

err_mutex_destroy:
   while (i--)
      pthread_mutex_destroy(&render_targets[i].mutex);

   return false;
}

static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   for (uint32_t i = 0; i < render_targets_count; i++) {
      if (render_targets[i].valid) {
         pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
         render_targets[i].valid = false;
      }

      pthread_mutex_destroy(&render_targets[i].mutex);
   }
}

VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_free_ppp_state_bo:
   pvr_bo_free(device, framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}

void pvr_DestroyFramebuffer(VkDevice _device,
                            VkFramebuffer _fb,
                            const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);

   if (!framebuffer)
      return;

   pvr_render_targets_fini(framebuffer->render_targets,
                           framebuffer->render_targets_count);
   pvr_bo_free(device, framebuffer->ppp_state_bo);
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
}

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it
    *         is linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}

static uint32_t
pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
                                  VkFilter filter)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return PVRX(TEXSTATE_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return PVRX(TEXSTATE_FILTER_LINEAR);
   default:
      unreachable("Unknown filter type.");
   }
}

static uint32_t
pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
{
   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_REPEAT);
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_FLIP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
   default:
      unreachable("Invalid sampler address mode.");
   }
}

VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_sampler *sampler;
   float lod_rounding_bias;
   VkFilter min_filter;
   VkFilter mag_filter;
   float min_lod;
   float max_lod;

   STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
                 sizeof(((union pvr_sampler_descriptor *)NULL)->words));

   sampler = vk_object_alloc(&device->vk,
                             pAllocator,
                             sizeof(*sampler),
                             VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   mag_filter = pCreateInfo->magFilter;
   min_filter = pCreateInfo->minFilter;

   if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
      /* The min/mag filters may need adjustment here; the GPU decides which
       * of the two filters to use based on the clamped LOD value: LOD <= 0
       * implies magnification, while LOD > 0 implies minification.
       *
       * As a workaround, we override magFilter with minFilter if we know that
       * the magnification filter will never be used due to clamping anyway
       * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
       * if maxLod <= 0.
       */
      if (pCreateInfo->minLod > 0.0f) {
         /* The clamped LOD will always be positive => always minify. */
         mag_filter = pCreateInfo->minFilter;
      }

      if (pCreateInfo->maxLod <= 0.0f) {
         /* The clamped LOD will always be negative or zero => always
          * magnify.
          */
         min_filter = pCreateInfo->magFilter;
      }
   }

   if (pCreateInfo->compareEnable) {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
   } else {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
   }

   sampler->descriptor.data.word3 = 0;
   pvr_csb_pack (&sampler->descriptor.data.sampler_word,
                 TEXSTATE_SAMPLER,
                 word) {
      const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
      const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
                                  (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
      const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
      const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
      word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);

      if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
         word.mipfilter = true;

      word.addrmode_u =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
      word.addrmode_v =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
      word.addrmode_w =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);

      /* TODO: Figure out defines for these. */
      if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x40000000;

      if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x20000000;

      /* The Vulkan 1.0.205 spec says:
       *
       *    The absolute value of mipLodBias must be less than or equal to
       *    VkPhysicalDeviceLimits::maxSamplerLodBias.
       */
      word.dadjust =
         PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
         util_signed_fixed(
            CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
            PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      /* Anisotropy is not supported for now. */
      word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);

      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
          pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
         /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
          * selected by adding 0.5 and then truncating the input LOD value.
          * This hardware adds the 0.5 bias before clamping against
          * lodmin/lodmax, while Vulkan specifies the bias to be added after
          * clamping. We compensate for this difference by adding the 0.5
          * bias to the LOD bounds, too.
          */
         lod_rounding_bias = 0.5f;
      } else {
         lod_rounding_bias = 0.0f;
      }

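      /* E.g. with minLod = 1.0: Vulkan computes trunc(clamp(lod, 1.0, maxLod)
       * + 0.5), which equals the hardware's trunc(clamp(lod + 0.5, 1.5,
       * maxLod + 0.5)), since shifting a clamp's bounds and input by the same
       * amount is equivalent.
       */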
      min_lod = pCreateInfo->minLod + lod_rounding_bias;
      word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      max_lod = pCreateInfo->maxLod + lod_rounding_bias;
      word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      word.bordercolor_index = pCreateInfo->borderColor;

      if (pCreateInfo->unnormalizedCoordinates)
         word.non_normalized_coords = true;
   }

   *pSampler = pvr_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

void pvr_DestroySampler(VkDevice _device,
                        VkSampler _sampler,
                        const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_object_free(&device->vk, pAllocator, sampler);
}

void pvr_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for buffers.
    */
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
   pMemoryRequirements->memoryRequirements.size =
      ALIGN_POT(buffer->vk.size, buffer->alignment);
}

void pvr_GetImageMemoryRequirements2(VkDevice _device,
                                     const VkImageMemoryRequirementsInfo2 *pInfo,
                                     VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for images.
    */
   const uint32_t memory_types =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   /* TODO: The returned size is aligned here in case of arrays/CEM (as is
    * done in GetImageMemoryRequirements()), but this should be known at image
    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
    *
    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
    * render and compute jobs.
    */
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
   pMemoryRequirements->memoryRequirements.size =
      ALIGN(image->size, image->alignment);
   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
}
