/*
 * Copyright 2020 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <xf86drm.h>

#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#include "drm-uapi/virtgpu_drm.h"
#include "util/sparse_array.h"
#define VIRGL_RENDERER_UNSTABLE_APIS
#include "virtio-gpu/virglrenderer_hw.h"

#include "vn_renderer_internal.h"

/* XXX WIP kernel uapi */
#ifndef VIRTGPU_PARAM_CONTEXT_INIT
#define VIRTGPU_PARAM_CONTEXT_INIT 6
#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001
struct drm_virtgpu_context_set_param {
   __u64 param;
   __u64 value;
};
struct drm_virtgpu_context_init {
   __u32 num_params;
   __u32 pad;
   __u64 ctx_set_params;
};
#define DRM_VIRTGPU_CONTEXT_INIT 0xb
#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT                                       \
   DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT,                     \
            struct drm_virtgpu_context_init)
#endif /* VIRTGPU_PARAM_CONTEXT_INIT */
#ifndef VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT
#define VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT 100
#endif /* VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT */

#ifndef VIRTGPU_PARAM_GUEST_VRAM
/* All guest allocations happen via virtgpu dedicated heap. */
#define VIRTGPU_PARAM_GUEST_VRAM 9
#endif

#ifndef VIRTGPU_BLOB_MEM_GUEST_VRAM
#define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
#endif

/* XXX comment these out to really use kernel uapi */
#define SIMULATE_BO_SIZE_FIX 1
//#define SIMULATE_CONTEXT_INIT 1
#define SIMULATE_SYNCOBJ 1
#define SIMULATE_SUBMIT 1

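/* The PCI IDs below identify virtio-gpu: 0x1af4 is the Red Hat, Inc. vendor
 * id used by virtio devices, and 0x1050 is understood to be the modern
 * virtio-gpu device id (0x1040 plus the virtio device number 16).
 */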
#define VIRTGPU_PCI_VENDOR_ID 0x1af4
#define VIRTGPU_PCI_DEVICE_ID 0x1050

struct virtgpu;

struct virtgpu_shmem {
   struct vn_renderer_shmem base;
   uint32_t gem_handle;
};

struct virtgpu_bo {
   struct vn_renderer_bo base;
   uint32_t gem_handle;
   uint32_t blob_flags;
};

struct virtgpu_sync {
   struct vn_renderer_sync base;

   /*
    * drm_syncobj is in one of these states
    *
    *  - value N:      drm_syncobj has a signaled fence chain with seqno N
    *  - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
    *                  (which may point to another unsignaled fence chain with
    *                   seqno between N and M, and so on)
    *
    * TODO Do we want to use binary drm_syncobjs?  They would be
    *
    *  - value 0: drm_syncobj has no fence
    *  - value 1: drm_syncobj has a signaled fence with seqno 0
    *
    * They are cheaper but require special care.
    */
   uint32_t syncobj_handle;
};

struct virtgpu {
   struct vn_renderer base;

   struct vn_instance *instance;

   int fd;

   bool has_primary;
   int primary_major;
   int primary_minor;
   int render_major;
   int render_minor;

   int bustype;
   drmPciBusInfo pci_bus_info;

   uint32_t max_sync_queue_count;

   struct {
      enum virgl_renderer_capset id;
      uint32_t version;
      struct virgl_renderer_capset_venus data;
   } capset;

   uint32_t shmem_blob_mem;
   uint32_t bo_blob_mem;

   /* note that we use gem_handle instead of res_id to index because
    * res_id is monotonically increasing by default (see
    * virtio_gpu_resource_id_get)
    */
   struct util_sparse_array shmem_array;
   struct util_sparse_array bo_array;

   mtx_t dma_buf_import_mutex;

   struct vn_renderer_shmem_cache shmem_cache;
};

#ifdef SIMULATE_SYNCOBJ

#include "util/hash_table.h"
#include "util/u_idalloc.h"

static struct {
   mtx_t mutex;
   struct hash_table *syncobjs;
   struct util_idalloc ida;

   int signaled_fd;
} sim;

struct sim_syncobj {
   mtx_t mutex;
   uint64_t point;

   int pending_fd;
   uint64_t pending_point;
   bool pending_cpu;
};

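/* The sim_syncobj code below emulates timeline drm_syncobjs on top of
 * sync_file fds: each sim_syncobj tracks the last signaled point plus at
 * most one pending (fd, point) pair, and sim.signaled_fd is an
 * already-signaled fence fd exported by a no-op execbuffer with
 * VIRTGPU_EXECBUF_FENCE_FD_OUT, dup'ed when exporting with nothing pending.
 */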
static uint32_t
sim_syncobj_create(struct virtgpu *gpu, bool signaled)
{
   struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
   if (!syncobj)
      return 0;

   mtx_init(&syncobj->mutex, mtx_plain);
   syncobj->pending_fd = -1;

   mtx_lock(&sim.mutex);

   /* initialize lazily */
   if (!sim.syncobjs) {
      sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
      if (!sim.syncobjs) {
         mtx_unlock(&sim.mutex);
         return 0;
      }

      util_idalloc_init(&sim.ida, 32);

      struct drm_virtgpu_execbuffer args = {
         .flags = VIRTGPU_EXECBUF_FENCE_FD_OUT,
      };
      int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret || args.fence_fd < 0) {
         _mesa_hash_table_destroy(sim.syncobjs, NULL);
         sim.syncobjs = NULL;
         mtx_unlock(&sim.mutex);
         return 0;
      }

      sim.signaled_fd = args.fence_fd;
   }

   const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
   _mesa_hash_table_insert(sim.syncobjs,
                           (const void *)(uintptr_t)syncobj_handle, syncobj);

   mtx_unlock(&sim.mutex);

   return syncobj_handle;
}

static void
sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);

   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry) {
      syncobj = entry->data;
      _mesa_hash_table_remove(sim.syncobjs, entry);
      util_idalloc_free(&sim.ida, syncobj_handle - 1);
   }

   mtx_unlock(&sim.mutex);

   if (syncobj) {
      if (syncobj->pending_fd >= 0)
         close(syncobj->pending_fd);
      mtx_destroy(&syncobj->mutex);
      free(syncobj);
   }
}

static VkResult
sim_syncobj_poll(int fd, int poll_timeout)
{
   struct pollfd pollfd = {
      .fd = fd,
      .events = POLLIN,
   };
   int ret;
   do {
      ret = poll(&pollfd, 1, poll_timeout);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) {
      return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY
                                          : VK_ERROR_DEVICE_LOST;
   }

   return ret ? VK_SUCCESS : VK_TIMEOUT;
}

static void
sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
{
   syncobj->point = point;

   if (syncobj->pending_fd >= 0) {
      close(syncobj->pending_fd);
      syncobj->pending_fd = -1;
      syncobj->pending_point = point;
   }
}

static void
sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout)
{
   if (syncobj->pending_fd >= 0) {
      VkResult result;
      if (syncobj->pending_cpu) {
         if (poll_timeout == -1) {
            const int max_cpu_timeout = 2000;
            poll_timeout = max_cpu_timeout;
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
            if (result == VK_TIMEOUT) {
               vn_log(NULL, "cpu sync timed out after %dms; ignoring",
                      poll_timeout);
               result = VK_SUCCESS;
            }
         } else {
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
         }
      } else {
         result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
      }
      if (result == VK_SUCCESS) {
         close(syncobj->pending_fd);
         syncobj->pending_fd = -1;
         syncobj->point = syncobj->pending_point;
      }
   }
}

static struct sim_syncobj *
sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);
   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry)
      syncobj = entry->data;
   mtx_unlock(&sim.mutex);

   return syncobj;
}

static int
sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, 0);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_query(struct virtgpu *gpu,
                  uint32_t syncobj_handle,
                  uint64_t *point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_update_point_locked(syncobj, 0);
   *point = syncobj->point;
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_signal(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   uint64_t point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, point);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_submit(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   int sync_fd,
                   uint64_t point,
                   bool cpu)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int pending_fd = dup(sync_fd);
   if (pending_fd < 0) {
      vn_log(gpu->instance, "failed to dup sync fd");
      return -1;
   }

   mtx_lock(&syncobj->mutex);

   if (syncobj->pending_fd >= 0) {
      mtx_unlock(&syncobj->mutex);

      /* TODO */
      vn_log(gpu->instance, "sorry, no simulated timeline semaphore");
      close(pending_fd);
      return -1;
   }
   if (syncobj->point >= point)
      vn_log(gpu->instance, "non-monotonic signaling");

   syncobj->pending_fd = pending_fd;
   syncobj->pending_point = point;
   syncobj->pending_cpu = cpu;

   mtx_unlock(&syncobj->mutex);

   return 0;
}

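/* convert a timeout in ns to a poll timeout in ms, rounding up: 1500000ns
 * becomes 2ms and 0 stays 0 (non-blocking); a timeout so large that the
 * conversion overflows or exceeds INT_MAX becomes -1 (wait indefinitely)
 */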
static int
timeout_to_poll_timeout(uint64_t timeout)
{
   const uint64_t ns_per_ms = 1000000;
   const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms;
   if (!ms && timeout)
      return -1;
   return ms <= INT_MAX ? ms : -1;
}

static int
sim_syncobj_wait(struct virtgpu *gpu,
                 const struct vn_renderer_wait *wait,
                 bool wait_avail)
{
   if (wait_avail)
      return -1;

   const int poll_timeout = timeout_to_poll_timeout(wait->timeout);

   /* TODO poll all fds at the same time */
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      const uint64_t point = wait->sync_values[i];

      struct sim_syncobj *syncobj =
         sim_syncobj_lookup(gpu, sync->syncobj_handle);
      if (!syncobj)
         return -1;

      mtx_lock(&syncobj->mutex);

      if (syncobj->point < point)
         sim_syncobj_update_point_locked(syncobj, poll_timeout);

      if (syncobj->point < point) {
         if (wait->wait_any && i < wait->sync_count - 1 &&
             syncobj->pending_fd < 0) {
            mtx_unlock(&syncobj->mutex);
            continue;
         }
         errno = ETIME;
         mtx_unlock(&syncobj->mutex);
         return -1;
      }

      mtx_unlock(&syncobj->mutex);

      if (wait->wait_any)
         break;

      /* TODO adjust poll_timeout */
   }

   return 0;
}

static int
sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int fd = -1;
   mtx_lock(&syncobj->mutex);
   if (syncobj->pending_fd >= 0)
      fd = dup(syncobj->pending_fd);
   else
      fd = dup(sim.signaled_fd);
   mtx_unlock(&syncobj->mutex);

   return fd;
}

static uint32_t
sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return 0;

   if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false))
      return 0;

   return syncobj_handle;
}

#endif /* SIMULATE_SYNCOBJ */

#ifdef SIMULATE_SUBMIT

static int
sim_submit_signal_syncs(struct virtgpu *gpu,
                        int sync_fd,
                        struct vn_renderer_sync *const *syncs,
                        const uint64_t *sync_values,
                        uint32_t sync_count,
                        bool cpu)
{
   for (uint32_t i = 0; i < sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i];
      const uint64_t pending_point = sync_values[i];

#ifdef SIMULATE_SYNCOBJ
      int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd,
                                   pending_point, cpu);
      if (ret)
         return ret;
#else
      /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a
       * DRM_IOCTL_SYNCOBJ_TRANSFER
       */
      return -1;
#endif
   }

   return 0;
}

static uint32_t *
sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
                             uint32_t bo_count)
{
   uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count);
   if (!gem_handles)
      return NULL;

   for (uint32_t i = 0; i < bo_count; i++) {
      struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i];
      gem_handles[i] = bo->gem_handle;
   }

   return gem_handles;
}

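/* Simulated submit: each batch is sent as one DRM_IOCTL_VIRTGPU_EXECBUFFER.
 * When a batch carries syncs, the exported fence fd is handed to
 * sim_submit_signal_syncs and then closed; a submit with bos but no batches
 * still issues an execbuffer that references only the bo handles.
 */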
static int
sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
{
   /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
   uint32_t *gem_handles = NULL;
   if (submit->bo_count) {
      gem_handles =
         sim_submit_alloc_gem_handles(submit->bos, submit->bo_count);
      if (!gem_handles)
         return -1;
   }

   int ret = 0;
   for (uint32_t i = 0; i < submit->batch_count; i++) {
      const struct vn_renderer_submit_batch *batch = &submit->batches[i];

      struct drm_virtgpu_execbuffer args = {
         .flags = batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0,
         .size = batch->cs_size,
         .command = (uintptr_t)batch->cs_data,
         .bo_handles = (uintptr_t)gem_handles,
         .num_bo_handles = submit->bo_count,
      };

      ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret) {
         vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
         break;
      }

      if (batch->sync_count) {
         ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
                                       batch->sync_values, batch->sync_count,
                                       batch->sync_queue_cpu);
         close(args.fence_fd);
         if (ret)
            break;
      }
   }

   if (!submit->batch_count && submit->bo_count) {
      struct drm_virtgpu_execbuffer args = {
         .bo_handles = (uintptr_t)gem_handles,
         .num_bo_handles = submit->bo_count,
      };

      ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret)
         vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
   }

   free(gem_handles);

   return ret;
}

#endif /* SIMULATE_SUBMIT */

static int
virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args)
{
   return drmIoctl(gpu->fd, request, args);
}

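/* returns the param value, or 0 when the param is unsupported or the ioctl
 * fails; callers treat 0 as "missing"
 */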
static uint64_t
virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (param == VIRTGPU_PARAM_CONTEXT_INIT)
      return 1;
#endif
#ifdef SIMULATE_SUBMIT
   if (param == VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT)
      return 16;
#endif

   /* val must be zeroed because kernel only writes the lower 32 bits */
   uint64_t val = 0;
   struct drm_virtgpu_getparam args = {
      .param = param,
      .value = (uintptr_t)&val,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
   return ret ? 0 : val;
}

static int
virtgpu_ioctl_get_caps(struct virtgpu *gpu,
                       enum virgl_renderer_capset id,
                       uint32_t version,
                       void *capset,
                       size_t capset_size)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (id == VIRGL_RENDERER_CAPSET_VENUS && version == 0)
      return 0;
#endif

   struct drm_virtgpu_get_caps args = {
      .cap_set_id = id,
      .cap_set_ver = version,
      .addr = (uintptr_t)capset,
      .size = capset_size,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
}

static int
virtgpu_ioctl_context_init(struct virtgpu *gpu,
                           enum virgl_renderer_capset capset_id)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (capset_id == VIRGL_RENDERER_CAPSET_VENUS)
      return 0;
#endif

   struct drm_virtgpu_context_init args = {
      .num_params = 1,
      .ctx_set_params = (uintptr_t) &
                        (struct drm_virtgpu_context_set_param){
                           .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
                           .value = capset_id,
                        },
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
}

static uint32_t
virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu,
                                   uint32_t blob_mem,
                                   uint32_t blob_flags,
                                   size_t blob_size,
                                   uint64_t blob_id,
                                   uint32_t *res_id)
{
#ifdef SIMULATE_BO_SIZE_FIX
   blob_size = align64(blob_size, 4096);
#endif

   struct drm_virtgpu_resource_create_blob args = {
      .blob_mem = blob_mem,
      .blob_flags = blob_flags,
      .size = blob_size,
      .blob_id = blob_id,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args))
      return 0;

   *res_id = args.res_handle;
   return args.bo_handle;
}

static int
virtgpu_ioctl_resource_info(struct virtgpu *gpu,
                            uint32_t gem_handle,
                            struct drm_virtgpu_resource_info *info)
{
   *info = (struct drm_virtgpu_resource_info){
      .bo_handle = gem_handle,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info);
}

static void
virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle)
{
   struct drm_gem_close args = {
      .handle = gem_handle,
   };

   ASSERTED const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu,
                                 uint32_t gem_handle,
                                 bool mappable)
{
   struct drm_prime_handle args = {
      .handle = gem_handle,
      .flags = DRM_CLOEXEC | (mappable ? DRM_RDWR : 0),
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
   return ret ? -1 : args.fd;
}

static uint32_t
virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd)
{
   struct drm_prime_handle args = {
      .fd = fd,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
   return ret ? 0 : args.handle;
}

static void *
virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size)
{
   struct drm_virtgpu_map args = {
      .handle = gem_handle,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args))
      return NULL;

   void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd,
                    args.offset);
   if (ptr == MAP_FAILED)
      return NULL;

   return ptr;
}

static uint32_t
virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_create(gpu, signaled);
#endif

   struct drm_syncobj_create args = {
      .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args);
   return ret ? 0 : args.handle;
}

static void
virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   sim_syncobj_destroy(gpu, syncobj_handle);
   return;
#endif

   struct drm_syncobj_destroy args = {
      .handle = syncobj_handle,
   };

   ASSERTED const int ret =
      virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu,
                                   uint32_t syncobj_handle,
                                   bool sync_file)
{
#ifdef SIMULATE_SYNCOBJ
   return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
   if (ret)
      return -1;

   return args.fd;
}

static uint32_t
virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu,
                                   int fd,
                                   uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         syncobj_handle ? DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0,
      .fd = fd,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
   if (ret)
      return 0;

   return args.handle;
}

static int
virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_reset(gpu, syncobj_handle);
#endif

   struct drm_syncobj_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args);
}

static int
virtgpu_ioctl_syncobj_query(struct virtgpu *gpu,
                            uint32_t syncobj_handle,
                            uint64_t *point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_query(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu,
                                      uint32_t syncobj_handle,
                                      uint64_t point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_signal(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)&point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu,
                                    const struct vn_renderer_wait *wait,
                                    bool wait_avail)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_wait(gpu, wait, wait_avail);
#endif

   /* always enable wait-before-submit */
   uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (!wait->wait_any)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
   /* wait for fences to appear instead of signaling */
   if (wait_avail)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;

   /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */
   uint32_t *syncobj_handles =
      malloc(sizeof(*syncobj_handles) * wait->sync_count);
   if (!syncobj_handles)
      return -1;
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      syncobj_handles[i] = sync->syncobj_handle;
   }

   struct drm_syncobj_timeline_wait args = {
      .handles = (uintptr_t)syncobj_handles,
      .points = (uintptr_t)wait->sync_values,
      .timeout_nsec = os_time_get_absolute_timeout(wait->timeout),
      .count_handles = wait->sync_count,
      .flags = flags,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args);

   free(syncobj_handles);

   return ret;
}

static int
virtgpu_ioctl_submit(struct virtgpu *gpu,
                     const struct vn_renderer_submit *submit)
{
#ifdef SIMULATE_SUBMIT
   return sim_submit(gpu, submit);
#endif
   return -1;
}

static VkResult
virtgpu_sync_write(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_read(struct vn_renderer *renderer,
                  struct vn_renderer_sync *_sync,
                  uint64_t *val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_reset(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t initial_val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle);
   if (!ret) {
      ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle,
                                                  initial_val);
   }

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static int
virtgpu_sync_export_syncobj(struct vn_renderer *renderer,
                            struct vn_renderer_sync *_sync,
                            bool sync_file)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle,
                                             sync_file);
}

static void
virtgpu_sync_destroy(struct vn_renderer *renderer,
                     struct vn_renderer_sync *_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle);

   free(sync);
}

static VkResult
virtgpu_sync_create_from_syncobj(struct vn_renderer *renderer,
                                 int fd,
                                 bool sync_file,
                                 struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   uint32_t syncobj_handle;
   if (sync_file) {
      syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false);
      if (!syncobj_handle)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) {
         virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
      }
   } else {
      syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0);
      if (!syncobj_handle)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   sync->base.sync_id = 0; /* TODO */

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static VkResult
virtgpu_sync_create(struct vn_renderer *renderer,
                    uint64_t initial_val,
                    uint32_t flags,
                    struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   /* TODO */
   if (flags & VN_RENDERER_SYNC_SHAREABLE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* always false because we don't use binary drm_syncobjs */
   const bool signaled = false;
   const uint32_t syncobj_handle =
      virtgpu_ioctl_syncobj_create(gpu, signaled);
   if (!syncobj_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* add a signaled fence chain with seqno initial_val */
   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, syncobj_handle, initial_val);
   if (ret) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   /* we will have a sync_id when shareable is true and virtio-gpu associates
    * a host sync object with guest drm_syncobj
    */
   sync->base.sync_id = 0;

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static void
virtgpu_bo_invalidate(struct vn_renderer *renderer,
                      struct vn_renderer_bo *bo,
                      VkDeviceSize offset,
                      VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void
virtgpu_bo_flush(struct vn_renderer *renderer,
                 struct vn_renderer_bo *bo,
                 VkDeviceSize offset,
                 VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void *
virtgpu_bo_map(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;

   /* not thread-safe but is fine */
   if (!bo->base.mmap_ptr && mappable) {
      bo->base.mmap_ptr =
         virtgpu_ioctl_map(gpu, bo->gem_handle, bo->base.mmap_size);
   }

   return bo->base.mmap_ptr;
}

static int
virtgpu_bo_export_dma_buf(struct vn_renderer *renderer,
                          struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE;

   return shareable
             ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable)
             : -1;
}

static bool
virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;

   mtx_lock(&gpu->dma_buf_import_mutex);

   /* Check the refcount again after the import lock is grabbed.  Yes, we use
    * the double-checked locking anti-pattern.
    */
   if (vn_refcount_is_valid(&bo->base.refcount)) {
      mtx_unlock(&gpu->dma_buf_import_mutex);
      return false;
   }

   if (bo->base.mmap_ptr)
      munmap(bo->base.mmap_ptr, bo->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, bo->gem_handle);

   /* set gem_handle to 0 to indicate that the bo is invalid */
   bo->gem_handle = 0;

   mtx_unlock(&gpu->dma_buf_import_mutex);

   return true;
}

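/* map Vulkan memory properties to virtgpu blob flags: HOST_VISIBLE maps to
 * MAPPABLE, any external handle type adds SHAREABLE, and dma-buf export
 * additionally adds CROSS_DEVICE.  For example, a host-visible memory type
 * that is exportable as a dma-buf gets MAPPABLE | SHAREABLE | CROSS_DEVICE.
 */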
static uint32_t
virtgpu_bo_blob_flags(VkMemoryPropertyFlags flags,
                      VkExternalMemoryHandleTypeFlags external_handles)
{
   uint32_t blob_flags = 0;
   if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   if (external_handles)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
   if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;

   return blob_flags;
}

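/* Import a dma-buf under dma_buf_import_mutex.  PRIME import of an
 * already-imported buffer returns the same gem_handle, so the bo at that
 * index of bo_array may already be initialized; in that case only the
 * refcount is bumped rather than re-initializing the bo.
 */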
static VkResult
virtgpu_bo_create_from_dma_buf(struct vn_renderer *renderer,
                               VkDeviceSize size,
                               int fd,
                               VkMemoryPropertyFlags flags,
                               struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct drm_virtgpu_resource_info info;
   uint32_t gem_handle = 0;
   struct virtgpu_bo *bo = NULL;

   mtx_lock(&gpu->dma_buf_import_mutex);

   gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd);
   if (!gem_handle)
      goto fail;
   bo = util_sparse_array_get(&gpu->bo_array, gem_handle);

   if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info))
      goto fail;

   uint32_t blob_flags;
   size_t mmap_size;
   if (info.blob_mem) {
      /* must be VIRTGPU_BLOB_MEM_HOST3D or VIRTGPU_BLOB_MEM_GUEST_VRAM */
      if (info.blob_mem != gpu->bo_blob_mem)
         goto fail;

      /* blob_flags is not passed to the kernel and is only for internal use
       * on imports.  Set it to what works best for us.
       */
      blob_flags = virtgpu_bo_blob_flags(flags, 0);
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;

      /* mmap_size is only used when mappable */
      mmap_size = 0;
      if (blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE) {
         if (info.size < size)
            goto fail;

         mmap_size = size;
      }
   } else {
      /* must be classic resource here
       * set blob_flags to 0 to fail virtgpu_bo_map
       * set mmap_size to 0 since mapping is not allowed
       */
      blob_flags = 0;
      mmap_size = 0;
   }

   /* we check bo->gem_handle instead of bo->refcount because bo->refcount
    * might only be memset to 0 and is not considered initialized in theory
    */
   if (bo->gem_handle == gem_handle) {
      if (bo->base.mmap_size < mmap_size)
         goto fail;
      if (blob_flags & ~bo->blob_flags)
         goto fail;

      /* we can't use vn_renderer_bo_ref as the refcount may drop to 0
       * temporarily before virtgpu_bo_destroy grabs the lock
       */
      vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1);
   } else {
      *bo = (struct virtgpu_bo){
         .base = {
            .refcount = VN_REFCOUNT_INIT(1),
            .res_id = info.res_handle,
            .mmap_size = mmap_size,
         },
         .gem_handle = gem_handle,
         .blob_flags = blob_flags,
      };
   }

   mtx_unlock(&gpu->dma_buf_import_mutex);

   *out_bo = &bo->base;

   return VK_SUCCESS;

fail:
   if (gem_handle && bo->gem_handle != gem_handle)
      virtgpu_ioctl_gem_close(gpu, gem_handle);
   mtx_unlock(&gpu->dma_buf_import_mutex);
   return VK_ERROR_INVALID_EXTERNAL_HANDLE;
}

static VkResult
virtgpu_bo_create_from_device_memory(
   struct vn_renderer *renderer,
   VkDeviceSize size,
   vn_object_id mem_id,
   VkMemoryPropertyFlags flags,
   VkExternalMemoryHandleTypeFlags external_handles,
   struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   const uint32_t blob_flags = virtgpu_bo_blob_flags(flags, external_handles);

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id);
   if (!gem_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
   *bo = (struct virtgpu_bo){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
      },
      .gem_handle = gem_handle,
      .blob_flags = blob_flags,
   };

   *out_bo = &bo->base;

   return VK_SUCCESS;
}

static void
virtgpu_shmem_destroy_now(struct vn_renderer *renderer,
                          struct vn_renderer_shmem *_shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem;

   munmap(shmem->base.mmap_ptr, shmem->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
}

static void
virtgpu_shmem_destroy(struct vn_renderer *renderer,
                      struct vn_renderer_shmem *shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem))
      return;

   virtgpu_shmem_destroy_now(&gpu->base, shmem);
}

static struct vn_renderer_shmem *
virtgpu_shmem_create(struct vn_renderer *renderer, size_t size)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   struct vn_renderer_shmem *cached_shmem =
      vn_renderer_shmem_cache_get(&gpu->shmem_cache, size);
   if (cached_shmem) {
      cached_shmem->refcount = VN_REFCOUNT_INIT(1);
      return cached_shmem;
   }

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->shmem_blob_mem, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0,
      &res_id);
   if (!gem_handle)
      return NULL;

   void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
   if (!ptr) {
      virtgpu_ioctl_gem_close(gpu, gem_handle);
      return NULL;
   }

   struct virtgpu_shmem *shmem =
      util_sparse_array_get(&gpu->shmem_array, gem_handle);
   *shmem = (struct virtgpu_shmem){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
         .mmap_ptr = ptr,
      },
      .gem_handle = gem_handle,
   };

   return &shmem->base;
}

static VkResult
virtgpu_wait(struct vn_renderer *renderer,
             const struct vn_renderer_wait *wait)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false);
   if (ret && errno != ETIME)
      return VK_ERROR_DEVICE_LOST;

   return ret ? VK_TIMEOUT : VK_SUCCESS;
}

static VkResult
virtgpu_submit(struct vn_renderer *renderer,
               const struct vn_renderer_submit *submit)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_submit(gpu, submit);
   return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS;
}

static void
virtgpu_init_renderer_info(struct virtgpu *gpu)
{
   struct vn_renderer_info *info = &gpu->base.info;

   info->drm.has_primary = gpu->has_primary;
   info->drm.primary_major = gpu->primary_major;
   info->drm.primary_minor = gpu->primary_minor;
   info->drm.has_render = true;
   info->drm.render_major = gpu->render_major;
   info->drm.render_minor = gpu->render_minor;

   info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID;
   info->pci.device_id = VIRTGPU_PCI_DEVICE_ID;

   if (gpu->bustype == DRM_BUS_PCI) {
      info->pci.has_bus_info = true;
      info->pci.domain = gpu->pci_bus_info.domain;
      info->pci.bus = gpu->pci_bus_info.bus;
      info->pci.device = gpu->pci_bus_info.dev;
      info->pci.function = gpu->pci_bus_info.func;
   } else {
      info->pci.has_bus_info = false;
   }

   info->has_dma_buf_import = true;
   /* Kernel makes every mapping coherent.  We are better off filtering
    * incoherent memory types out than silently making them coherent.
    */
   info->has_cache_management = false;
   /* TODO drm_syncobj */
   info->has_external_sync = false;

   info->has_implicit_fencing = false;

   info->max_sync_queue_count = gpu->max_sync_queue_count;

   const struct virgl_renderer_capset_venus *capset = &gpu->capset.data;
   info->wire_format_version = capset->wire_format_version;
   info->vk_xml_version = capset->vk_xml_version;
   info->vk_ext_command_serialization_spec_version =
      capset->vk_ext_command_serialization_spec_version;
   info->vk_mesa_venus_protocol_spec_version =
      capset->vk_mesa_venus_protocol_spec_version;
   info->supports_blob_id_0 = capset->supports_blob_id_0;

   /* ensure vk_extension_mask is large enough to hold all capset masks */
   STATIC_ASSERT(sizeof(info->vk_extension_mask) >=
                 sizeof(capset->vk_extension_mask1));
   memcpy(info->vk_extension_mask, capset->vk_extension_mask1,
          sizeof(capset->vk_extension_mask1));

   info->allow_vk_wait_syncs = capset->allow_vk_wait_syncs;

   if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_GUEST_VRAM)
      info->has_guest_vram = true;
}

static void
virtgpu_destroy(struct vn_renderer *renderer,
                const VkAllocationCallbacks *alloc)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   vn_renderer_shmem_cache_fini(&gpu->shmem_cache);

   if (gpu->fd >= 0)
      close(gpu->fd);

   mtx_destroy(&gpu->dma_buf_import_mutex);

   util_sparse_array_finish(&gpu->shmem_array);
   util_sparse_array_finish(&gpu->bo_array);

   vk_free(alloc, gpu);
}

static void
virtgpu_init_shmem_blob_mem(struct virtgpu *gpu)
{
   /* VIRTGPU_BLOB_MEM_GUEST allocates from the guest system memory.  They are
    * logically contiguous in the guest but are sglists (iovecs) in the host.
    * That makes them slower to process in the host.  With host process
    * isolation, it also becomes impossible for the host to access sglists
    * directly.
    *
    * While there are ideas (and shipped code in some cases) such as creating
    * udmabufs from sglists, or having a dedicated guest heap, it seems the
    * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D.  That is, when the
    * renderer sees a request to export a blob where
    *
    *  - blob_mem is VIRTGPU_BLOB_MEM_HOST3D
    *  - blob_flags is VIRTGPU_BLOB_FLAG_USE_MAPPABLE
    *  - blob_id is 0
    *
    * it allocates a host shmem.
    *
    * TODO cache shmems as they are costly to set up and usually require syncs
    */
   gpu->shmem_blob_mem = gpu->capset.data.supports_blob_id_0
                            ? VIRTGPU_BLOB_MEM_HOST3D
                            : VIRTGPU_BLOB_MEM_GUEST;
}

static VkResult
virtgpu_init_context(struct virtgpu *gpu)
{
   assert(!gpu->capset.version);
   const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to initialize context: %s",
                strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

static VkResult
virtgpu_init_capset(struct virtgpu *gpu)
{
   gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS;
   gpu->capset.version = 0;

   const int ret =
      virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
                             &gpu->capset.data, sizeof(gpu->capset.data));
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to get venus v%d capset: %s",
                gpu->capset.version, strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

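/* check the kernel params we require and pick bo_blob_mem: prefer
 * VIRTGPU_BLOB_MEM_HOST3D when HOST_VISIBLE is advertised, otherwise fall
 * back to VIRTGPU_BLOB_MEM_GUEST_VRAM
 */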
static VkResult
virtgpu_init_params(struct virtgpu *gpu)
{
   const uint64_t required_params[] = {
      VIRTGPU_PARAM_3D_FEATURES,   VIRTGPU_PARAM_CAPSET_QUERY_FIX,
      VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_CROSS_DEVICE,
      VIRTGPU_PARAM_CONTEXT_INIT,
   };
   uint64_t val;
   for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) {
      val = virtgpu_ioctl_getparam(gpu, required_params[i]);
      if (!val) {
         if (VN_DEBUG(INIT)) {
            vn_log(gpu->instance, "required kernel param %d is missing",
                   (int)required_params[i]);
         }
         return VK_ERROR_INITIALIZATION_FAILED;
      }
   }

   val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_HOST_VISIBLE);
   if (val) {
      gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
   } else {
      val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_GUEST_VRAM);
      if (val) {
         gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_GUEST_VRAM;
      }
   }

   if (!val) {
      vn_log(gpu->instance,
             "one of required kernel params (%d or %d) is missing",
             (int)VIRTGPU_PARAM_HOST_VISIBLE, (int)VIRTGPU_PARAM_GUEST_VRAM);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT);
   if (!val) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "no sync queue support");
      return VK_ERROR_INITIALIZATION_FAILED;
   }
   gpu->max_sync_queue_count = val;

   return VK_SUCCESS;
}

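/* accept a device only if it is a virtio-gpu PCI device or a platform
 * device, exposes a render node, and its DRM driver is virtio_gpu with
 * major version 0; a primary node is optional and only recorded for
 * renderer info
 */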
static VkResult
virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
{
   bool supported_bus = false;

   switch (dev->bustype) {
   case DRM_BUS_PCI:
      if (dev->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID &&
         dev->deviceinfo.pci->device_id == VIRTGPU_PCI_DEVICE_ID)
         supported_bus = true;
      break;
   case DRM_BUS_PLATFORM:
      supported_bus = true;
      break;
   default:
      break;
   }

   if (!supported_bus || !(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
      if (VN_DEBUG(INIT)) {
         const char *name = "unknown";
         for (uint32_t i = 0; i < DRM_NODE_MAX; i++) {
            if (dev->available_nodes & (1 << i)) {
               name = dev->nodes[i];
               break;
            }
         }
         vn_log(gpu->instance, "skipping DRM device %s", name);
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   const char *primary_path = dev->nodes[DRM_NODE_PRIMARY];
   const char *node_path = dev->nodes[DRM_NODE_RENDER];

   int fd = open(node_path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to open %s", node_path);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   drmVersionPtr version = drmGetVersion(fd);
   if (!version || strcmp(version->name, "virtio_gpu") ||
       version->version_major != 0) {
      if (VN_DEBUG(INIT)) {
         if (version) {
            vn_log(gpu->instance, "unknown DRM driver %s version %d",
                   version->name, version->version_major);
         } else {
            vn_log(gpu->instance, "failed to get DRM driver version");
         }
      }
      if (version)
         drmFreeVersion(version);
      close(fd);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   gpu->fd = fd;

   struct stat st;
   if (stat(primary_path, &st) == 0) {
      gpu->has_primary = true;
      gpu->primary_major = major(st.st_rdev);
      gpu->primary_minor = minor(st.st_rdev);
   } else {
      gpu->has_primary = false;
      gpu->primary_major = 0;
      gpu->primary_minor = 0;
   }
   stat(node_path, &st);
   gpu->render_major = major(st.st_rdev);
   gpu->render_minor = minor(st.st_rdev);

   gpu->bustype = dev->bustype;
   if (dev->bustype == DRM_BUS_PCI)
      gpu->pci_bus_info = *dev->businfo.pci;

   drmFreeVersion(version);

   if (VN_DEBUG(INIT))
      vn_log(gpu->instance, "using DRM device %s", node_path);

   return VK_SUCCESS;
}

static VkResult
virtgpu_open(struct virtgpu *gpu)
{
   drmDevicePtr devs[8];
   int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
   if (count < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to enumerate DRM devices");
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   VkResult result = VK_ERROR_INITIALIZATION_FAILED;
   for (int i = 0; i < count; i++) {
      result = virtgpu_open_device(gpu, devs[i]);
      if (result == VK_SUCCESS)
         break;
   }

   drmFreeDevices(devs, count);

   return result;
}

static VkResult
virtgpu_init(struct virtgpu *gpu)
{
   util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
                          1024);
   util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);

   mtx_init(&gpu->dma_buf_import_mutex, mtx_plain);

   VkResult result = virtgpu_open(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_params(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_capset(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_context(gpu);
   if (result != VK_SUCCESS)
      return result;

   virtgpu_init_shmem_blob_mem(gpu);

   vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
                                virtgpu_shmem_destroy_now);

   virtgpu_init_renderer_info(gpu);

   gpu->base.ops.destroy = virtgpu_destroy;
   gpu->base.ops.submit = virtgpu_submit;
   gpu->base.ops.wait = virtgpu_wait;

   gpu->base.shmem_ops.create = virtgpu_shmem_create;
   gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;

   gpu->base.bo_ops.create_from_device_memory =
      virtgpu_bo_create_from_device_memory;
   gpu->base.bo_ops.create_from_dma_buf = virtgpu_bo_create_from_dma_buf;
   gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
   gpu->base.bo_ops.export_dma_buf = virtgpu_bo_export_dma_buf;
   gpu->base.bo_ops.map = virtgpu_bo_map;
   gpu->base.bo_ops.flush = virtgpu_bo_flush;
   gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;

   gpu->base.sync_ops.create = virtgpu_sync_create;
   gpu->base.sync_ops.create_from_syncobj = virtgpu_sync_create_from_syncobj;
   gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
   gpu->base.sync_ops.export_syncobj = virtgpu_sync_export_syncobj;
   gpu->base.sync_ops.reset = virtgpu_sync_reset;
   gpu->base.sync_ops.read = virtgpu_sync_read;
   gpu->base.sync_ops.write = virtgpu_sync_write;

   return VK_SUCCESS;
}

VkResult
vn_renderer_create_virtgpu(struct vn_instance *instance,
                           const VkAllocationCallbacks *alloc,
                           struct vn_renderer **renderer)
{
   struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
                                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!gpu)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   gpu->instance = instance;
   gpu->fd = -1;

   VkResult result = virtgpu_init(gpu);
   if (result != VK_SUCCESS) {
      virtgpu_destroy(&gpu->base, alloc);
      return result;
   }

   *renderer = &gpu->base;

   return VK_SUCCESS;
}