/*
 * Copyright 2020 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <xf86drm.h>

#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#include "drm-uapi/virtgpu_drm.h"
#include "util/sparse_array.h"
#define VIRGL_RENDERER_UNSTABLE_APIS
#include "virtio-gpu/virglrenderer_hw.h"

#include "vn_renderer_internal.h"

/* XXX WIP kernel uapi */
#ifndef VIRTGPU_PARAM_CONTEXT_INIT
#define VIRTGPU_PARAM_CONTEXT_INIT 6
#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001
struct drm_virtgpu_context_set_param {
   __u64 param;
   __u64 value;
};
struct drm_virtgpu_context_init {
   __u32 num_params;
   __u32 pad;
   __u64 ctx_set_params;
};
#define DRM_VIRTGPU_CONTEXT_INIT 0xb
#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT                                       \
   DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT,                     \
            struct drm_virtgpu_context_init)
#endif /* VIRTGPU_PARAM_CONTEXT_INIT */
#ifndef VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT
#define VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT 100
#endif /* VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT */

#ifndef VIRTGPU_PARAM_GUEST_VRAM
/* All guest allocations happen via virtgpu dedicated heap. */
#define VIRTGPU_PARAM_GUEST_VRAM 9
#endif

#ifndef VIRTGPU_BLOB_MEM_GUEST_VRAM
#define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
#endif

/* XXX comment these out to really use kernel uapi */
#define SIMULATE_BO_SIZE_FIX 1
//#define SIMULATE_CONTEXT_INIT 1
#define SIMULATE_SYNCOBJ 1
#define SIMULATE_SUBMIT 1

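/* The PCI IDs below identify virtio-gpu: 0x1af4 is the Red Hat, Inc. vendor
 * id used by virtio devices, and 0x1050 is understood to be the modern
 * virtio-gpu device id (0x1040 plus the virtio device number 16).
 */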
#define VIRTGPU_PCI_VENDOR_ID 0x1af4
#define VIRTGPU_PCI_DEVICE_ID 0x1050

struct virtgpu;

struct virtgpu_shmem {
   struct vn_renderer_shmem base;
   uint32_t gem_handle;
};

struct virtgpu_bo {
   struct vn_renderer_bo base;
   uint32_t gem_handle;
   uint32_t blob_flags;
};

struct virtgpu_sync {
   struct vn_renderer_sync base;

   /*
    * drm_syncobj is in one of these states
    *
    *  - value N:      drm_syncobj has a signaled fence chain with seqno N
    *  - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
    *                  (which may point to another unsignaled fence chain with
    *                   seqno between N and M, and so on)
    *
    * TODO Do we want to use binary drm_syncobjs?  They would be
    *
    *  - value 0: drm_syncobj has no fence
    *  - value 1: drm_syncobj has a signaled fence with seqno 0
    *
    * They are cheaper but require special care.
    */
   uint32_t syncobj_handle;
};

struct virtgpu {
   struct vn_renderer base;

   struct vn_instance *instance;

   int fd;

   bool has_primary;
   int primary_major;
   int primary_minor;
   int render_major;
   int render_minor;

   int bustype;
   drmPciBusInfo pci_bus_info;

   uint32_t max_sync_queue_count;

   struct {
      enum virgl_renderer_capset id;
      uint32_t version;
      struct virgl_renderer_capset_venus data;
   } capset;

   uint32_t shmem_blob_mem;
   uint32_t bo_blob_mem;

   /* note that we use gem_handle instead of res_id to index because
    * res_id is monotonically increasing by default (see
    * virtio_gpu_resource_id_get)
    */
   struct util_sparse_array shmem_array;
   struct util_sparse_array bo_array;

   mtx_t dma_buf_import_mutex;

   struct vn_renderer_shmem_cache shmem_cache;
};

#ifdef SIMULATE_SYNCOBJ

#include "util/hash_table.h"
#include "util/u_idalloc.h"

static struct {
   mtx_t mutex;
   struct hash_table *syncobjs;
   struct util_idalloc ida;

   int signaled_fd;
} sim;

struct sim_syncobj {
   mtx_t mutex;
   uint64_t point;

   int pending_fd;
   uint64_t pending_point;
   bool pending_cpu;
};

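/* The sim_syncobj code below emulates timeline drm_syncobjs on top of
 * sync_file fds: each sim_syncobj tracks the last signaled point plus at
 * most one pending (fd, point) pair, and sim.signaled_fd is an
 * already-signaled fence fd exported by a no-op execbuffer with
 * VIRTGPU_EXECBUF_FENCE_FD_OUT, dup'ed when exporting with nothing pending.
 */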
static uint32_t
sim_syncobj_create(struct virtgpu *gpu, bool signaled)
{
   struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
   if (!syncobj)
      return 0;

   mtx_init(&syncobj->mutex, mtx_plain);
   syncobj->pending_fd = -1;

   mtx_lock(&sim.mutex);

   /* initialize lazily */
   if (!sim.syncobjs) {
      sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
      if (!sim.syncobjs) {
         mtx_unlock(&sim.mutex);
         return 0;
      }

      util_idalloc_init(&sim.ida, 32);

      struct drm_virtgpu_execbuffer args = {
         .flags = VIRTGPU_EXECBUF_FENCE_FD_OUT,
      };
      int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret || args.fence_fd < 0) {
         _mesa_hash_table_destroy(sim.syncobjs, NULL);
         sim.syncobjs = NULL;
         mtx_unlock(&sim.mutex);
         return 0;
      }

      sim.signaled_fd = args.fence_fd;
   }

   const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
   _mesa_hash_table_insert(sim.syncobjs,
                           (const void *)(uintptr_t)syncobj_handle, syncobj);

   mtx_unlock(&sim.mutex);

   return syncobj_handle;
}

static void
sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);

   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry) {
      syncobj = entry->data;
      _mesa_hash_table_remove(sim.syncobjs, entry);
      util_idalloc_free(&sim.ida, syncobj_handle - 1);
   }

   mtx_unlock(&sim.mutex);

   if (syncobj) {
      if (syncobj->pending_fd >= 0)
         close(syncobj->pending_fd);
      mtx_destroy(&syncobj->mutex);
      free(syncobj);
   }
}

static VkResult
sim_syncobj_poll(int fd, int poll_timeout)
{
   struct pollfd pollfd = {
      .fd = fd,
      .events = POLLIN,
   };
   int ret;
   do {
      ret = poll(&pollfd, 1, poll_timeout);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) {
      return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY
                                          : VK_ERROR_DEVICE_LOST;
   }

   return ret ? VK_SUCCESS : VK_TIMEOUT;
}

static void
sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
{
   syncobj->point = point;

   if (syncobj->pending_fd >= 0) {
      close(syncobj->pending_fd);
      syncobj->pending_fd = -1;
      syncobj->pending_point = point;
   }
}

static void
sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout)
{
   if (syncobj->pending_fd >= 0) {
      VkResult result;
      if (syncobj->pending_cpu) {
         if (poll_timeout == -1) {
            const int max_cpu_timeout = 2000;
            poll_timeout = max_cpu_timeout;
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
            if (result == VK_TIMEOUT) {
               vn_log(NULL, "cpu sync timed out after %dms; ignoring",
                      poll_timeout);
               result = VK_SUCCESS;
            }
         } else {
            result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
         }
      } else {
         result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
      }
      if (result == VK_SUCCESS) {
         close(syncobj->pending_fd);
         syncobj->pending_fd = -1;
         syncobj->point = syncobj->pending_point;
      }
   }
}

static struct sim_syncobj *
sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = NULL;

   mtx_lock(&sim.mutex);
   struct hash_entry *entry = _mesa_hash_table_search(
      sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
   if (entry)
      syncobj = entry->data;
   mtx_unlock(&sim.mutex);

   return syncobj;
}

static int
sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, 0);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_query(struct virtgpu *gpu,
                  uint32_t syncobj_handle,
                  uint64_t *point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_update_point_locked(syncobj, 0);
   *point = syncobj->point;
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_signal(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   uint64_t point)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   mtx_lock(&syncobj->mutex);
   sim_syncobj_set_point_locked(syncobj, point);
   mtx_unlock(&syncobj->mutex);

   return 0;
}

static int
sim_syncobj_submit(struct virtgpu *gpu,
                   uint32_t syncobj_handle,
                   int sync_fd,
                   uint64_t point,
                   bool cpu)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int pending_fd = dup(sync_fd);
   if (pending_fd < 0) {
      vn_log(gpu->instance, "failed to dup sync fd");
      return -1;
   }

   mtx_lock(&syncobj->mutex);

   if (syncobj->pending_fd >= 0) {
      mtx_unlock(&syncobj->mutex);

      /* TODO */
      vn_log(gpu->instance, "sorry, no simulated timeline semaphore");
      close(pending_fd);
      return -1;
   }
   if (syncobj->point >= point)
      vn_log(gpu->instance, "non-monotonic signaling");

   syncobj->pending_fd = pending_fd;
   syncobj->pending_point = point;
   syncobj->pending_cpu = cpu;

   mtx_unlock(&syncobj->mutex);

   return 0;
}

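/* convert a timeout in ns to a poll timeout in ms, rounding up: 1500000ns
 * becomes 2ms and 0 stays 0 (non-blocking); a timeout so large that the
 * conversion overflows or exceeds INT_MAX becomes -1 (wait indefinitely)
 */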
static int
timeout_to_poll_timeout(uint64_t timeout)
{
   const uint64_t ns_per_ms = 1000000;
   const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms;
   if (!ms && timeout)
      return -1;
   return ms <= INT_MAX ? ms : -1;
}

static int
sim_syncobj_wait(struct virtgpu *gpu,
                 const struct vn_renderer_wait *wait,
                 bool wait_avail)
{
   if (wait_avail)
      return -1;

   const int poll_timeout = timeout_to_poll_timeout(wait->timeout);

   /* TODO poll all fds at the same time */
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      const uint64_t point = wait->sync_values[i];

      struct sim_syncobj *syncobj =
         sim_syncobj_lookup(gpu, sync->syncobj_handle);
      if (!syncobj)
         return -1;

      mtx_lock(&syncobj->mutex);

      if (syncobj->point < point)
         sim_syncobj_update_point_locked(syncobj, poll_timeout);

      if (syncobj->point < point) {
         if (wait->wait_any && i < wait->sync_count - 1 &&
             syncobj->pending_fd < 0) {
            mtx_unlock(&syncobj->mutex);
            continue;
         }
         errno = ETIME;
         mtx_unlock(&syncobj->mutex);
         return -1;
      }

      mtx_unlock(&syncobj->mutex);

      if (wait->wait_any)
         break;

      /* TODO adjust poll_timeout */
   }

   return 0;
}

static int
sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return -1;

   int fd = -1;
   mtx_lock(&syncobj->mutex);
   if (syncobj->pending_fd >= 0)
      fd = dup(syncobj->pending_fd);
   else
      fd = dup(sim.signaled_fd);
   mtx_unlock(&syncobj->mutex);

   return fd;
}

static uint32_t
sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd)
{
   struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
   if (!syncobj)
      return 0;

   if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false))
      return 0;

   return syncobj_handle;
}

#endif /* SIMULATE_SYNCOBJ */

#ifdef SIMULATE_SUBMIT

static int
sim_submit_signal_syncs(struct virtgpu *gpu,
                        int sync_fd,
                        struct vn_renderer_sync *const *syncs,
                        const uint64_t *sync_values,
                        uint32_t sync_count,
                        bool cpu)
{
   for (uint32_t i = 0; i < sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i];
      const uint64_t pending_point = sync_values[i];

#ifdef SIMULATE_SYNCOBJ
      int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd,
                                   pending_point, cpu);
      if (ret)
         return ret;
#else
      /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a
       * DRM_IOCTL_SYNCOBJ_TRANSFER
       */
      return -1;
#endif
   }

   return 0;
}

static uint32_t *
sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
                             uint32_t bo_count)
{
   uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count);
   if (!gem_handles)
      return NULL;

   for (uint32_t i = 0; i < bo_count; i++) {
      struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i];
      gem_handles[i] = bo->gem_handle;
   }

   return gem_handles;
}

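/* Simulated submit: each batch is sent as one DRM_IOCTL_VIRTGPU_EXECBUFFER.
 * When a batch carries syncs, the exported fence fd is handed to
 * sim_submit_signal_syncs and then closed; a submit with bos but no batches
 * still issues an execbuffer that references only the bo handles.
 */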
static int
sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
{
   /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
   uint32_t *gem_handles = NULL;
   if (submit->bo_count) {
      gem_handles =
         sim_submit_alloc_gem_handles(submit->bos, submit->bo_count);
      if (!gem_handles)
         return -1;
   }

   int ret = 0;
   for (uint32_t i = 0; i < submit->batch_count; i++) {
      const struct vn_renderer_submit_batch *batch = &submit->batches[i];

      struct drm_virtgpu_execbuffer args = {
         .flags = batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0,
         .size = batch->cs_size,
         .command = (uintptr_t)batch->cs_data,
         .bo_handles = (uintptr_t)gem_handles,
         .num_bo_handles = submit->bo_count,
      };

      ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret) {
         vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
         break;
      }

      if (batch->sync_count) {
         ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
                                       batch->sync_values, batch->sync_count,
                                       batch->sync_queue_cpu);
         close(args.fence_fd);
         if (ret)
            break;
      }
   }

   if (!submit->batch_count && submit->bo_count) {
      struct drm_virtgpu_execbuffer args = {
         .bo_handles = (uintptr_t)gem_handles,
         .num_bo_handles = submit->bo_count,
      };

      ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
      if (ret)
         vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
   }

   free(gem_handles);

   return ret;
}

#endif /* SIMULATE_SUBMIT */

static int
virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args)
{
   return drmIoctl(gpu->fd, request, args);
}

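/* returns the param value, or 0 when the param is unsupported or the ioctl
 * fails; callers treat 0 as "missing"
 */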
static uint64_t
virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (param == VIRTGPU_PARAM_CONTEXT_INIT)
      return 1;
#endif
#ifdef SIMULATE_SUBMIT
   if (param == VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT)
      return 16;
#endif

   /* val must be zeroed because kernel only writes the lower 32 bits */
   uint64_t val = 0;
   struct drm_virtgpu_getparam args = {
      .param = param,
      .value = (uintptr_t)&val,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
   return ret ? 0 : val;
}

static int
virtgpu_ioctl_get_caps(struct virtgpu *gpu,
                       enum virgl_renderer_capset id,
                       uint32_t version,
                       void *capset,
                       size_t capset_size)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (id == VIRGL_RENDERER_CAPSET_VENUS && version == 0)
      return 0;
#endif

   struct drm_virtgpu_get_caps args = {
      .cap_set_id = id,
      .cap_set_ver = version,
      .addr = (uintptr_t)capset,
      .size = capset_size,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
}

static int
virtgpu_ioctl_context_init(struct virtgpu *gpu,
                           enum virgl_renderer_capset capset_id)
{
#ifdef SIMULATE_CONTEXT_INIT
   if (capset_id == VIRGL_RENDERER_CAPSET_VENUS)
      return 0;
#endif

   struct drm_virtgpu_context_init args = {
      .num_params = 1,
      .ctx_set_params = (uintptr_t) &
                        (struct drm_virtgpu_context_set_param){
                           .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
                           .value = capset_id,
                        },
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
}

static uint32_t
virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu,
                                   uint32_t blob_mem,
                                   uint32_t blob_flags,
                                   size_t blob_size,
                                   uint64_t blob_id,
                                   uint32_t *res_id)
{
#ifdef SIMULATE_BO_SIZE_FIX
   blob_size = align64(blob_size, 4096);
#endif

   struct drm_virtgpu_resource_create_blob args = {
      .blob_mem = blob_mem,
      .blob_flags = blob_flags,
      .size = blob_size,
      .blob_id = blob_id,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args))
      return 0;

   *res_id = args.res_handle;
   return args.bo_handle;
}

static int
virtgpu_ioctl_resource_info(struct virtgpu *gpu,
                            uint32_t gem_handle,
                            struct drm_virtgpu_resource_info *info)
{
   *info = (struct drm_virtgpu_resource_info){
      .bo_handle = gem_handle,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info);
}

static void
virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle)
{
   struct drm_gem_close args = {
      .handle = gem_handle,
   };

   ASSERTED const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu,
                                 uint32_t gem_handle,
                                 bool mappable)
{
   struct drm_prime_handle args = {
      .handle = gem_handle,
      .flags = DRM_CLOEXEC | (mappable ? DRM_RDWR : 0),
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
   return ret ? -1 : args.fd;
}

static uint32_t
virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd)
{
   struct drm_prime_handle args = {
      .fd = fd,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
   return ret ? 0 : args.handle;
}

static void *
virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size)
{
   struct drm_virtgpu_map args = {
      .handle = gem_handle,
   };

   if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args))
      return NULL;

   void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd,
                    args.offset);
   if (ptr == MAP_FAILED)
      return NULL;

   return ptr;
}

static uint32_t
virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_create(gpu, signaled);
#endif

   struct drm_syncobj_create args = {
      .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args);
   return ret ? 0 : args.handle;
}

static void
virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   sim_syncobj_destroy(gpu, syncobj_handle);
   return;
#endif

   struct drm_syncobj_destroy args = {
      .handle = syncobj_handle,
   };

   ASSERTED const int ret =
      virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
   assert(!ret);
}

static int
virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu,
                                   uint32_t syncobj_handle,
                                   bool sync_file)
{
#ifdef SIMULATE_SYNCOBJ
   return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
   if (ret)
      return -1;

   return args.fd;
}

static uint32_t
virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu,
                                   int fd,
                                   uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0;
#endif

   struct drm_syncobj_handle args = {
      .handle = syncobj_handle,
      .flags =
         syncobj_handle ? DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0,
      .fd = fd,
   };

   int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
   if (ret)
      return 0;

   return args.handle;
}

static int
virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_reset(gpu, syncobj_handle);
#endif

   struct drm_syncobj_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args);
}

static int
virtgpu_ioctl_syncobj_query(struct virtgpu *gpu,
                            uint32_t syncobj_handle,
                            uint64_t *point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_query(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu,
                                      uint32_t syncobj_handle,
                                      uint64_t point)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_signal(gpu, syncobj_handle, point);
#endif

   struct drm_syncobj_timeline_array args = {
      .handles = (uintptr_t)&syncobj_handle,
      .points = (uintptr_t)&point,
      .count_handles = 1,
   };

   return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
}

static int
virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu,
                                    const struct vn_renderer_wait *wait,
                                    bool wait_avail)
{
#ifdef SIMULATE_SYNCOBJ
   return sim_syncobj_wait(gpu, wait, wait_avail);
#endif

   /* always enable wait-before-submit */
   uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (!wait->wait_any)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
   /* wait for fences to appear instead of signaling */
   if (wait_avail)
      flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;

   /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */
   uint32_t *syncobj_handles =
      malloc(sizeof(*syncobj_handles) * wait->sync_count);
   if (!syncobj_handles)
      return -1;
   for (uint32_t i = 0; i < wait->sync_count; i++) {
      struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
      syncobj_handles[i] = sync->syncobj_handle;
   }

   struct drm_syncobj_timeline_wait args = {
      .handles = (uintptr_t)syncobj_handles,
      .points = (uintptr_t)wait->sync_values,
      .timeout_nsec = os_time_get_absolute_timeout(wait->timeout),
      .count_handles = wait->sync_count,
      .flags = flags,
   };

   const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args);

   free(syncobj_handles);

   return ret;
}

static int
virtgpu_ioctl_submit(struct virtgpu *gpu,
                     const struct vn_renderer_submit *submit)
{
#ifdef SIMULATE_SUBMIT
   return sim_submit(gpu, submit);
#endif
   return -1;
}

static VkResult
virtgpu_sync_write(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_read(struct vn_renderer *renderer,
                  struct vn_renderer_sync *_sync,
                  uint64_t *val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   const int ret =
      virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val);

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static VkResult
virtgpu_sync_reset(struct vn_renderer *renderer,
                   struct vn_renderer_sync *_sync,
                   uint64_t initial_val)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle);
   if (!ret) {
      ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle,
                                                  initial_val);
   }

   return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
}

static int
virtgpu_sync_export_syncobj(struct vn_renderer *renderer,
                            struct vn_renderer_sync *_sync,
                            bool sync_file)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle,
                                             sync_file);
}

static void
virtgpu_sync_destroy(struct vn_renderer *renderer,
                     struct vn_renderer_sync *_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;

   virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle);

   free(sync);
}

static VkResult
virtgpu_sync_create_from_syncobj(struct vn_renderer *renderer,
                                 int fd,
                                 bool sync_file,
                                 struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   uint32_t syncobj_handle;
   if (sync_file) {
      syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false);
      if (!syncobj_handle)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) {
         virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
      }
   } else {
      syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0);
      if (!syncobj_handle)
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   sync->base.sync_id = 0; /* TODO */

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static VkResult
virtgpu_sync_create(struct vn_renderer *renderer,
                    uint64_t initial_val,
                    uint32_t flags,
                    struct vn_renderer_sync **out_sync)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   /* TODO */
   if (flags & VN_RENDERER_SYNC_SHAREABLE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* always false because we don't use binary drm_syncobjs */
   const bool signaled = false;
   const uint32_t syncobj_handle =
      virtgpu_ioctl_syncobj_create(gpu, signaled);
   if (!syncobj_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* add a signaled fence chain with seqno initial_val */
   const int ret =
      virtgpu_ioctl_syncobj_timeline_signal(gpu, syncobj_handle, initial_val);
   if (ret) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
   if (!sync) {
      virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   sync->syncobj_handle = syncobj_handle;
   /* we will have a sync_id when shareable is true and virtio-gpu associates
    * a host sync object with guest drm_syncobj
    */
   sync->base.sync_id = 0;

   *out_sync = &sync->base;

   return VK_SUCCESS;
}

static void
virtgpu_bo_invalidate(struct vn_renderer *renderer,
                      struct vn_renderer_bo *bo,
                      VkDeviceSize offset,
                      VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void
virtgpu_bo_flush(struct vn_renderer *renderer,
                 struct vn_renderer_bo *bo,
                 VkDeviceSize offset,
                 VkDeviceSize size)
{
   /* nop because kernel makes every mapping coherent */
}

static void *
virtgpu_bo_map(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;

   /* not thread-safe but is fine */
   if (!bo->base.mmap_ptr && mappable) {
      bo->base.mmap_ptr =
         virtgpu_ioctl_map(gpu, bo->gem_handle, bo->base.mmap_size);
   }

   return bo->base.mmap_ptr;
}

static int
virtgpu_bo_export_dma_buf(struct vn_renderer *renderer,
                          struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
   const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE;

   return shareable
             ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable)
             : -1;
}

static bool
virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;

   mtx_lock(&gpu->dma_buf_import_mutex);

   /* Check the refcount again after the import lock is grabbed.  Yes, we use
    * the double-checked locking anti-pattern.
    */
   if (vn_refcount_is_valid(&bo->base.refcount)) {
      mtx_unlock(&gpu->dma_buf_import_mutex);
      return false;
   }

   if (bo->base.mmap_ptr)
      munmap(bo->base.mmap_ptr, bo->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, bo->gem_handle);

   /* set gem_handle to 0 to indicate that the bo is invalid */
   bo->gem_handle = 0;

   mtx_unlock(&gpu->dma_buf_import_mutex);

   return true;
}

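/* map Vulkan memory properties to virtgpu blob flags: HOST_VISIBLE maps to
 * MAPPABLE, any external handle type adds SHAREABLE, and dma-buf export
 * additionally adds CROSS_DEVICE.  For example, a host-visible memory type
 * that is exportable as a dma-buf gets MAPPABLE | SHAREABLE | CROSS_DEVICE.
 */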
static uint32_t
virtgpu_bo_blob_flags(VkMemoryPropertyFlags flags,
                      VkExternalMemoryHandleTypeFlags external_handles)
{
   uint32_t blob_flags = 0;
   if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
   if (external_handles)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
   if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;

   return blob_flags;
}

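/* Import a dma-buf under dma_buf_import_mutex.  PRIME import of an
 * already-imported buffer returns the same gem_handle, so the bo at that
 * index of bo_array may already be initialized; in that case only the
 * refcount is bumped rather than re-initializing the bo.
 */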
static VkResult
virtgpu_bo_create_from_dma_buf(struct vn_renderer *renderer,
                               VkDeviceSize size,
                               int fd,
                               VkMemoryPropertyFlags flags,
                               struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct drm_virtgpu_resource_info info;
   uint32_t gem_handle = 0;
   struct virtgpu_bo *bo = NULL;

   mtx_lock(&gpu->dma_buf_import_mutex);

   gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd);
   if (!gem_handle)
      goto fail;
   bo = util_sparse_array_get(&gpu->bo_array, gem_handle);

   if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info))
      goto fail;

   uint32_t blob_flags;
   size_t mmap_size;
   if (info.blob_mem) {
      /* must be VIRTGPU_BLOB_MEM_HOST3D or VIRTGPU_BLOB_MEM_GUEST_VRAM */
      if (info.blob_mem != gpu->bo_blob_mem)
         goto fail;

      /* blob_flags is not passed to the kernel and is only for internal use
       * on imports.  Set it to what works best for us.
       */
      blob_flags = virtgpu_bo_blob_flags(flags, 0);
      blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;

      /* mmap_size is only used when mappable */
      mmap_size = 0;
      if (blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE) {
         if (info.size < size)
            goto fail;

         mmap_size = size;
      }
   } else {
      /* must be classic resource here
       * set blob_flags to 0 to fail virtgpu_bo_map
       * set mmap_size to 0 since mapping is not allowed
       */
      blob_flags = 0;
      mmap_size = 0;
   }

   /* we check bo->gem_handle instead of bo->refcount because bo->refcount
    * might only be memset to 0 and is not considered initialized in theory
    */
   if (bo->gem_handle == gem_handle) {
      if (bo->base.mmap_size < mmap_size)
         goto fail;
      if (blob_flags & ~bo->blob_flags)
         goto fail;

      /* we can't use vn_renderer_bo_ref as the refcount may drop to 0
       * temporarily before virtgpu_bo_destroy grabs the lock
       */
      vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1);
   } else {
      *bo = (struct virtgpu_bo){
         .base = {
            .refcount = VN_REFCOUNT_INIT(1),
            .res_id = info.res_handle,
            .mmap_size = mmap_size,
         },
         .gem_handle = gem_handle,
         .blob_flags = blob_flags,
      };
   }

   mtx_unlock(&gpu->dma_buf_import_mutex);

   *out_bo = &bo->base;

   return VK_SUCCESS;

fail:
   if (gem_handle && bo->gem_handle != gem_handle)
      virtgpu_ioctl_gem_close(gpu, gem_handle);
   mtx_unlock(&gpu->dma_buf_import_mutex);
   return VK_ERROR_INVALID_EXTERNAL_HANDLE;
}

static VkResult
virtgpu_bo_create_from_device_memory(
   struct vn_renderer *renderer,
   VkDeviceSize size,
   vn_object_id mem_id,
   VkMemoryPropertyFlags flags,
   VkExternalMemoryHandleTypeFlags external_handles,
   struct vn_renderer_bo **out_bo)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   const uint32_t blob_flags = virtgpu_bo_blob_flags(flags, external_handles);

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id);
   if (!gem_handle)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
   *bo = (struct virtgpu_bo){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
      },
      .gem_handle = gem_handle,
      .blob_flags = blob_flags,
   };

   *out_bo = &bo->base;

   return VK_SUCCESS;
}

static void
virtgpu_shmem_destroy_now(struct vn_renderer *renderer,
                          struct vn_renderer_shmem *_shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;
   struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem;

   munmap(shmem->base.mmap_ptr, shmem->base.mmap_size);
   virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
}

static void
virtgpu_shmem_destroy(struct vn_renderer *renderer,
                      struct vn_renderer_shmem *shmem)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem))
      return;

   virtgpu_shmem_destroy_now(&gpu->base, shmem);
}

static struct vn_renderer_shmem *
virtgpu_shmem_create(struct vn_renderer *renderer, size_t size)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   struct vn_renderer_shmem *cached_shmem =
      vn_renderer_shmem_cache_get(&gpu->shmem_cache, size);
   if (cached_shmem) {
      cached_shmem->refcount = VN_REFCOUNT_INIT(1);
      return cached_shmem;
   }

   uint32_t res_id;
   uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
      gpu, gpu->shmem_blob_mem, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0,
      &res_id);
   if (!gem_handle)
      return NULL;

   void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
   if (!ptr) {
      virtgpu_ioctl_gem_close(gpu, gem_handle);
      return NULL;
   }

   struct virtgpu_shmem *shmem =
      util_sparse_array_get(&gpu->shmem_array, gem_handle);
   *shmem = (struct virtgpu_shmem){
      .base = {
         .refcount = VN_REFCOUNT_INIT(1),
         .res_id = res_id,
         .mmap_size = size,
         .mmap_ptr = ptr,
      },
      .gem_handle = gem_handle,
   };

   return &shmem->base;
}

static VkResult
virtgpu_wait(struct vn_renderer *renderer,
             const struct vn_renderer_wait *wait)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false);
   if (ret && errno != ETIME)
      return VK_ERROR_DEVICE_LOST;

   return ret ? VK_TIMEOUT : VK_SUCCESS;
}

static VkResult
virtgpu_submit(struct vn_renderer *renderer,
               const struct vn_renderer_submit *submit)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   const int ret = virtgpu_ioctl_submit(gpu, submit);
   return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS;
}

static void
virtgpu_init_renderer_info(struct virtgpu *gpu)
{
   struct vn_renderer_info *info = &gpu->base.info;

   info->drm.has_primary = gpu->has_primary;
   info->drm.primary_major = gpu->primary_major;
   info->drm.primary_minor = gpu->primary_minor;
   info->drm.has_render = true;
   info->drm.render_major = gpu->render_major;
   info->drm.render_minor = gpu->render_minor;

   info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID;
   info->pci.device_id = VIRTGPU_PCI_DEVICE_ID;

   if (gpu->bustype == DRM_BUS_PCI) {
      info->pci.has_bus_info = true;
      info->pci.domain = gpu->pci_bus_info.domain;
      info->pci.bus = gpu->pci_bus_info.bus;
      info->pci.device = gpu->pci_bus_info.dev;
      info->pci.function = gpu->pci_bus_info.func;
   } else {
      info->pci.has_bus_info = false;
   }

   info->has_dma_buf_import = true;
   /* Kernel makes every mapping coherent.  We are better off filtering
    * incoherent memory types out than silently making them coherent.
    */
   info->has_cache_management = false;
   /* TODO drm_syncobj */
   info->has_external_sync = false;

   info->has_implicit_fencing = false;

   info->max_sync_queue_count = gpu->max_sync_queue_count;

   const struct virgl_renderer_capset_venus *capset = &gpu->capset.data;
   info->wire_format_version = capset->wire_format_version;
   info->vk_xml_version = capset->vk_xml_version;
   info->vk_ext_command_serialization_spec_version =
      capset->vk_ext_command_serialization_spec_version;
   info->vk_mesa_venus_protocol_spec_version =
      capset->vk_mesa_venus_protocol_spec_version;
   info->supports_blob_id_0 = capset->supports_blob_id_0;

   /* ensure vk_extension_mask is large enough to hold all capset masks */
   STATIC_ASSERT(sizeof(info->vk_extension_mask) >=
                 sizeof(capset->vk_extension_mask1));
   memcpy(info->vk_extension_mask, capset->vk_extension_mask1,
          sizeof(capset->vk_extension_mask1));

   info->allow_vk_wait_syncs = capset->allow_vk_wait_syncs;

   if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_GUEST_VRAM)
      info->has_guest_vram = true;
}

static void
virtgpu_destroy(struct vn_renderer *renderer,
                const VkAllocationCallbacks *alloc)
{
   struct virtgpu *gpu = (struct virtgpu *)renderer;

   vn_renderer_shmem_cache_fini(&gpu->shmem_cache);

   if (gpu->fd >= 0)
      close(gpu->fd);

   mtx_destroy(&gpu->dma_buf_import_mutex);

   util_sparse_array_finish(&gpu->shmem_array);
   util_sparse_array_finish(&gpu->bo_array);

   vk_free(alloc, gpu);
}

static void
virtgpu_init_shmem_blob_mem(struct virtgpu *gpu)
{
   /* VIRTGPU_BLOB_MEM_GUEST allocates from the guest system memory.  They are
    * logically contiguous in the guest but are sglists (iovecs) in the host.
    * That makes them slower to process in the host.  With host process
    * isolation, it also becomes impossible for the host to access sglists
    * directly.
    *
    * While there are ideas (and shipped code in some cases) such as creating
    * udmabufs from sglists, or having a dedicated guest heap, it seems the
    * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D.  That is, when the
    * renderer sees a request to export a blob where
    *
    *  - blob_mem is VIRTGPU_BLOB_MEM_HOST3D
    *  - blob_flags is VIRTGPU_BLOB_FLAG_USE_MAPPABLE
    *  - blob_id is 0
    *
    * it allocates a host shmem.
    *
    * TODO cache shmems as they are costly to set up and usually require syncs
    */
   gpu->shmem_blob_mem = gpu->capset.data.supports_blob_id_0
                            ? VIRTGPU_BLOB_MEM_HOST3D
                            : VIRTGPU_BLOB_MEM_GUEST;
}

static VkResult
virtgpu_init_context(struct virtgpu *gpu)
{
   assert(!gpu->capset.version);
   const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to initialize context: %s",
                strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

static VkResult
virtgpu_init_capset(struct virtgpu *gpu)
{
   gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS;
   gpu->capset.version = 0;

   const int ret =
      virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
                             &gpu->capset.data, sizeof(gpu->capset.data));
   if (ret) {
      if (VN_DEBUG(INIT)) {
         vn_log(gpu->instance, "failed to get venus v%d capset: %s",
                gpu->capset.version, strerror(errno));
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   return VK_SUCCESS;
}

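/* check the kernel params we require and pick bo_blob_mem: prefer
 * VIRTGPU_BLOB_MEM_HOST3D when HOST_VISIBLE is advertised, otherwise fall
 * back to VIRTGPU_BLOB_MEM_GUEST_VRAM
 */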
static VkResult
virtgpu_init_params(struct virtgpu *gpu)
{
   const uint64_t required_params[] = {
      VIRTGPU_PARAM_3D_FEATURES,   VIRTGPU_PARAM_CAPSET_QUERY_FIX,
      VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_CROSS_DEVICE,
      VIRTGPU_PARAM_CONTEXT_INIT,
   };
   uint64_t val;
   for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) {
      val = virtgpu_ioctl_getparam(gpu, required_params[i]);
      if (!val) {
         if (VN_DEBUG(INIT)) {
            vn_log(gpu->instance, "required kernel param %d is missing",
                   (int)required_params[i]);
         }
         return VK_ERROR_INITIALIZATION_FAILED;
      }
   }

   val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_HOST_VISIBLE);
   if (val) {
      gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
   } else {
      val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_GUEST_VRAM);
      if (val) {
         gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_GUEST_VRAM;
      }
   }

   if (!val) {
      vn_log(gpu->instance,
             "one of required kernel params (%d or %d) is missing",
             (int)VIRTGPU_PARAM_HOST_VISIBLE, (int)VIRTGPU_PARAM_GUEST_VRAM);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_MAX_SYNC_QUEUE_COUNT);
   if (!val) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "no sync queue support");
      return VK_ERROR_INITIALIZATION_FAILED;
   }
   gpu->max_sync_queue_count = val;

   return VK_SUCCESS;
}

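/* accept a device only if it is a virtio-gpu PCI device or a platform
 * device, exposes a render node, and its DRM driver is virtio_gpu with
 * major version 0; a primary node is optional and only recorded for
 * renderer info
 */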
static VkResult
virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
{
   bool supported_bus = false;

   switch (dev->bustype) {
   case DRM_BUS_PCI:
      if (dev->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID &&
         dev->deviceinfo.pci->device_id == VIRTGPU_PCI_DEVICE_ID)
         supported_bus = true;
      break;
   case DRM_BUS_PLATFORM:
      supported_bus = true;
      break;
   default:
      break;
   }

   if (!supported_bus || !(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
      if (VN_DEBUG(INIT)) {
         const char *name = "unknown";
         for (uint32_t i = 0; i < DRM_NODE_MAX; i++) {
            if (dev->available_nodes & (1 << i)) {
               name = dev->nodes[i];
               break;
            }
         }
         vn_log(gpu->instance, "skipping DRM device %s", name);
      }
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   const char *primary_path = dev->nodes[DRM_NODE_PRIMARY];
   const char *node_path = dev->nodes[DRM_NODE_RENDER];

   int fd = open(node_path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to open %s", node_path);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   drmVersionPtr version = drmGetVersion(fd);
   if (!version || strcmp(version->name, "virtio_gpu") ||
       version->version_major != 0) {
      if (VN_DEBUG(INIT)) {
         if (version) {
            vn_log(gpu->instance, "unknown DRM driver %s version %d",
                   version->name, version->version_major);
         } else {
            vn_log(gpu->instance, "failed to get DRM driver version");
         }
      }
      if (version)
         drmFreeVersion(version);
      close(fd);
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   gpu->fd = fd;

   struct stat st;
   if (stat(primary_path, &st) == 0) {
      gpu->has_primary = true;
      gpu->primary_major = major(st.st_rdev);
      gpu->primary_minor = minor(st.st_rdev);
   } else {
      gpu->has_primary = false;
      gpu->primary_major = 0;
      gpu->primary_minor = 0;
   }
   stat(node_path, &st);
   gpu->render_major = major(st.st_rdev);
   gpu->render_minor = minor(st.st_rdev);

   gpu->bustype = dev->bustype;
   if (dev->bustype == DRM_BUS_PCI)
      gpu->pci_bus_info = *dev->businfo.pci;

   drmFreeVersion(version);

   if (VN_DEBUG(INIT))
      vn_log(gpu->instance, "using DRM device %s", node_path);

   return VK_SUCCESS;
}

static VkResult
virtgpu_open(struct virtgpu *gpu)
{
   drmDevicePtr devs[8];
   int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
   if (count < 0) {
      if (VN_DEBUG(INIT))
         vn_log(gpu->instance, "failed to enumerate DRM devices");
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   VkResult result = VK_ERROR_INITIALIZATION_FAILED;
   for (int i = 0; i < count; i++) {
      result = virtgpu_open_device(gpu, devs[i]);
      if (result == VK_SUCCESS)
         break;
   }

   drmFreeDevices(devs, count);

   return result;
}

static VkResult
virtgpu_init(struct virtgpu *gpu)
{
   util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
                          1024);
   util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);

   mtx_init(&gpu->dma_buf_import_mutex, mtx_plain);

   VkResult result = virtgpu_open(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_params(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_capset(gpu);
   if (result == VK_SUCCESS)
      result = virtgpu_init_context(gpu);
   if (result != VK_SUCCESS)
      return result;

   virtgpu_init_shmem_blob_mem(gpu);

   vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
                                virtgpu_shmem_destroy_now);

   virtgpu_init_renderer_info(gpu);

   gpu->base.ops.destroy = virtgpu_destroy;
   gpu->base.ops.submit = virtgpu_submit;
   gpu->base.ops.wait = virtgpu_wait;

   gpu->base.shmem_ops.create = virtgpu_shmem_create;
   gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;

   gpu->base.bo_ops.create_from_device_memory =
      virtgpu_bo_create_from_device_memory;
   gpu->base.bo_ops.create_from_dma_buf = virtgpu_bo_create_from_dma_buf;
   gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
   gpu->base.bo_ops.export_dma_buf = virtgpu_bo_export_dma_buf;
   gpu->base.bo_ops.map = virtgpu_bo_map;
   gpu->base.bo_ops.flush = virtgpu_bo_flush;
   gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;

   gpu->base.sync_ops.create = virtgpu_sync_create;
   gpu->base.sync_ops.create_from_syncobj = virtgpu_sync_create_from_syncobj;
   gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
   gpu->base.sync_ops.export_syncobj = virtgpu_sync_export_syncobj;
   gpu->base.sync_ops.reset = virtgpu_sync_reset;
   gpu->base.sync_ops.read = virtgpu_sync_read;
   gpu->base.sync_ops.write = virtgpu_sync_write;

   return VK_SUCCESS;
}

VkResult
vn_renderer_create_virtgpu(struct vn_instance *instance,
                           const VkAllocationCallbacks *alloc,
                           struct vn_renderer **renderer)
{
   struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
                                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!gpu)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   gpu->instance = instance;
   gpu->fd = -1;

   VkResult result = virtgpu_init(gpu);
   if (result != VK_SUCCESS) {
      virtgpu_destroy(&gpu->base, alloc);
      return result;
   }

   *renderer = &gpu->base;

   return VK_SUCCESS;
}