/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef VK_DEVICE_H
#define VK_DEVICE_H

#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_object.h"

#include "util/list.h"
#include "util/u_atomic.h"

#ifdef __cplusplus
extern "C" {
#endif

struct vk_sync;

enum vk_queue_submit_mode {
   /** Submits happen immediately
    *
    * `vkQueueSubmit()` and `vkQueueBindSparse()` call
    * `vk_queue::driver_submit` directly for all submits, and the last call
    * to `vk_queue::driver_submit` will have completed by the time
    * `vkQueueSubmit()` or `vkQueueBindSparse()` returns.
    */
   VK_QUEUE_SUBMIT_MODE_IMMEDIATE,

   /** Submits may be deferred until a future `vk_queue_flush()`
    *
    * Submits are added to the queue and `vk_queue_flush()` is called.
    * However, any submits with unsatisfied dependencies will be left on the
    * queue until a future `vk_queue_flush()` call.  This is used for
    * implementing emulated timeline semaphores without threading.
    */
   VK_QUEUE_SUBMIT_MODE_DEFERRED,
   /** Submits will be added to the queue and handled later by a thread
    *
    * This places additional requirements on the vk_sync types used by the
    * driver:
    *
    *    1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` must
    *       also support `VK_SYNC_FEATURE_WAIT_PENDING` so that the threads
    *       can sort out when a given submit has all its dependencies
    *       resolved.
    *
    *    2. All binary `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT`
    *       must also support `VK_SYNC_FEATURE_CPU_RESET` so we can reset
    *       semaphores after waiting on them.
    *
    *    3. All vk_sync types used as permanent payloads of semaphores must
    *       support `vk_sync_type::move` so that the common code can move the
    *       pending signal into a temporary vk_sync and reset the semaphore.
    *
    * This is required for shared timeline semaphores, where we need to
    * handle wait-before-signal by threading in the driver if we ever see an
    * unresolved dependency.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED,

   /** Threaded but only if we need it to resolve dependencies
    *
    * This imposes all the same requirements on `vk_sync` types as
    * `VK_QUEUE_SUBMIT_MODE_THREADED`.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
};

/** Base struct for VkDevice */
struct vk_device {
   struct vk_object_base base;

   /** Allocator used to create this device
    *
    * This is used as a fall-back for when a NULL pAllocator is passed into a
    * device-level create function such as vkCreateImage().
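    *
    * For example, an object-creating entrypoint would typically select its
    * allocator roughly as follows (sketch):
    *
    *    const VkAllocationCallbacks *alloc =
    *       pAllocator != NULL ? pAllocator : &device->alloc;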
    */
   VkAllocationCallbacks alloc;

   /** Pointer to the physical device */
   struct vk_physical_device *physical;

   /** Table of enabled extensions */
   struct vk_device_extension_table enabled_extensions;

   struct {
      bool robustBufferAccess;
   } enabled_features;

   /** Device-level dispatch table */
   struct vk_device_dispatch_table dispatch_table;

   /** Command dispatch table
    *
    * This is used for emulated secondary command buffer support.  To use
    * emulated (trace/replay) secondary command buffers:
    *
    *  1. Provide your "real" command buffer dispatch table here.  Because
    *     this doesn't get populated by vk_device_init(), the driver will
    *     have to add the vk_common entrypoints to this table itself (see
    *     the sketch below).
    *
    *  2. Add vk_enqueue_unless_primary_device_entrypoint_table to your device
    *     level dispatch table.
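    *
    * A sketch of step 1, assuming the driver keeps the table in its own
    * device struct (the drv_device wrapper, its cmd_dispatch field, and
    * drv_cmd_entrypoints are hypothetical driver names):
    *
    *    vk_device_dispatch_table_from_entrypoints(
    *       &drv_device->cmd_dispatch, &drv_cmd_entrypoints, true);
    *    vk_device_dispatch_table_from_entrypoints(
    *       &drv_device->cmd_dispatch, &vk_common_device_entrypoints, false);
    *    drv_device->vk.command_dispatch_table = &drv_device->cmd_dispatch;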
    */
   const struct vk_device_dispatch_table *command_dispatch_table;

   /* For VK_EXT_private_data */
   uint32_t private_data_next_index;

   struct list_head queues;

   struct {
      int lost;
      bool reported;
   } _lost;

   /** Checks the status of this device
    *
    * This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    * It is called before vk_queue::driver_submit and after every non-trivial
    * wait operation to ensure the device is still around.  This gives the
    * driver a hook to ask the kernel whether its device is still valid.  If
    * the kernel says the device has been lost, this hook MUST call
    * vk_device_set_lost().
    *
    * This function may be called from any thread at any time.
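    *
    * A minimal sketch (drv_query_kernel_status() stands in for a
    * hypothetical driver-specific kernel query):
    *
    *    static VkResult
    *    drv_check_status(struct vk_device *device)
    *    {
    *       if (!drv_query_kernel_status(device))
    *          return vk_device_set_lost(device, "GPU hang detected");
    *
    *       return VK_SUCCESS;
    *    }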
    */
   VkResult (*check_status)(struct vk_device *device);

   /** Creates a vk_sync that wraps a memory object
    *
    * This is always a one-shot object so it need not track any additional
    * state.  Since it's intended for synchronizing between processes using
    * implicit synchronization mechanisms, no such tracking would be valid
    * anyway.
    *
    * If `signal_memory` is set, the resulting vk_sync will be used to signal
    * the memory object from a queue via vk_queue_submit::signals.  The common
    * code guarantees that, by the time vkQueueSubmit() returns, the signal
    * operation has been submitted to the kernel via the driver's
    * vk_queue::driver_submit hook.  This means that any vkQueueSubmit() call
    * which needs implicit synchronization may block.
    *
    * If `signal_memory` is not set, it can be assumed that the memory object
    * already has a signal operation pending from some other process and we
    * need only wait on it.
   VkResult (*create_sync_for_memory)(struct vk_device *device,
                                      VkDeviceMemory memory,
                                      bool signal_memory,
                                      struct vk_sync **sync_out);

   /* Set by vk_device_set_drm_fd() */
   int drm_fd;

   /** An enum describing how timeline semaphores work */
   enum vk_device_timeline_mode {
      /** Timeline semaphores are not supported */
      VK_DEVICE_TIMELINE_MODE_NONE,

      /** Timeline semaphores are emulated with vk_timeline
       *
       * In this mode, timeline semaphores are emulated using vk_timeline,
       * which is a collection of binary semaphores, one per time point.
       * These timeline semaphores cannot be shared because the data structure
       * exists entirely in userspace.  These timelines are virtually
       * invisible to the driver; all it sees are the binary vk_syncs, one per
       * time point.
       *
       * To handle wait-before-signal, we place all vk_queue_submits in the
       * queue's submit list in vkQueueSubmit() and call vk_device_flush() at
       * key points such as the end of vkQueueSubmit() and vkSemaphoreSignal().
       * This ensures that, as soon as a given submit's dependencies are fully
       * resolvable, it gets submitted to the driver.
       */
      VK_DEVICE_TIMELINE_MODE_EMULATED,

      /** Timeline semaphores are a kernel-assisted emulation
       *
       * In this mode, timeline semaphores are still technically an emulation
       * in the sense that they don't support wait-before-signal natively.
       * Instead, all GPU-waitable objects support a CPU wait-for-pending
       * operation which lets the userspace driver wait until a given event
       * on the (possibly shared) vk_sync is pending.  The event is "pending"
       * if a job has been submitted to the kernel (possibly from a different
       * process) which will signal it.  In vkQueueSubmit(), we use this wait
       * mode to detect waits which are not yet pending and, the first time we
       * do, spawn a thread to manage the queue.  That thread waits for each
       * submit's waits to all be pending before submitting to the driver
       * queue.
       *
       * We have to be a bit more careful about a few things in this mode.
       * In particular, we can never assume that any given wait operation is
       * pending.  For instance, when we go to export a sync file from a
       * binary semaphore, we need to first wait for it to be pending.  The
       * spec guarantees that the vast majority of these waits return almost
       * immediately, but we do need to insert them for correctness.
       */
      VK_DEVICE_TIMELINE_MODE_ASSISTED,

      /** Timeline semaphores are 100% native
       *
       * In this mode, wait-before-signal is natively supported by the
       * underlying timeline implementation.  We can submit-and-forget and
       * assume that dependencies will get resolved for us by the kernel.
       * Currently, this isn't supported by any Linux primitives.
       */
      VK_DEVICE_TIMELINE_MODE_NATIVE,
   } timeline_mode;

   /** Per-device submit mode
    *
    * This represents the device-wide submit strategy, which may be different
    * from the per-queue submit mode.  See vk_queue.submit.mode for more
    * details.
    */
   enum vk_queue_submit_mode submit_mode;

#ifdef ANDROID
   mtx_t swapchain_private_mtx;
   struct hash_table *swapchain_private;
#endif
};

VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice,
                       VK_OBJECT_TYPE_DEVICE);
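
/* VK_DEFINE_HANDLE_CASTS() generates vk_device_to_handle() and
 * vk_device_from_handle(), which are most often used through the
 * VK_FROM_HANDLE() helper at the top of an entrypoint, e.g.:
 *
 *    VK_FROM_HANDLE(vk_device, device, _device);
 */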

/** Initialize a vk_device
 *
 * Along with initializing the data structures in `vk_device`, this function
 * checks that every extension specified by
 * `VkDeviceCreateInfo::ppEnabledExtensionNames` is actually supported by the
 * physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
 * unsupported extension is requested.  It also checks every feature struct
 * chained into the `pCreateInfo->pNext` chain against the features returned
 * by `vkGetPhysicalDeviceFeatures2` and returns
 * `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested.
 *
 * @param[out] device               The device to initialize
 * @param[in]  physical_device      The physical device
 * @param[in]  dispatch_table       Device-level dispatch table
 * @param[in]  pCreateInfo          VkDeviceCreateInfo pointer passed to
 *                                  `vkCreateDevice()`
 * @param[in]  alloc                Allocation callbacks passed to
 *                                  `vkCreateDevice()`
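 *
 * Typical usage from a driver's vkCreateDevice() implementation (sketch;
 * the drv_device wrapper struct with an embedded vk_device named "vk" and
 * the drv_device_entrypoints table are hypothetical driver names):
 *
 *    struct vk_device_dispatch_table dispatch_table;
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &drv_device_entrypoints, true);
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &vk_common_device_entrypoints, false);
 *
 *    VkResult result = vk_device_init(&device->vk, &physical_device->vk,
 *                                     &dispatch_table, pCreateInfo,
 *                                     pAllocator);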
 */
VkResult MUST_CHECK
vk_device_init(struct vk_device *device,
               struct vk_physical_device *physical_device,
               const struct vk_device_dispatch_table *dispatch_table,
               const VkDeviceCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *alloc);

static inline void
vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
{
   device->drm_fd = drm_fd;
}

/** Tears down a vk_device
 *
 * @param[out] device               The device to tear down
 */
void
vk_device_finish(struct vk_device *device);

/** Enables threaded submit on this device
 *
 * This doesn't ensure that threaded submit will be used.  It just disables
 * the deferred submit option for emulated timeline semaphores and forces them
 * to always use the threaded path.  It also does some checks that the vk_sync
 * types used by the driver work for threaded submit.
 *
 * This must be called before any queues are created.
 */
void vk_device_enable_threaded_submit(struct vk_device *device);

static inline bool
vk_device_supports_threaded_submit(const struct vk_device *device)
{
   return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED ||
          device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;
}

VkResult vk_device_flush(struct vk_device *device);

VkResult PRINTFLIKE(4, 5)
_vk_device_set_lost(struct vk_device *device,
                    const char *file, int line,
                    const char *msg, ...);

#define vk_device_set_lost(device, ...) \
   _vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__)
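
/* vk_device_set_lost() marks the device lost and returns
 * VK_ERROR_DEVICE_LOST, so a driver can report a lost device in a single
 * statement (sketch; the failed kernel wait is a hypothetical example):
 *
 *    if (ret == -1)
 *       return vk_device_set_lost(device, "GEM_WAIT failed: %s",
 *                                 strerror(errno));
 */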

void _vk_device_report_lost(struct vk_device *device);

static inline bool
vk_device_is_lost_no_report(struct vk_device *device)
{
   return p_atomic_read(&device->_lost.lost) > 0;
}

static inline bool
vk_device_is_lost(struct vk_device *device)
{
   int lost = vk_device_is_lost_no_report(device);
   if (unlikely(lost && !device->_lost.reported))
      _vk_device_report_lost(device);
   return lost;
}

static inline VkResult
vk_device_check_status(struct vk_device *device)
{
   if (vk_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (!device->check_status)
      return VK_SUCCESS;

   VkResult result = device->check_status(device);

   assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST);
   if (result == VK_ERROR_DEVICE_LOST)
      assert(vk_device_is_lost_no_report(device));

   return result;
}

PFN_vkVoidFunction
vk_device_get_proc_addr(const struct vk_device *device,
                        const char *name);

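/** Helpers for filling out extension feature/property structs
 *
 * Each of these fills out a single pNext-chained extension struct from the
 * corresponding core Vulkan 1.x struct and returns true if it recognized
 * `ext`, false otherwise.  They are intended for use when implementing
 * vkGetPhysicalDeviceFeatures2() and vkGetPhysicalDeviceProperties2().
 */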
bool vk_get_physical_device_core_1_1_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan11Features *core);
bool vk_get_physical_device_core_1_2_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan12Features *core);
bool vk_get_physical_device_core_1_3_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan13Features *core);

bool vk_get_physical_device_core_1_1_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan11Properties *core);
bool vk_get_physical_device_core_1_2_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan12Properties *core);
bool vk_get_physical_device_core_1_3_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan13Properties *core);

#ifdef __cplusplus
}
#endif

#endif /* VK_DEVICE_H */