/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef VK_DEVICE_H
#define VK_DEVICE_H

#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_object.h"

#include "util/list.h"
#include "util/u_atomic.h"

#ifdef __cplusplus
extern "C" {
#endif

struct vk_sync;

enum vk_queue_submit_mode {
   /** Submits happen immediately
    *
    * `vkQueueSubmit()` and `vkQueueBindSparse()` call
    * `vk_queue::driver_submit` directly for all submits, and the last call
    * to `vk_queue::driver_submit` will have completed by the time
    * `vkQueueSubmit()` or `vkQueueBindSparse()` returns.
    */
   VK_QUEUE_SUBMIT_MODE_IMMEDIATE,

   /** Submits may be deferred until a future `vk_queue_flush()`
    *
    * Submits are added to the queue and `vk_queue_flush()` is called.
    * However, any submits with unsatisfied dependencies will be left on the
    * queue until a future `vk_queue_flush()` call.  This is used for
    * implementing emulated timeline semaphores without threading.
    */
   VK_QUEUE_SUBMIT_MODE_DEFERRED,

   /** Submits will be added to the queue and handled later by a thread
    *
    * This places additional requirements on the vk_sync types used by the
    * driver:
    *
    *    1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT`
    *       also support `VK_SYNC_FEATURE_WAIT_PENDING` so that the submit
    *       thread can sort out when a given submit has all its
    *       dependencies resolved.
    *
    *    2. All binary `vk_sync` types which support
    *       `VK_SYNC_FEATURE_GPU_WAIT` also support
    *       `VK_SYNC_FEATURE_CPU_RESET` so that we can reset semaphores
    *       after waiting on them.
    *
    *    3. All `vk_sync` types used as permanent payloads of semaphores
    *       support `vk_sync_type::move` so that the common code can move
    *       the pending signal into a temporary `vk_sync` and reset the
    *       semaphore.
    *
    * This is required for shared timeline semaphores, where we need to
    * handle wait-before-signal by threading in the driver if we ever see
    * an unresolved dependency.  (See the sketch after this enum for how a
    * driver might verify these requirements.)
    */
   VK_QUEUE_SUBMIT_MODE_THREADED,

   /** Threaded, but only when needed to resolve dependencies
    *
    * This imposes all the same requirements on `vk_sync` types as
    * `VK_QUEUE_SUBMIT_MODE_THREADED`.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
};
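
/* The requirements above can be checked mechanically against a sync type's
 * feature bits.  A minimal sketch, assuming a `vk_sync_type` with the
 * `features` bitfield and `move` hook from vk_sync.h;
 * drv_sync_type_suits_threaded_submit() is a hypothetical helper, not part
 * of this API:
 *
 *    static bool
 *    drv_sync_type_suits_threaded_submit(const struct vk_sync_type *type)
 *    {
 *       // Requirement 1: anything the GPU can wait on must also support a
 *       // CPU wait-for-pending so the submit thread can order submits.
 *       if ((type->features & VK_SYNC_FEATURE_GPU_WAIT) &&
 *           !(type->features & VK_SYNC_FEATURE_WAIT_PENDING))
 *          return false;
 *
 *       // Requirement 2: binary (non-timeline) GPU-waitable types must
 *       // support a CPU reset so semaphores can be re-armed after a wait.
 *       if ((type->features & VK_SYNC_FEATURE_GPU_WAIT) &&
 *           !(type->features & VK_SYNC_FEATURE_TIMELINE) &&
 *           !(type->features & VK_SYNC_FEATURE_CPU_RESET))
 *          return false;
 *
 *       // Requirement 3: types used as permanent semaphore payloads must
 *       // implement vk_sync_type::move.
 *       return type->move != NULL;
 *    }
 */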

/** Base struct for VkDevice */
struct vk_device {
   struct vk_object_base base;

   /** Allocator used to create this device
    *
    * This is used as a fall-back when a NULL pAllocator is passed into a
    * device-level create function such as vkCreateImage().
    */
   VkAllocationCallbacks alloc;

   /** Pointer to the physical device */
   struct vk_physical_device *physical;

   /** Table of enabled extensions */
   struct vk_device_extension_table enabled_extensions;

   struct {
      bool robustBufferAccess;
   } enabled_features;

   /** Device-level dispatch table */
   struct vk_device_dispatch_table dispatch_table;

   /** Command dispatch table
    *
    * This is used for emulated secondary command buffer support.  To use
    * emulated (trace/replay) secondary command buffers:
    *
    *    1. Provide your "real" command buffer dispatch table here.  Because
    *       this doesn't get populated by vk_device_init(), the driver has
    *       to add the vk_common entrypoints to this table itself.
    *
    *    2. Add vk_enqueue_unless_primary_device_entrypoint_table to your
    *       device-level dispatch table.
    */
   const struct vk_device_dispatch_table *command_dispatch_table;

   /* For VK_EXT_private_data */
   uint32_t private_data_next_index;

   struct list_head queues;

   struct {
      int lost;
      bool reported;
   } _lost;

   /** Checks the status of this device
    *
    * This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    * It is called before vk_queue::driver_submit and after every
    * non-trivial wait operation to ensure the device is still around.  This
    * gives the driver a hook to ask the kernel if its device is still
    * valid.  If the kernel says the device has been lost, the driver MUST
    * call vk_device_set_lost().  (A sketch of a typical implementation
    * follows the create_sync_for_memory member below.)
    *
    * This function may be called from any thread at any time.
    */
   VkResult (*check_status)(struct vk_device *device);

   /** Creates a vk_sync that wraps a memory object
    *
    * This is always a one-shot object, so it need not track any additional
    * state.  Since it's intended for synchronizing between processes using
    * implicit synchronization mechanisms, no such tracking would be valid
    * anyway.
    *
    * If `signal_memory` is set, the resulting vk_sync will be used to
    * signal the memory object from a queue via vk_queue_submit::signals.
    * The common code guarantees that, by the time vkQueueSubmit() returns,
    * the signal operation has been submitted to the kernel via the driver's
    * vk_queue::driver_submit hook.  This means that any vkQueueSubmit()
    * call which needs implicit synchronization may block.
    *
    * If `signal_memory` is not set, it can be assumed that the memory
    * object already has a signal operation pending from some other process,
    * and we need only wait on it.
    */
   VkResult (*create_sync_for_memory)(struct vk_device *device,
                                      VkDeviceMemory memory,
                                      bool signal_memory,
                                      struct vk_sync **sync_out);
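
   /* A typical check_status implementation forwards a kernel liveness query
    * and flags the device as lost on failure.  A minimal sketch; drv_device
    * and drv_kernel_device_is_lost() are hypothetical driver internals, not
    * part of this API:
    *
    *    static VkResult
    *    drv_check_status(struct vk_device *vk_dev)
    *    {
    *       struct drv_device *dev =
    *          container_of(vk_dev, struct drv_device, vk);
    *
    *       if (drv_kernel_device_is_lost(dev)) {
    *          // vk_device_set_lost() records the loss and returns
    *          // VK_ERROR_DEVICE_LOST, so it can be returned directly.
    *          return vk_device_set_lost(vk_dev, "kernel reported GPU hang");
    *       }
    *
    *       return VK_SUCCESS;
    *    }
    */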

   /* Set by vk_device_set_drm_fd() */
   int drm_fd;

   /** An enum describing how timeline semaphores work */
   enum vk_device_timeline_mode {
      /** Timeline semaphores are not supported */
      VK_DEVICE_TIMELINE_MODE_NONE,

      /** Timeline semaphores are emulated with vk_timeline
       *
       * In this mode, timeline semaphores are emulated using vk_timeline,
       * which is a collection of binary semaphores, one per time point.
       * These timeline semaphores cannot be shared because the data
       * structure exists entirely in userspace.  These timelines are
       * virtually invisible to the driver; all it sees are the binary
       * vk_syncs, one per time point.
       *
       * To handle wait-before-signal, we place all vk_queue_submits in the
       * queue's submit list in vkQueueSubmit() and call vk_device_flush()
       * at key points such as the end of vkQueueSubmit() and
       * vkSemaphoreSignal().  This ensures that, as soon as a given
       * submit's dependencies are fully resolvable, it gets submitted to
       * the driver.
       */
      VK_DEVICE_TIMELINE_MODE_EMULATED,

      /** Timeline semaphores are a kernel-assisted emulation
       *
       * In this mode, timeline semaphores are still technically an
       * emulation in the sense that they don't support wait-before-signal
       * natively.  Instead, all GPU-waitable objects support a CPU
       * wait-for-pending operation which lets the userspace driver wait
       * until a given event on the (possibly shared) vk_sync is pending.
       * The event is "pending" if a job has been submitted to the kernel
       * (possibly from a different process) which will signal it.  In
       * vkQueueSubmit(), we use this wait mode to detect waits which are
       * not yet pending and, the first time we do, spawn a thread to
       * manage the queue.  That thread waits for each submit's waits to
       * all be pending before submitting to the driver queue.
       *
       * We have to be a bit more careful about a few things in this mode.
       * In particular, we can never assume that any given wait operation
       * is pending.  For instance, when we go to export a sync file from
       * a binary semaphore, we need to first wait for it to be pending.
       * The spec guarantees that the vast majority of these waits return
       * almost immediately, but we do need to insert them for correctness.
       */
      VK_DEVICE_TIMELINE_MODE_ASSISTED,

      /** Timeline semaphores are 100% native
       *
       * In this mode, wait-before-signal is natively supported by the
       * underlying timeline implementation.  We can submit-and-forget and
       * assume that dependencies will get resolved for us by the kernel.
       * Currently, this isn't supported by any Linux primitives.
       */
      VK_DEVICE_TIMELINE_MODE_NATIVE,
   } timeline_mode;

   /** Per-device submit mode
    *
    * This represents the device-wide submit strategy, which may be
    * different from the per-queue submit mode.  See vk_queue.submit.mode
    * for more details.
    */
   enum vk_queue_submit_mode submit_mode;

#ifdef ANDROID
   mtx_t swapchain_private_mtx;
   struct hash_table *swapchain_private;
#endif
};
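
/* Drivers typically embed vk_device at the start of their own device struct
 * and initialize it with vk_device_init() (declared below).  A minimal
 * sketch; drv_device, drv_device_entrypoints, dev, pdev, and fd are
 * hypothetical driver definitions, not part of this API:
 *
 *    struct drv_device {
 *       struct vk_device vk;
 *       // ... driver-private state ...
 *    };
 *
 *    // Inside a hypothetical drv_CreateDevice():
 *    struct vk_device_dispatch_table dispatch_table;
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &drv_device_entrypoints, true);
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &vk_common_device_entrypoints, false);
 *
 *    VkResult result = vk_device_init(&dev->vk, &pdev->vk, &dispatch_table,
 *                                     pCreateInfo, pAllocator);
 *    if (result != VK_SUCCESS)
 *       return result;
 *
 *    vk_device_set_drm_fd(&dev->vk, fd);
 */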

VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice,
                       VK_OBJECT_TYPE_DEVICE);

/** Initialize a vk_device
 *
 * Along with initializing the data structures in `vk_device`, this function
 * checks that every extension specified by
 * `VkDeviceCreateInfo::ppEnabledExtensionNames` is actually supported by
 * the physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
 * unsupported extension is requested.  It also checks all the feature
 * structs chained into the `pCreateInfo->pNext` chain against the features
 * returned by `vkGetPhysicalDeviceFeatures2` and returns
 * `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested.
 *
 * @param[out] device          The device to initialize
 * @param[in]  physical_device The physical device
 * @param[in]  dispatch_table  Device-level dispatch table
 * @param[in]  pCreateInfo     VkDeviceCreateInfo pointer passed to
 *                             `vkCreateDevice()`
 * @param[in]  alloc           Allocation callbacks passed to
 *                             `vkCreateDevice()`
 */
VkResult MUST_CHECK
vk_device_init(struct vk_device *device,
               struct vk_physical_device *physical_device,
               const struct vk_device_dispatch_table *dispatch_table,
               const VkDeviceCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *alloc);

static inline void
vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
{
   device->drm_fd = drm_fd;
}

/** Tears down a vk_device
 *
 * @param[out] device The device to tear down
 */
void
vk_device_finish(struct vk_device *device);

/** Enables threaded submit on this device
 *
 * This doesn't ensure that threaded submit will be used.  It just disables
 * the deferred submit option for emulated timeline semaphores and forces
 * them to always use the threaded path.  It also does some checks that the
 * vk_sync types used by the driver work for threaded submit.
 *
 * This must be called before any queues are created.
 */
void vk_device_enable_threaded_submit(struct vk_device *device);

static inline bool
vk_device_supports_threaded_submit(const struct vk_device *device)
{
   return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED ||
          device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;
}

VkResult vk_device_flush(struct vk_device *device);

VkResult PRINTFLIKE(4, 5)
_vk_device_set_lost(struct vk_device *device,
                    const char *file, int line,
                    const char *msg, ...);

#define vk_device_set_lost(device, ...) \
   _vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__)
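
/* vk_device_set_lost() is intended to be called wherever the driver first
 * learns that the device is gone; the lost state is then sticky.  A minimal
 * sketch of a submit path using it; drv_submit_ioctl() is a hypothetical
 * kernel submit wrapper, not part of this API:
 *
 *    if (drv_submit_ioctl(dev, &req) < 0) {
 *       // Record the loss and propagate VK_ERROR_DEVICE_LOST.
 *       return vk_device_set_lost(&dev->vk, "submit ioctl failed: %s",
 *                                 strerror(errno));
 *    }
 */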

void _vk_device_report_lost(struct vk_device *device);

static inline bool
vk_device_is_lost_no_report(struct vk_device *device)
{
   return p_atomic_read(&device->_lost.lost) > 0;
}

static inline bool
vk_device_is_lost(struct vk_device *device)
{
   bool lost = vk_device_is_lost_no_report(device);
   if (unlikely(lost && !device->_lost.reported))
      _vk_device_report_lost(device);
   return lost;
}

static inline VkResult
vk_device_check_status(struct vk_device *device)
{
   if (vk_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (!device->check_status)
      return VK_SUCCESS;

   VkResult result = device->check_status(device);

   assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST);
   if (result == VK_ERROR_DEVICE_LOST)
      assert(vk_device_is_lost_no_report(device));

   return result;
}

PFN_vkVoidFunction
vk_device_get_proc_addr(const struct vk_device *device,
                        const char *name);

bool vk_get_physical_device_core_1_1_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan11Features *core);
bool vk_get_physical_device_core_1_2_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan12Features *core);
bool vk_get_physical_device_core_1_3_feature_ext(struct VkBaseOutStructure *ext,
                                                 const VkPhysicalDeviceVulkan13Features *core);

bool vk_get_physical_device_core_1_1_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan11Properties *core);
bool vk_get_physical_device_core_1_2_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan12Properties *core);
bool vk_get_physical_device_core_1_3_property_ext(struct VkBaseOutStructure *ext,
                                                  const VkPhysicalDeviceVulkan13Properties *core);

#ifdef __cplusplus
}
#endif

#endif /* VK_DEVICE_H */