/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define VK_NO_PROTOTYPES
#define VK_ENABLE_BETA_EXTENSIONS

#ifdef _WIN32
#include <windows.h> /* Included to prevent conflicts with CreateSemaphore */
#include <versionhelpers.h>
#include "compat/w32dlfcn.h"
#else
#include <dlfcn.h>
#endif

#include <unistd.h>

#include "config.h"
#include "pixdesc.h"
#include "avstring.h"
#include "imgutils.h"
#include "hwcontext.h"
#include "avassert.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"

#include "vulkan.h"
#include "vulkan_loader.h"

#if CONFIG_LIBDRM
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#include "hwcontext_vaapi.h"
#endif
#endif

#if CONFIG_CUDA
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
#endif

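/* State tracked per device queue: the fence used to synchronize with command
 * completion, and the buffer references that must stay alive until the queue
 * has finished executing (released in unref_exec_ctx_deps()/free_exec_ctx()). */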
typedef struct VulkanQueueCtx {
    VkFence fence;
    VkQueue queue;
    int was_synchronous;

    /* Buffer dependencies */
    AVBufferRef **buf_deps;
    int nb_buf_deps;
    int buf_deps_alloc_size;
} VulkanQueueCtx;

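/* A command pool plus one command buffer per queue; asynchronous submissions
 * rotate round-robin through the queues via cur_queue_idx (see
 * submit_exec_ctx() below). */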
typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer *bufs;
    VulkanQueueCtx *queues;
    int nb_queues;
    int cur_queue_idx;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
    /* Vulkan library and loader functions */
    void *libvulkan;
    FFVulkanFunctions vkfn;

    /* Properties */
    VkPhysicalDeviceProperties2 props;
    VkPhysicalDeviceMemoryProperties mprops;
    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;

    /* Features */
    VkPhysicalDeviceVulkan11Features device_features_1_1;
    VkPhysicalDeviceVulkan12Features device_features_1_2;

    /* Queues */
    uint32_t qfs[5];
    int num_qfs;

    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Extensions */
    FFVulkanExtensions extensions;

    /* Settings */
    int use_linear_images;

    /* Option to allocate all image planes in a single allocation */
    int contiguous_planes;

    /* Nvidia */
    int dev_is_nvidia;

    /* Intel */
    int dev_is_intel;
} VulkanDevicePriv;

typedef struct VulkanFramesPriv {
    /* Image conversions */
    VulkanExecCtx conv_ctx;

    /* Image transfers */
    VulkanExecCtx upload_ctx;
    VulkanExecCtx download_ctx;

    /* Modifier info list to free at uninit */
    VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
} VulkanFramesPriv;

typedef struct AVVkFrameInternal {
#if CONFIG_CUDA
    /* Importing external memory into cuda is really expensive so we keep the
     * memory imported all the time */
    AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
    CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
    CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
    CUarray cu_array[AV_NUM_DATA_POINTERS];
    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
#ifdef _WIN32
    HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS];
    HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS];
#endif
#endif
} AVVkFrameInternal;

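/* Appends a strdup'd copy of `val` to a heap-allocated string list. Callers
 * must provide a local `err` variable and a `fail:` label, where the
 * partially-built list is released with RELEASE_PROPS() below. Note that if
 * av_realloc_array() fails, the previous list pointer is overwritten and its
 * contents leak; this is only reachable on OOM. */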
#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        list = av_realloc_array(list, sizeof(*list), ++count);                 \
        if (!list) {                                                           \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list[count - 1] = av_strdup(val);                                      \
        if (!list[count - 1]) {                                                \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
    } while(0)

#define RELEASE_PROPS(props, count)                                            \
    if (props) {                                                               \
        for (int i = 0; i < count; i++)                                        \
            av_free((void *)((props)[i]));                                     \
        av_free((void *)props);                                                \
    }

static const struct {
    enum AVPixelFormat pixfmt;
    const VkFormat vkfmts[4];
} vk_pixfmt_map[] = {
    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },

    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },

    { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },

    { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
    { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },

    { AV_PIX_FMT_YUV420P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUV422P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUV444P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA420P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    /* There is no AV_PIX_FMT_YUVA420P12 */
    { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA422P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_YUVA444P,   { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },

    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },

    /* Lower priority as there's an endianness-dependent overlap between these
     * and rgba/bgr0, and PACK32 formats are more limited */
    { AV_PIX_FMT_BGR32,  { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
    { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },

    { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },

    { AV_PIX_FMT_GBRAP,    { VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM } },
    { AV_PIX_FMT_GBRAP16,  { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM } },
    { AV_PIX_FMT_GBRPF32,  { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
    { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
};

const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
        if (vk_pixfmt_map[i].pixfmt == p)
            return vk_pixfmt_map[i].vkfmts;
    return NULL;
}
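
/* Usage sketch: each entry in vk_pixfmt_map holds one VkFormat per plane, so
 * a multi-planar format like NV12 yields { VK_FORMAT_R8_UNORM,
 * VK_FORMAT_R8G8_UNORM }:
 *
 *     const VkFormat *fmts = av_vkfmt_from_pixfmt(AV_PIX_FMT_NV12);
 *     if (fmts)
 *         create_plane_images(fmts[0], fmts[1]); // hypothetical helper
 */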

static const void *vk_find_struct(const void *chain, VkStructureType stype)
{
    const VkBaseInStructure *in = chain;
    while (in) {
        if (in->sType == stype)
            return in;

        in = in->pNext;
    }

    return NULL;
}

static void vk_link_struct(void *chain, void *in)
{
    VkBaseOutStructure *out = chain;
    if (!in)
        return;

    while (out->pNext)
        out = out->pNext;

    out->pNext = in;
}
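
/* pNext-chain helpers: Vulkan structs chain extensions through their pNext
 * members. A minimal sketch of how the two combine (types are placeholders):
 *
 *     VkPhysicalDeviceProperties2 props = { .sType = ... };
 *     vk_link_struct(&props, &some_ext_struct);        // append to chain tail
 *     const void *found = vk_find_struct(props.pNext,
 *                                        some_ext_struct.sType);
 */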

static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
                               int linear)
{
    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
    VulkanDevicePriv *priv = dev_ctx->internal->priv;
    FFVulkanFunctions *vk = &priv->vkfn;
    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
    int planes = av_pix_fmt_count_planes(p);

    if (!fmt)
        return 0;

    for (int i = 0; i < planes; i++) {
        VkFormatFeatureFlags flags;
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        };
        vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
        flags = linear ? prop.formatProperties.linearTilingFeatures :
                         prop.formatProperties.optimalTilingFeatures;
        if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS))
            return 0;
    }

    return 1;
}

static int load_libvulkan(AVHWDeviceContext *ctx)
{
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;

    static const char *lib_names[] = {
#if defined(_WIN32)
        "vulkan-1.dll",
#elif defined(__APPLE__)
        "libvulkan.dylib",
        "libvulkan.1.dylib",
        "libMoltenVK.dylib",
#else
        "libvulkan.so.1",
        "libvulkan.so",
#endif
    };

    for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) {
        p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL);
        if (p->libvulkan)
            break;
    }

    if (!p->libvulkan) {
        av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n");
        return AVERROR_UNKNOWN;
    }

    hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr");

    return 0;
}
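
/* Only vkGetInstanceProcAddr is resolved via dlsym(); every other entry point
 * is loaded through it by ff_vk_load_functions() (see create_instance() and
 * vulkan_device_init() below). */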

typedef struct VulkanOptExtension {
    const char *name;
    FFVulkanExtensions flag;
} VulkanOptExtension;

static const VulkanOptExtension optional_instance_exts[] = {
    /* For future use */
};

static const VulkanOptExtension optional_device_exts[] = {
    /* Misc or required by other extensions */
    { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,           FF_VK_EXT_NO_FLAG },
    { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME,  FF_VK_EXT_NO_FLAG },
    { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,         FF_VK_EXT_NO_FLAG },

    /* Imports/exports */
    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,        FF_VK_EXT_EXTERNAL_FD_MEMORY },
    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,   FF_VK_EXT_EXTERNAL_DMABUF_MEMORY },
    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS },
    { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_FD_SEM },
    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,      FF_VK_EXT_EXTERNAL_HOST_MEMORY },
#ifdef _WIN32
    { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
    { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,  FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
};
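
/* Each extension that check_extensions() finds sets the corresponding
 * FF_VK_EXT_* bit in VulkanDevicePriv.extensions; those bits gate both
 * function loading (ff_vk_load_functions()) and optional code paths, e.g.
 * the FF_VK_EXT_EXTERNAL_HOST_MEMORY check in vulkan_device_init(). */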

/* Converts return values to strings */
static const char *vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
    CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
                                           VkDebugUtilsMessageTypeFlagsEXT messageType,
                                           const VkDebugUtilsMessengerCallbackDataEXT *data,
                                           void *priv)
{
    int l;
    AVHWDeviceContext *ctx = priv;

    switch (severity) {
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT:    l = AV_LOG_INFO;    break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
    case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT:   l = AV_LOG_ERROR;   break;
    default:                                              l = AV_LOG_DEBUG;   break;
    }

    av_log(ctx, l, "%s\n", data->pMessage);
    for (int i = 0; i < data->cmdBufLabelCount; i++)
        av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);

    return 0;
}

static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                            const char * const **dst, uint32_t *num, int debug)
{
    const char *tstr;
    const char **extension_names = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err = 0, found, extensions_found = 0;

    const char *mod;
    int optional_exts_num;
    uint32_t sup_ext_count;
    char *user_exts_str = NULL;
    AVDictionaryEntry *user_exts;
    VkExtensionProperties *sup_ext;
    const VulkanOptExtension *optional_exts;

    if (!dev) {
        mod = "instance";
        optional_exts = optional_instance_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
        user_exts = av_dict_get(opts, "instance_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
    } else {
        mod = "device";
        optional_exts = optional_device_exts;
        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
        user_exts = av_dict_get(opts, "device_extensions", NULL, 0);
        if (user_exts) {
            user_exts_str = av_strdup(user_exts->value);
            if (!user_exts_str) {
                err = AVERROR(ENOMEM);
                goto fail;
            }
        }
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, NULL);
        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
        if (!sup_ext)
            return AVERROR(ENOMEM);
        vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
                                               &sup_ext_count, sup_ext);
    }

    for (int i = 0; i < optional_exts_num; i++) {
        tstr = optional_exts[i].name;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (!found)
            continue;

        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
        p->extensions |= optional_exts[i].flag;
        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
    }

    if (debug && !dev) {
        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
        found = 0;
        for (int j = 0; j < sup_ext_count; j++) {
            if (!strcmp(tstr, sup_ext[j].extensionName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
            ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
            p->extensions |= FF_VK_EXT_DEBUG_UTILS;
        } else {
            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
                   tstr);
            err = AVERROR(EINVAL);
            goto fail;
        }
    }

    if (user_exts_str) {
        char *save, *token = av_strtok(user_exts_str, "+", &save);
        while (token) {
            found = 0;
            for (int j = 0; j < sup_ext_count; j++) {
                if (!strcmp(token, sup_ext[j].extensionName)) {
                    found = 1;
                    break;
                }
            }
            if (found) {
                av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token);
                ADD_VAL_TO_LIST(extension_names, extensions_found, token);
            } else {
                av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n",
                       mod, token);
            }
            token = av_strtok(NULL, "+", &save);
        }
    }

    *dst = extension_names;
    *num = extensions_found;

    av_free(user_exts_str);
    av_free(sup_ext);
    return 0;

fail:
    RELEASE_PROPS(extension_names, extensions_found);
    av_free(user_exts_str);
    av_free(sup_ext);
    return err;
}
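
/* The list returned through *dst is heap-allocated (strings included) and is
 * eventually released with RELEASE_PROPS(); for the instance it is handed to
 * hwctx->enabled_inst_extensions in create_instance(), for the device to
 * hwctx->enabled_dev_extensions in vulkan_device_create_internal(). */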

static int check_validation_layers(AVHWDeviceContext *ctx, AVDictionary *opts,
                                   const char * const **dst, uint32_t *num,
                                   int *debug_mode)
{
    static const char default_layer[] = { "VK_LAYER_KHRONOS_validation" };

    int found = 0, err = 0;
    VulkanDevicePriv *priv = ctx->internal->priv;
    FFVulkanFunctions *vk = &priv->vkfn;

    uint32_t sup_layer_count;
    VkLayerProperties *sup_layers;

    AVDictionaryEntry *user_layers;
    char *user_layers_str = NULL;
    char *save, *token;

    const char **enabled_layers = NULL;
    uint32_t enabled_layers_count = 0;

    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
    int debug = debug_opt && strtol(debug_opt->value, NULL, 10);

    /* If `debug=0`, enable no layers at all. */
    if (debug_opt && !debug)
        return 0;

    vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL);
    sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties));
    if (!sup_layers)
        return AVERROR(ENOMEM);
    vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);

    av_log(ctx, AV_LOG_VERBOSE, "Supported validation layers:\n");
    for (int i = 0; i < sup_layer_count; i++)
        av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName);

    /* If `debug=1` is specified, enable the standard validation layer extension */
    if (debug) {
        *debug_mode = debug;
        for (int i = 0; i < sup_layer_count; i++) {
            if (!strcmp(default_layer, sup_layers[i].layerName)) {
                found = 1;
                av_log(ctx, AV_LOG_VERBOSE, "Default validation layer %s is enabled\n",
                       default_layer);
                ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, default_layer);
                break;
            }
        }
    }

    user_layers = av_dict_get(opts, "validation_layers", NULL, 0);
    if (!user_layers)
        goto end;

    user_layers_str = av_strdup(user_layers->value);
    if (!user_layers_str) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    token = av_strtok(user_layers_str, "+", &save);
    while (token) {
        found = 0;
        if (!strcmp(default_layer, token)) {
            if (debug) {
                /* If `debug=1`, the default layer is already enabled; skip it */
                token = av_strtok(NULL, "+", &save);
                continue;
            } else {
                /* If `debug=0` but the user requested the default validation
                 * layer anyway, enable debug mode so its callback is loaded */
                *debug_mode = 1;
            }
        }
        for (int j = 0; j < sup_layer_count; j++) {
            if (!strcmp(token, sup_layers[j].layerName)) {
                found = 1;
                break;
            }
        }
        if (found) {
            av_log(ctx, AV_LOG_VERBOSE, "Requested Validation Layer: %s\n", token);
            ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token);
        } else {
            av_log(ctx, AV_LOG_ERROR,
                   "Validation Layer \"%s\" not supported.\n", token);
            err = AVERROR(EINVAL);
            goto fail;
        }
        token = av_strtok(NULL, "+", &save);
    }

    av_free(user_layers_str);

end:
    av_free(sup_layers);

    *dst = enabled_layers;
    *num = enabled_layers_count;

    return 0;

fail:
    RELEASE_PROPS(enabled_layers, enabled_layers_count);
    av_free(sup_layers);
    av_free(user_layers_str);
    return err;
}

/* Creates a VkInstance */
static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
{
    int err = 0, debug_mode = 0;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VkApplicationInfo application_info = {
        .sType         = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName   = "libavutil",
        .apiVersion    = VK_API_VERSION_1_2,
        .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                         LIBAVUTIL_VERSION_MINOR,
                                         LIBAVUTIL_VERSION_MICRO),
    };
    VkInstanceCreateInfo inst_props = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &application_info,
    };

    if (!hwctx->get_proc_addr) {
        err = load_libvulkan(ctx);
        if (err < 0)
            return err;
    }

    err = ff_vk_load_functions(ctx, vk, p->extensions, 0, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n");
        return err;
    }

    err = check_validation_layers(ctx, opts, &inst_props.ppEnabledLayerNames,
                                  &inst_props.enabledLayerCount, &debug_mode);
    if (err)
        goto fail;

    /* Check for present/missing extensions */
    err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames,
                           &inst_props.enabledExtensionCount, debug_mode);
    hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames;
    hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount;
    if (err < 0)
        goto fail;

    /* Try to create the instance */
    ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);

    /* Check for errors */
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
               vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 0);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n");
        goto fail;
    }

    if (debug_mode) {
        VkDebugUtilsMessengerCreateInfoEXT dbg = {
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = vk_dbg_callback,
            .pUserData = ctx,
        };

        vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
                                         hwctx->alloc, &p->debug_ctx);
    }

    err = 0;

fail:
    RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
    return err;
}

typedef struct VulkanDeviceSelection {
    uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
    int has_uuid;
    const char *name; /* Will use this second unless NULL */
    uint32_t pci_device; /* Will use this third unless 0x0 */
    uint32_t vendor_id; /* Last resort to find something deterministic */
    int index; /* Finally fall back to index */
} VulkanDeviceSelection;
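
/* How the selection fields get filled in: vulkan_device_create() parses a
 * "device" string into .index or .name, while vulkan_device_derive() fills
 * .pci_device from a DRM fd, .vendor_id from a VAAPI vendor string, or
 * .uuid/.has_uuid from CUDA. */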

static const char *vk_dev_type(enum VkPhysicalDeviceType type)
{
    switch (type) {
    case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated";
    case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:   return "discrete";
    case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:    return "virtual";
    case VK_PHYSICAL_DEVICE_TYPE_CPU:            return "software";
    default:                                     return "unknown";
    }
}

/* Finds a device */
static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
{
    int err = 0, choice = -1;
    uint32_t num;
    VkResult ret;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    VkPhysicalDevice *devices = NULL;
    VkPhysicalDeviceIDProperties *idp = NULL;
    VkPhysicalDeviceProperties2 *prop = NULL;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
    if (ret != VK_SUCCESS || !num) {
        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
        return AVERROR(ENODEV);
    }

    devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
    if (!devices)
        return AVERROR(ENOMEM);

    ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
               vk_ret2str(ret));
        err = AVERROR(ENODEV);
        goto end;
    }

    prop = av_calloc(num, sizeof(*prop));
    if (!prop) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    idp = av_calloc(num, sizeof(*idp));
    if (!idp) {
        err = AVERROR(ENOMEM);
        goto end;
    }

    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
    for (int i = 0; i < num; i++) {
        idp[i].sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
        prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        prop[i].pNext = &idp[i];

        vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]);
        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i,
               prop[i].properties.deviceName,
               vk_dev_type(prop[i].properties.deviceType),
               prop[i].properties.deviceID);
    }

    if (select->has_uuid) {
        for (int i = 0; i < num; i++) {
            /* deviceUUID is raw bytes (NUL bytes are valid), so compare with
             * memcmp rather than a string function */
            if (!memcmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->name) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
        for (int i = 0; i < num; i++) {
            if (strstr(prop[i].properties.deviceName, select->name)) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
               select->name);
        err = AVERROR(ENODEV);
        goto end;
    } else if (select->pci_device) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
        for (int i = 0; i < num; i++) {
            if (select->pci_device == prop[i].properties.deviceID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
               select->pci_device);
        err = AVERROR(EINVAL);
        goto end;
    } else if (select->vendor_id) {
        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
        for (int i = 0; i < num; i++) {
            if (select->vendor_id == prop[i].properties.vendorID) {
                choice = i;
                goto end;
            }
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
               select->vendor_id);
        err = AVERROR(ENODEV);
        goto end;
    } else {
        if (select->index < num) {
            choice = select->index;
            goto end;
        }
        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
               select->index);
        err = AVERROR(ENODEV);
        goto end;
    }

end:
    if (choice > -1) {
        av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n",
               choice, prop[choice].properties.deviceName,
               vk_dev_type(prop[choice].properties.deviceType),
               prop[choice].properties.deviceID);
        hwctx->phys_dev = devices[choice];
    }

    av_free(devices);
    av_free(prop);
    av_free(idp);

    return err;
}

/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf,
                                    VkQueueFlagBits flags)
{
    int index = -1;
    uint32_t min_score = UINT32_MAX;

    for (int i = 0; i < num_qf; i++) {
        const VkQueueFlagBits qflags = qf[i].queueFlags;
        if (qflags & flags) {
            uint32_t score = av_popcount(qflags) + qf[i].timestampValidBits;
            if (score < min_score) {
                index = i;
                min_score = score;
            }
        }
    }

    if (index > -1)
        qf[index].timestampValidBits++;

    return index;
}
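
/* Scoring example: on a device with a graphics+compute+transfer family and a
 * dedicated transfer-only family, a VK_QUEUE_TRANSFER_BIT request picks the
 * dedicated family, since it has fewer capability bits set. The
 * timestampValidBits field is repurposed as a use counter (reset in
 * setup_queue_families()), so repeated picks spread across equally-capable
 * families. */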

static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
{
    uint32_t num;
    float *weights;
    VkQueueFamilyProperties *qf = NULL;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int graph_index, comp_index, tx_index, enc_index, dec_index;

    /* First get the number of queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
    if (!num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    /* Then allocate memory */
    qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
    if (!qf)
        return AVERROR(ENOMEM);

    /* Finally retrieve the queue families */
    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qf);

    av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
    for (int i = 0; i < num; i++) {
        av_log(ctx, AV_LOG_VERBOSE, "    %i:%s%s%s%s%s%s%s (queues: %i)\n", i,
               ((qf[i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
               ((qf[i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
               ((qf[i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
               ((qf[i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
               ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
               ((qf[i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
               ((qf[i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
               qf[i].queueCount);

        /* We use this field to keep a score of how many times we've used that
         * queue family in order to make better choices. */
        qf[i].timestampValidBits = 0;
    }

    /* Pick each queue family to use */
    graph_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
    comp_index  = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
    tx_index    = pick_queue_family(qf, num, VK_QUEUE_TRANSFER_BIT);
    enc_index   = pick_queue_family(qf, num, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
    dec_index   = pick_queue_family(qf, num, VK_QUEUE_VIDEO_DECODE_BIT_KHR);

    /* Signalling the transfer capabilities on a queue family is optional */
    if (tx_index < 0) {
        tx_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
        if (tx_index < 0)
            tx_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
    }

    hwctx->queue_family_index        = -1;
    hwctx->queue_family_comp_index   = -1;
    hwctx->queue_family_tx_index     = -1;
    hwctx->queue_family_encode_index = -1;
    hwctx->queue_family_decode_index = -1;

#define SETUP_QUEUE(qf_idx)                                                    \
    if (qf_idx > -1) {                                                         \
        int fidx = qf_idx;                                                     \
        int qc = qf[fidx].queueCount;                                          \
        VkDeviceQueueCreateInfo *pc;                                           \
                                                                               \
        if (fidx == graph_index) {                                             \
            hwctx->queue_family_index = fidx;                                  \
            hwctx->nb_graphics_queues = qc;                                    \
            graph_index = -1;                                                  \
        }                                                                      \
        if (fidx == comp_index) {                                              \
            hwctx->queue_family_comp_index = fidx;                             \
            hwctx->nb_comp_queues = qc;                                        \
            comp_index = -1;                                                   \
        }                                                                      \
        if (fidx == tx_index) {                                                \
            hwctx->queue_family_tx_index = fidx;                               \
            hwctx->nb_tx_queues = qc;                                          \
            tx_index = -1;                                                     \
        }                                                                      \
        if (fidx == enc_index) {                                               \
            hwctx->queue_family_encode_index = fidx;                           \
            hwctx->nb_encode_queues = qc;                                      \
            enc_index = -1;                                                    \
        }                                                                      \
        if (fidx == dec_index) {                                               \
            hwctx->queue_family_decode_index = fidx;                           \
            hwctx->nb_decode_queues = qc;                                      \
            dec_index = -1;                                                    \
        }                                                                      \
                                                                               \
        pc = av_realloc((void *)cd->pQueueCreateInfos,                         \
                        sizeof(*pc) * (cd->queueCreateInfoCount + 1));         \
        if (!pc) {                                                             \
            av_free(qf);                                                       \
            return AVERROR(ENOMEM);                                            \
        }                                                                      \
        cd->pQueueCreateInfos = pc;                                            \
        pc = &pc[cd->queueCreateInfoCount];                                    \
                                                                               \
        weights = av_malloc(qc * sizeof(float));                               \
        if (!weights) {                                                        \
            av_free(qf);                                                       \
            return AVERROR(ENOMEM);                                            \
        }                                                                      \
                                                                               \
        memset(pc, 0, sizeof(*pc));                                            \
        pc->sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;     \
        pc->queueFamilyIndex = fidx;                                           \
        pc->queueCount       = qc;                                             \
        pc->pQueuePriorities = weights;                                        \
                                                                               \
        for (int i = 0; i < qc; i++)                                           \
            weights[i] = 1.0f / qc;                                            \
                                                                               \
        cd->queueCreateInfoCount++;                                            \
    }

    SETUP_QUEUE(graph_index)
    SETUP_QUEUE(comp_index)
    SETUP_QUEUE(tx_index)
    SETUP_QUEUE(enc_index)
    SETUP_QUEUE(dec_index)

#undef SETUP_QUEUE

    av_free(qf);

    return 0;
}

static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           int queue_family_index, int num_queues)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = queue_family_index,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = num_queues,
    };

    cmd->nb_queues = num_queues;

    /* Create command pool */
    ret = vk->CreateCommandPool(hwctx->act_dev, &cqueue_create,
                                hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
    if (!cmd->bufs)
        return AVERROR(ENOMEM);

    cbuf_create.commandPool = cmd->pool;

    /* Allocate command buffer */
    ret = vk->AllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        av_freep(&cmd->bufs);
        return AVERROR_EXTERNAL;
    }

    cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
    if (!cmd->queues)
        return AVERROR(ENOMEM);

    for (int i = 0; i < num_queues; i++) {
        VulkanQueueCtx *q = &cmd->queues[i];
        vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
        q->was_synchronous = 1;
    }

    return 0;
}

static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    if (cmd->queues) {
        for (int i = 0; i < cmd->nb_queues; i++) {
            VulkanQueueCtx *q = &cmd->queues[i];

            /* Make sure all queues have finished executing */
            if (q->fence && !q->was_synchronous) {
                vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
                vk->ResetFences(hwctx->act_dev, 1, &q->fence);
            }

            /* Free the fence */
            if (q->fence)
                vk->DestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);

            /* Free buffer dependencies */
            for (int j = 0; j < q->nb_buf_deps; j++)
                av_buffer_unref(&q->buf_deps[j]);
            av_free(q->buf_deps);
        }
    }

    if (cmd->bufs)
        vk->FreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
    if (cmd->pool)
        vk->DestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);

    av_freep(&cmd->queues);
    av_freep(&cmd->bufs);
    cmd->pool = VK_NULL_HANDLE;
}

static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    return cmd->bufs[cmd->cur_queue_idx];
}

static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    for (int j = 0; j < q->nb_buf_deps; j++)
        av_buffer_unref(&q->buf_deps[j]);
    q->nb_buf_deps = 0;
}

static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vk->CreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
                              &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else if (!q->was_synchronous) {
        vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    unref_exec_ctx_deps(hwfc, cmd);

    ret = vk->BeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                                AVBufferRef * const *deps, int nb_deps)
{
    AVBufferRef **dst;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    unref_exec_ctx_deps(hwfc, cmd);
    return AVERROR(ENOMEM);
}

static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
                           VkSubmitInfo *s_info, AVVkFrame *f, int synchronous)
{
    VkResult ret;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               vk_ret2str(ret));
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
    s_info->commandBufferCount = 1;

    ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n",
               vk_ret2str(ret));
        unref_exec_ctx_deps(hwfc, cmd);
        return AVERROR_EXTERNAL;
    }

    if (f)
        for (int i = 0; i < s_info->signalSemaphoreCount; i++)
            f->sem_value[i]++;

    q->was_synchronous = synchronous;

    if (synchronous) {
        AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
        vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(hwctx->act_dev, 1, &q->fence);
        unref_exec_ctx_deps(hwfc, cmd);
    } else { /* Rotate queues */
        cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
    }

    return 0;
}
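
/* Typical flow for one recorded submission, using the helpers above
 * (a sketch; `ectx` is a VulkanExecCtx set up by create_exec_ctx()):
 *
 *     wait_start_exec_ctx(hwfc, ectx);              // begin recording
 *     cmd_buf = get_buf_exec_ctx(hwfc, ectx);       // record into this
 *     add_buf_dep_exec_ctx(hwfc, ectx, deps, nb);   // keep refs alive
 *     submit_exec_ctx(hwfc, ectx, &s_info, f, 0);   // async; rotates queue
 */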

static void vulkan_device_free(AVHWDeviceContext *ctx)
{
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    if (hwctx->act_dev)
        vk->DestroyDevice(hwctx->act_dev, hwctx->alloc);

    if (p->debug_ctx)
        vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
                                          hwctx->alloc);

    if (hwctx->inst)
        vk->DestroyInstance(hwctx->inst, hwctx->alloc);

    if (p->libvulkan)
        dlclose(p->libvulkan);

    RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
    RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}

static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                         VulkanDeviceSelection *dev_select,
                                         AVDictionary *opts, int flags)
{
    int err = 0;
    VkResult ret;
    AVDictionaryEntry *opt_d;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    /*
     * VkPhysicalDeviceVulkan12Features has a timelineSemaphore field, but
     * MoltenVK doesn't implement VkPhysicalDeviceVulkan12Features yet, so we
     * use VkPhysicalDeviceTimelineSemaphoreFeatures directly.
     */
    VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
    };
    VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
        .pNext = &timeline_features,
    };
    VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
        .pNext = &dev_features_1_2,
    };
    VkPhysicalDeviceFeatures2 dev_features = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &dev_features_1_1,
    };

    VkDeviceCreateInfo dev_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext = &hwctx->device_features,
    };

    hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    hwctx->device_features.pNext = &p->device_features_1_1;
    p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
    p->device_features_1_1.pNext = &p->device_features_1_2;
    p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
    ctx->free = vulkan_device_free;

    /* Create an instance if not given one */
    if ((err = create_instance(ctx, opts)))
        goto end;

    /* Find a device (if not given one) */
    if ((err = find_device(ctx, dev_select)))
        goto end;

    vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &dev_features);

    /* Try to keep in sync with libplacebo */
#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.features.NAME;
    COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended)
    COPY_FEATURE(hwctx->device_features, shaderStorageImageReadWithoutFormat)
    COPY_FEATURE(hwctx->device_features, shaderStorageImageWriteWithoutFormat)
    COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
    COPY_FEATURE(hwctx->device_features, shaderInt64)
#undef COPY_FEATURE

    /* We require timeline semaphores */
    if (!timeline_features.timelineSemaphore) {
        av_log(ctx, AV_LOG_ERROR, "Device does not support timeline semaphores!\n");
        err = AVERROR(ENOSYS);
        goto end;
    }
    p->device_features_1_2.timelineSemaphore = 1;

    /* Setup queue family */
    if ((err = setup_queue_families(ctx, &dev_info)))
        goto end;

    if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames,
                                &dev_info.enabledExtensionCount, 0))) {
        for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
            av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
        av_free((void *)dev_info.pQueueCreateInfos);
        goto end;
    }

    ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc,
                           &hwctx->act_dev);

    for (int i = 0; i < dev_info.queueCreateInfoCount; i++)
        av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities);
    av_free((void *)dev_info.pQueueCreateInfos);

    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
               vk_ret2str(ret));
        for (int i = 0; i < dev_info.enabledExtensionCount; i++)
            av_free((void *)dev_info.ppEnabledExtensionNames[i]);
        av_free((void *)dev_info.ppEnabledExtensionNames);
        err = AVERROR_EXTERNAL;
        goto end;
    }

    /* Tiled images are used by default; the "linear_images" option forces
     * linearly-tiled images instead */
    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
    if (opt_d)
        p->use_linear_images = strtol(opt_d->value, NULL, 10);

    opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0);
    if (opt_d)
        p->contiguous_planes = strtol(opt_d->value, NULL, 10);
    else
        p->contiguous_planes = -1;

    hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
    hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;

end:
    return err;
}

static int vulkan_device_init(AVHWDeviceContext *ctx)
{
    int err;
    uint32_t queue_num;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    int graph_index, comp_index, tx_index, enc_index, dec_index;

    /* Set device extension flags */
    for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) {
        for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
            if (!strcmp(hwctx->enabled_dev_extensions[i],
                        optional_device_exts[j].name)) {
                p->extensions |= optional_device_exts[j].flag;
                break;
            }
        }
    }

    err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 1);
    if (err < 0) {
        av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n");
        return err;
    }

    p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    p->props.pNext = &p->hprops;
    p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;

    vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props);
    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
           p->props.properties.deviceName);
    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment: %"SIZE_SPECIFIER"\n",
           p->props.properties.limits.minMemoryMapAlignment);
    if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
        av_log(ctx, AV_LOG_VERBOSE, "    minImportedHostPointerAlignment: %"PRIu64"\n",
               p->hprops.minImportedHostPointerAlignment);

    p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
    p->dev_is_intel  = (p->props.properties.vendorID == 0x8086);

    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
    if (!queue_num) {
        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
        return AVERROR_EXTERNAL;
    }

    graph_index = hwctx->queue_family_index;
    comp_index  = hwctx->queue_family_comp_index;
    tx_index    = hwctx->queue_family_tx_index;
    enc_index   = hwctx->queue_family_encode_index;
    dec_index   = hwctx->queue_family_decode_index;

#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)                                           \
    do {                                                                                        \
        if (ctx_qf < 0 && required) {                                                           \
            av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing"      \
                   " in the context!\n", type);                                                 \
            return AVERROR(EINVAL);                                                             \
        } else if (fidx < 0 || ctx_qf < 0) {                                                    \
            break;                                                                              \
        } else if (ctx_qf >= queue_num) {                                                       \
            av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
                   type, ctx_qf, queue_num);                                                    \
            return AVERROR(EINVAL);                                                             \
        }                                                                                       \
                                                                                                \
        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)"                        \
               " for%s%s%s%s%s\n",                                                              \
               ctx_qf, qc,                                                                      \
               ctx_qf == graph_index ? " graphics" : "",                                        \
               ctx_qf == comp_index  ? " compute" : "",                                         \
               ctx_qf == tx_index    ? " transfers" : "",                                       \
               ctx_qf == enc_index   ? " encode" : "",                                          \
               ctx_qf == dec_index   ? " decode" : "");                                         \
        graph_index = (ctx_qf == graph_index) ? -1 : graph_index;                               \
        comp_index  = (ctx_qf == comp_index)  ? -1 : comp_index;                                \
        tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
        enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
        dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
        p->qfs[p->num_qfs++] = ctx_qf;                                                          \
    } while (0)

    CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
    CHECK_QUEUE("upload",   1, tx_index,    hwctx->queue_family_tx_index,     hwctx->nb_tx_queues);
    CHECK_QUEUE("compute",  1, comp_index,  hwctx->queue_family_comp_index,   hwctx->nb_comp_queues);
    CHECK_QUEUE("encode",   0, enc_index,   hwctx->queue_family_encode_index, hwctx->nb_encode_queues);
    CHECK_QUEUE("decode",   0, dec_index,   hwctx->queue_family_decode_index, hwctx->nb_decode_queues);

#undef CHECK_QUEUE

    /* Get device capabilities */
    vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);

    return 0;
}

static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
                                AVDictionary *opts, int flags)
{
    VulkanDeviceSelection dev_select = { 0 };
    if (device && device[0]) {
        char *end = NULL;
        dev_select.index = strtol(device, &end, 10);
        if (end == device) {
            dev_select.index = 0;
            dev_select.name  = device;
        }
    }

    return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}
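
/* e.g. av_hwdevice_ctx_create(&ref, AV_HWDEVICE_TYPE_VULKAN, "1", NULL, 0)
 * selects the second enumerated device by index, while a non-numeric string
 * such as "llvmpipe" is matched as a substring of the device name (see
 * find_device() above). */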

static int vulkan_device_derive(AVHWDeviceContext *ctx,
                                AVHWDeviceContext *src_ctx,
                                AVDictionary *opts, int flags)
{
    av_unused VulkanDeviceSelection dev_select = { 0 };

    /* If there's only one device on the system, then even if it's not covered
     * by the following checks (e.g. non-PCIe ARM GPU), having an empty
     * dev_select will mean it'll get picked. */
    switch (src_ctx->type) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_HWDEVICE_TYPE_VAAPI: {
        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;

        const char *vendor = vaQueryVendorString(src_hwctx->display);
        if (!vendor) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n");
            return AVERROR_EXTERNAL;
        }

        if (strstr(vendor, "Intel"))
            dev_select.vendor_id = 0x8086;
        if (strstr(vendor, "AMD"))
            dev_select.vendor_id = 0x1002;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    case AV_HWDEVICE_TYPE_DRM: {
        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;

        drmDevice *drm_dev_info;
        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
        if (err) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
            return AVERROR_EXTERNAL;
        }

        if (drm_dev_info->bustype == DRM_BUS_PCI)
            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;

        drmFreeDevice(&drm_dev_info);

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
#if CONFIG_CUDA
    case AV_HWDEVICE_TYPE_CUDA: {
        AVHWDeviceContext *cuda_cu = src_ctx;
        AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
        AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
                                               cu_internal->cuda_device));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n");
            return AVERROR_EXTERNAL;
        }

        dev_select.has_uuid = 1;

        return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
    }
#endif
    default:
        return AVERROR(ENOSYS);
    }
}

static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                         const void *hwconfig,
                                         AVHWFramesConstraints *constraints)
{
    int count = 0;
    VulkanDevicePriv *p = ctx->internal->priv;

    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        count += pixfmt_is_supported(ctx, i, p->use_linear_images);

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        count++;
#endif

    constraints->valid_sw_formats = av_malloc_array(count + 1,
                                                    sizeof(enum AVPixelFormat));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    count = 0;
    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
        if (pixfmt_is_supported(ctx, i, p->use_linear_images))
            constraints->valid_sw_formats[count++] = i;

#if CONFIG_CUDA
    if (p->dev_is_nvidia)
        constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
    constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;

    constraints->min_width  = 0;
    constraints->min_height = 0;
    constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
    constraints->max_height = p->props.properties.limits.maxImageDimension2D;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}

static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, const void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
    VkMemoryAllocateInfo alloc_info = {
        .sType          = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext          = alloc_extension,
        .allocationSize = req->size,
    };

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
        const VkMemoryType *type = &p->mprops.memoryTypes[i];

        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((type->propertyFlags & req_flags) != req_flags)
            continue;

        /* The memory type must be large enough */
        if (req->size > p->mprops.memoryHeaps[type->heapIndex].size)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info,
                             dev_hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
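
/* Illustrative call into alloc_mem() (a sketch; "req" stands in for a
 * VkMemoryRequirements previously queried from some resource):
 *
 *     VkMemoryPropertyFlagBits mem_flags = 0x0;
 *     VkDeviceMemory mem;
 *     err = alloc_mem(ctx, &req,
 *                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 *                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
 *                     NULL, &mem_flags, &mem);
 *
 * On success, mem_flags also carries every property of the chosen memory
 * type, which is how callers later test for HOST_COHERENT to decide whether
 * mapped ranges need flushing or invalidating. */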

static void vulkan_free_internal(AVVkFrame *f)
{
    AVVkFrameInternal *internal = f->internal;

    if (!internal)
        return;

#if CONFIG_CUDA
    if (internal->cuda_fc_ref) {
        AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
        int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
        AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
        AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
        AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
        CudaFunctions *cu = cu_internal->cuda_dl;

        for (int i = 0; i < planes; i++) {
            if (internal->cu_sem[i])
                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
            if (internal->cu_mma[i])
                CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
            if (internal->ext_mem[i])
                CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i]));
#ifdef _WIN32
            if (internal->ext_sem_handle[i])
                CloseHandle(internal->ext_sem_handle[i]);
            if (internal->ext_mem_handle[i])
                CloseHandle(internal->ext_mem_handle[i]);
#endif
        }

        av_buffer_unref(&internal->cuda_fc_ref);
    }
#endif

    av_freep(&f->internal);
}

static void vulkan_frame_free(void *opaque, uint8_t *data)
{
    AVVkFrame *f = (AVVkFrame *)data;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    int planes = av_pix_fmt_count_planes(hwfc->sw_format);

    /* We could use vkWaitSemaphores, but the validation layer seems to have
     * issues tracking command buffer execution state on uninit. */
    vk->DeviceWaitIdle(hwctx->act_dev);

    vulkan_free_internal(f);

    for (int i = 0; i < planes; i++) {
        vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                          void *alloc_pnext, size_t alloc_pnext_stride)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanFramesContext *hwfctx = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };

    VkMemoryRequirements cont_memory_requirements = { 0 };
    int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 };
    int cont_mem_size = 0;

    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    for (int i = 0; i < planes; i++) {
        int use_ded_mem;
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

        if (f->tiling == VK_IMAGE_TILING_LINEAR)
            req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                  p->props.properties.limits.minMemoryMapAlignment);

        if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
            if (ded_req.requiresDedicatedAllocation) {
                av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, "
                                           "device requires dedicated image allocation!\n");
                return AVERROR(EINVAL);
            } else if (!i) {
                cont_memory_requirements = req.memoryRequirements;
            } else if (cont_memory_requirements.memoryTypeBits !=
                       req.memoryRequirements.memoryTypeBits) {
                av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 "
                                           "and %i, cannot allocate in a single region!\n",
                       i);
                return AVERROR(EINVAL);
            }

            cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size,
                                            req.memoryRequirements.alignment);
            cont_mem_size += cont_mem_size_list[i];
            continue;
        }

        /* In case the implementation prefers/requires dedicated allocation */
        use_ded_mem = ded_req.prefersDedicatedAllocation |
                      ded_req.requiresDedicatedAllocation;
        if (use_ded_mem)
            ded_alloc.image = f->img[i];

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &req.memoryRequirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                             &f->flags, &f->mem[i])))
            return err;

        f->size[i] = req.memoryRequirements.size;
        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
        bind_info[i].image  = f->img[i];
        bind_info[i].memory = f->mem[i];
    }

    if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
        cont_memory_requirements.size = cont_mem_size;

        /* Allocate memory */
        if ((err = alloc_mem(ctx, &cont_memory_requirements,
                             f->tiling == VK_IMAGE_TILING_LINEAR ?
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                             (void *)alloc_pnext,
                             &f->flags, &f->mem[0])))
            return err;

        f->size[0] = cont_memory_requirements.size;

        for (int i = 0, offset = 0; i < planes; i++) {
            bind_info[i].sType        = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
            bind_info[i].image        = f->img[i];
            bind_info[i].memory       = f->mem[0];
            bind_info[i].memoryOffset = offset;

            f->offset[i] = bind_info[i].memoryOffset;
            offset += cont_mem_size_list[i];
        }
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
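
/* A note on the contiguous path above: each plane's size is aligned to its
 * own requirement and a single allocation backs all planes, with plane i
 * bound at the sum of the aligned sizes of planes 0..i-1. For instance, two
 * planes of 1 MiB and 512 KiB (with, say, 4 KiB alignment) become one
 * 1.5 MiB allocation with plane offsets 0 and 0x100000. */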

enum PrepMode {
    PREP_MODE_WRITE,
    PREP_MODE_EXTERNAL_EXPORT,
    PREP_MODE_EXTERNAL_IMPORT
};
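
/* PREP_MODE_WRITE transitions the images into TRANSFER_DST_OPTIMAL for
 * uploads; the two external modes keep the GENERAL layout and perform a
 * queue-family ownership transfer to (EXPORT) or from (IMPORT) an external
 * API via VK_QUEUE_FAMILY_EXTERNAL_KHR, as set up in prepare_frame() below. */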

static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    int err;
    uint32_t src_qf, dst_qf;
    VkImageLayout new_layout;
    VkAccessFlags new_access;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };

    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .pSignalSemaphoreValues = sem_sig_val,
        .signalSemaphoreValueCount = planes,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext                = &s_timeline_sem_info,
        .pSignalSemaphores    = frame->sem,
        .signalSemaphoreCount = planes,
    };

    VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
    for (int i = 0; i < planes; i++) {
        wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
        sem_sig_val[i] = frame->sem_value[i] + 1;
    }

    switch (pmode) {
    case PREP_MODE_WRITE:
        new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
        src_qf     = VK_QUEUE_FAMILY_IGNORED;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        break;
    case PREP_MODE_EXTERNAL_IMPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        src_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
        s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
        s_timeline_sem_info.waitSemaphoreValueCount = planes;
        s_info.pWaitSemaphores = frame->sem;
        s_info.pWaitDstStageMask = wait_st;
        s_info.waitSemaphoreCount = planes;
        break;
    case PREP_MODE_EXTERNAL_EXPORT:
        new_layout = VK_IMAGE_LAYOUT_GENERAL;
        new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
        src_qf     = VK_QUEUE_FAMILY_IGNORED;
        dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
        s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
        s_timeline_sem_info.waitSemaphoreValueCount = planes;
        s_info.pWaitSemaphores = frame->sem;
        s_info.pWaitDstStageMask = wait_st;
        s_info.waitSemaphoreCount = planes;
        break;
    }

    if ((err = wait_start_exec_ctx(hwfc, ectx)))
        return err;

    /* Change the image layout to something more optimal for writes.
     * This also signals the newly created semaphore, making it usable
     * for synchronization */
    for (int i = 0; i < planes; i++) {
        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[i].srcAccessMask = 0x0;
        img_bar[i].dstAccessMask = new_access;
        img_bar[i].oldLayout = frame->layout[i];
        img_bar[i].newLayout = new_layout;
        img_bar[i].srcQueueFamilyIndex = src_qf;
        img_bar[i].dstQueueFamilyIndex = dst_qf;
        img_bar[i].image = frame->img[i];
        img_bar[i].subresourceRange.levelCount = 1;
        img_bar[i].subresourceRange.layerCount = 1;
        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[i].newLayout;
        frame->access[i] = img_bar[i].dstAccessMask;
    }

    vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                           VK_PIPELINE_STAGE_TRANSFER_BIT,
                           0, 0, NULL, 0, NULL, planes, img_bar);

    return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
}

static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
                                int frame_w, int frame_h, int plane)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);

    /* Currently always true unless gray + alpha support is added */
    if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB ||
        !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) {
        *w = frame_w;
        *h = frame_h;
        return;
    }

    *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w);
    *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h);
}
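
/* For example, a 1920x1080 AV_PIX_FMT_YUV420P frame (log2_chroma_w/h == 1)
 * yields 1920x1080 for plane 0 and 960x540 for planes 1 and 2; AV_CEIL_RSHIFT
 * rounds up, so odd sizes like 1919x1079 also map to 960x540 chroma planes. */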

static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                        VkImageTiling tiling, VkImageUsageFlagBits usage,
                        void *create_pnext)
{
    int err;
    VkResult ret;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    enum AVPixelFormat format = hwfc->sw_format;
    const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
    const int planes = av_pix_fmt_count_planes(format);

    VkExportSemaphoreCreateInfo ext_sem_info = {
        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
#ifdef _WIN32
        .handleTypes = IsWindows8OrGreater()
            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
#else
        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
#endif
    };

    VkSemaphoreTypeCreateInfo sem_type_info = {
        .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
#ifdef _WIN32
        .pNext         = p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
#else
        .pNext         = p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
#endif
        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
        .initialValue  = 0,
    };

    VkSemaphoreCreateInfo sem_spawn = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = &sem_type_info,
    };

    AVVkFrame *f = av_vk_frame_alloc();
    if (!f) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        return AVERROR(ENOMEM);
    }

    /* Create the images */
    for (int i = 0; i < planes; i++) {
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = create_pnext,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = img_fmts[i],
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
            .tiling                = tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
            .usage                 = usage,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                      VK_SHARING_MODE_EXCLUSIVE,
        };

        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     format, hwfc->width, hwfc->height, i);

        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        /* Create semaphore */
        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    f->flags  = 0x0;
    f->tiling = tiling;

    *frame = f;
    return 0;

fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
    return err;
}

/* Checks if an export flag is enabled, and if it is, ORs it into *iexp */
static void try_export_flags(AVHWFramesContext *hwfc,
                             VkExternalMemoryHandleTypeFlags *comp_handle_types,
                             VkExternalMemoryHandleTypeFlagBits *iexp,
                             VkExternalMemoryHandleTypeFlagBits exp)
{
    VkResult ret;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
        vk_find_struct(hwctx->create_pnext,
                       VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
    int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
    int nb_mods;

    VkExternalImageFormatProperties eprops = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
    };
    VkImageFormatProperties2 props = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
        .pNext = &eprops,
    };
    VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
        .sType                 = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
        .pNext                 = NULL,
        .pQueueFamilyIndices   = p->qfs,
        .queueFamilyIndexCount = p->num_qfs,
        .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                  VK_SHARING_MODE_EXCLUSIVE,
    };
    VkPhysicalDeviceExternalImageFormatInfo enext = {
        .sType      = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
        .handleType = exp,
        .pNext      = has_mods ? &phy_dev_mod_info : NULL,
    };
    VkPhysicalDeviceImageFormatInfo2 pinfo = {
        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
        .pNext  = !exp ? NULL : &enext,
        .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0],
        .type   = VK_IMAGE_TYPE_2D,
        .tiling = hwctx->tiling,
        .usage  = hwctx->usage,
        .flags  = VK_IMAGE_CREATE_ALIAS_BIT,
    };

    nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
    for (int i = 0; i < nb_mods; i++) {
        if (has_mods)
            phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i];

        ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
                                                          &pinfo, &props);

        if (ret == VK_SUCCESS) {
            *iexp |= exp;
            *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
        }
    }
}

static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
{
    int err;
    AVVkFrame *f;
    AVBufferRef *avbuf = NULL;
    AVHWFramesContext *hwfc = opaque;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    VulkanFramesPriv *fp = hwfc->internal->priv;
    VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
    VkExternalMemoryHandleTypeFlags e = 0x0;

    VkExternalMemoryImageCreateInfo eiinfo = {
        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext = hwctx->create_pnext,
    };

#ifdef _WIN32
    if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
                             ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                             : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
    if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);

    if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
#endif

    for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
        eminfo[i].pNext       = hwctx->alloc_pnext[i];
        eminfo[i].handleTypes = e;
    }

    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
                       eiinfo.handleTypes ? &eiinfo : NULL);
    if (err)
        return NULL;

    err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
    if (err)
        goto fail;

    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
    if (err)
        goto fail;

    avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
                             vulkan_frame_free, hwfc, 0);
    if (!avbuf)
        goto fail;

    return avbuf;

fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
    return NULL;
}

static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
    VulkanFramesPriv *fp = hwfc->internal->priv;

    if (fp->modifier_info) {
        if (fp->modifier_info->pDrmFormatModifiers)
            av_freep(&fp->modifier_info->pDrmFormatModifiers);
        av_freep(&fp->modifier_info);
    }

    free_exec_ctx(hwfc, &fp->conv_ctx);
    free_exec_ctx(hwfc, &fp->upload_ctx);
    free_exec_ctx(hwfc, &fp->download_ctx);
}

static int vulkan_frames_init(AVHWFramesContext *hwfc)
{
    int err;
    AVVkFrame *f;
    AVVulkanFramesContext *hwctx = hwfc->hwctx;
    VulkanFramesPriv *fp = hwfc->internal->priv;
    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
    const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS);

    /* Default tiling flags */
    hwctx->tiling = hwctx->tiling ? hwctx->tiling :
                    has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
                    p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
                    VK_IMAGE_TILING_OPTIMAL;

    if (!hwctx->usage)
        hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;

    if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) {
        if (p->contiguous_planes == 1 ||
            ((p->contiguous_planes == -1) && p->dev_is_intel))
            hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY;
    }

    modifier_info = vk_find_struct(hwctx->create_pnext,
                                   VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);

    /* Get the supported modifiers if the user has not given any. */
    if (has_modifiers && !modifier_info) {
        const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
        VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
        FFVulkanFunctions *vk = &p->vkfn;
        VkDrmFormatModifierPropertiesEXT *mod_props;
        uint64_t *modifiers;
        int modifier_count = 0;

        VkDrmFormatModifierPropertiesListEXT mod_props_list = {
            .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
            .pNext = NULL,
            .drmFormatModifierCount = 0,
            .pDrmFormatModifierProperties = NULL,
        };
        VkFormatProperties2 prop = {
            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
            .pNext = &mod_props_list,
        };

        /* Get all supported modifiers */
        vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);

        if (!mod_props_list.drmFormatModifierCount) {
            av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n");
            return AVERROR(EINVAL);
        }

        /* Create a structure to hold the modifier list info */
        modifier_info = av_mallocz(sizeof(*modifier_info));
        if (!modifier_info)
            return AVERROR(ENOMEM);

        modifier_info->pNext = NULL;
        modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;

        /* Add structure to the image creation pNext chain */
        if (!hwctx->create_pnext)
            hwctx->create_pnext = modifier_info;
        else
            vk_link_struct(hwctx->create_pnext, (void *)modifier_info);

        /* Backup the allocated struct to be freed later */
        fp->modifier_info = modifier_info;

        /* Allocate list of modifiers */
        modifiers = av_mallocz(mod_props_list.drmFormatModifierCount *
                               sizeof(*modifiers));
        if (!modifiers)
            return AVERROR(ENOMEM);

        modifier_info->pDrmFormatModifiers = modifiers;

        /* Allocate a temporary list to hold all modifiers supported */
        mod_props = av_mallocz(mod_props_list.drmFormatModifierCount *
                               sizeof(*mod_props));
        if (!mod_props)
            return AVERROR(ENOMEM);

        mod_props_list.pDrmFormatModifierProperties = mod_props;

        /* Finally get all modifiers from the device */
        vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);

        /* Reject any modifiers that don't match our requirements */
        for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) {
            if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage))
                continue;

            modifiers[modifier_count++] = mod_props[i].drmFormatModifier;
        }

        if (!modifier_count) {
            av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports"
                                       " the usage flags!\n");
            av_freep(&mod_props);
            return AVERROR(EINVAL);
        }

        modifier_info->drmFormatModifierCount = modifier_count;
        av_freep(&mod_props);
    }

    err = create_exec_ctx(hwfc, &fp->conv_ctx,
                          dev_hwctx->queue_family_comp_index,
                          dev_hwctx->nb_comp_queues);
    if (err)
        return err;

    err = create_exec_ctx(hwfc, &fp->upload_ctx,
                          dev_hwctx->queue_family_tx_index,
                          dev_hwctx->nb_tx_queues);
    if (err)
        return err;

    err = create_exec_ctx(hwfc, &fp->download_ctx,
                          dev_hwctx->queue_family_tx_index, 1);
    if (err)
        return err;

    /* Test to see if allocation will fail */
    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
                       hwctx->create_pnext);
    if (err)
        return err;

    vulkan_frame_free(hwfc, (uint8_t *)f);

    /* If user did not specify a pool, hwfc->pool will be set to the internal one
     * in hwcontext.c just after this gets called */
    if (!hwfc->pool) {
        hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
                                                             hwfc, vulkan_pool_alloc,
                                                             NULL);
        if (!hwfc->internal->pool_internal)
            return AVERROR(ENOMEM);
    }

    return 0;
}
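
/* Illustrative user-level setup that exercises the init path above (sketch;
 * "vk_dev_ref" is assumed to be a previously created Vulkan device ref):
 *
 *     AVBufferRef *fc_ref = av_hwframe_ctx_alloc(vk_dev_ref);
 *     AVHWFramesContext *fc = (AVHWFramesContext *)fc_ref->data;
 *     fc->format    = AV_PIX_FMT_VULKAN;
 *     fc->sw_format = AV_PIX_FMT_NV12;
 *     fc->width     = 1920;
 *     fc->height    = 1080;
 *     int err = av_hwframe_ctx_init(fc_ref);
 *
 * After this, av_hwframe_get_buffer() hands out frames backed by
 * vulkan_pool_alloc() above. */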

static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
{
    frame->buf[0] = av_buffer_pool_get(hwfc->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    frame->data[0] = frame->buf[0]->data;
    frame->format  = AV_PIX_FMT_VULKAN;
    frame->width   = hwfc->width;
    frame->height  = hwfc->height;

    return 0;
}

static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
                                       enum AVHWFrameTransferDirection dir,
                                       enum AVPixelFormat **formats)
{
    enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts));
    if (!fmts)
        return AVERROR(ENOMEM);

    fmts[0] = hwfc->sw_format;
    fmts[1] = AV_PIX_FMT_NONE;

    *formats = fmts;
    return 0;
}

typedef struct VulkanMapping {
    AVVkFrame *frame;
    int flags;
} VulkanMapping;

static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    VulkanMapping *map = hwmap->priv;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    /* Check if buffer needs flushing */
    if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
        !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        VkResult ret;
        VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };

        for (int i = 0; i < planes; i++) {
            flush_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
            flush_ranges[i].memory = map->frame->mem[i];
            flush_ranges[i].size   = VK_WHOLE_SIZE;
        }

        ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, planes,
                                          flush_ranges);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   vk_ret2str(ret));
        }
    }

    for (int i = 0; i < planes; i++)
        vk->UnmapMemory(hwctx->act_dev, map->frame->mem[i]);

    av_free(map);
}

static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
                                   const AVFrame *src, int flags)
{
    VkResult ret;
    int err, mapped_mem_count = 0, mem_planes = 0;
    AVVkFrame *f = (AVVkFrame *)src->data[0];
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    AVVulkanFramesContext *hwfctx = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
    if (!map)
        return AVERROR(ENOMEM);

    if (src->format != AV_PIX_FMT_VULKAN) {
        av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
               av_get_pix_fmt_name(src->format));
        err = AVERROR(EINVAL);
        goto fail;
    }

    if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
        !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
        av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
                                   "and linear!\n");
        err = AVERROR(EINVAL);
        goto fail;
    }

    dst->width  = src->width;
    dst->height = src->height;

    mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes;
    for (int i = 0; i < mem_planes; i++) {
        ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0,
                            VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }
        mapped_mem_count++;
    }

    if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
        for (int i = 0; i < planes; i++)
            dst->data[i] = dst->data[0] + f->offset[i];
    }

    /* Check if the memory contents matter */
    if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
        !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
        VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
        for (int i = 0; i < planes; i++) {
            map_mem_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
            map_mem_ranges[i].size   = VK_WHOLE_SIZE;
            map_mem_ranges[i].memory = f->mem[i];
        }

        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes,
                                               map_mem_ranges);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }
    }

    for (int i = 0; i < planes; i++) {
        VkImageSubresource sub = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        };
        VkSubresourceLayout layout;
        vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
        dst->linesize[i] = layout.rowPitch;
    }

    map->frame = f;
    map->flags = flags;

    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
                                &vulkan_unmap_frame, map);
    if (err < 0)
        goto fail;

    return 0;

fail:
    for (int i = 0; i < mapped_mem_count; i++)
        vk->UnmapMemory(hwctx->act_dev, f->mem[i]);

    av_free(map);
    return err;
}
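
/* Illustrative mapping call (sketch): for frames created with linear tiling
 * in host-visible memory, a zero-copy CPU view can be obtained with:
 *
 *     AVFrame *cpu = av_frame_alloc();
 *     cpu->format = fc->sw_format; // mapping only exposes the sw_format
 *     err = av_hwframe_map(cpu, vk_frame, AV_HWFRAME_MAP_READ);
 *
 * Releasing the mapping (e.g. via av_frame_unref(cpu)) ends up in
 * vulkan_unmap_frame() above, which also flushes writes when needed. */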

#if CONFIG_LIBDRM
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    AVVkFrame *f = hwmap->priv;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    VkSemaphoreWaitInfo wait_info = {
        .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .flags          = 0x0,
        .pSemaphores    = f->sem,
        .pValues        = f->sem_value,
        .semaphoreCount = planes,
    };

    vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);

    vulkan_free_internal(f);

    for (int i = 0; i < planes; i++) {
        vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
        vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }

    av_free(f);
}

static const struct {
    uint32_t drm_fourcc;
    VkFormat vk_format;
} vulkan_drm_format_map[] = {
    { DRM_FORMAT_R8,       VK_FORMAT_R8_UNORM       },
    { DRM_FORMAT_R16,      VK_FORMAT_R16_UNORM      },
    { DRM_FORMAT_GR88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_RG88,     VK_FORMAT_R8G8_UNORM     },
    { DRM_FORMAT_GR1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_RG1616,   VK_FORMAT_R16G16_UNORM   },
    { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
    { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
    { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
};
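
/* DRM fourccs describe little-endian byte order, so e.g. DRM_FORMAT_ARGB8888
 * stores bytes B, G, R, A from the lowest address upwards, which is why it
 * pairs with VK_FORMAT_B8G8R8A8_UNORM (whose components are likewise named
 * in memory order). */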

static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc)
            return vulkan_drm_format_map[i].vk_format;
    return VK_FORMAT_UNDEFINED;
}

static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame,
                                          const AVFrame *src)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f;
    int bind_counts = 0;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    VulkanFramesPriv *fp = hwfc->internal->priv;
    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
    VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
    VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES];

    for (int i = 0; i < desc->nb_layers; i++) {
        if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) {
            av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n",
                   desc->layers[i].format);
            return AVERROR(EINVAL);
        }
    }

    if (!(f = av_vk_frame_alloc())) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
        err = AVERROR(ENOMEM);
        goto fail;
    }

    f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;

        /* Semaphore */
        VkSemaphoreTypeCreateInfo sem_type_info = {
            .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
            .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
            .initialValue  = 0,
        };
        VkSemaphoreCreateInfo sem_spawn = {
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
            .pNext = &sem_type_info,
        };

        /* Image creation */
        VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES];
        VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
            .drmFormatModifier = desc->objects[0].format_modifier,
            .drmFormatModifierPlaneCount = planes,
            .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts,
        };
        VkExternalMemoryImageCreateInfo ext_img_spec = {
            .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
            .pNext       = &ext_img_mod_spec,
            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkImageCreateInfo create_info = {
            .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
            .pNext                 = &ext_img_spec,
            .imageType             = VK_IMAGE_TYPE_2D,
            .format                = drm_to_vulkan_fmt(desc->layers[i].format),
            .extent.depth          = 1,
            .mipLevels             = 1,
            .arrayLayers           = 1,
            .flags                 = 0x0, /* ALIAS flag is implicit for imported images */
            .tiling                = f->tiling,
            .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
            .usage                 = VK_IMAGE_USAGE_SAMPLED_BIT |
                                     VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
            .samples               = VK_SAMPLE_COUNT_1_BIT,
            .pQueueFamilyIndices   = p->qfs,
            .queueFamilyIndexCount = p->num_qfs,
            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
                                                      VK_SHARING_MODE_EXCLUSIVE,
        };

        /* Image format verification */
        VkExternalImageFormatProperties ext_props = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
        };
        VkImageFormatProperties2 props_ret = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
            .pNext = &ext_props,
        };
        VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
            .sType                 = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
            .drmFormatModifier     = ext_img_mod_spec.drmFormatModifier,
            .pQueueFamilyIndices   = create_info.pQueueFamilyIndices,
            .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
            .sharingMode           = create_info.sharingMode,
        };
        VkPhysicalDeviceExternalImageFormatInfo props_ext = {
            .sType      = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
            .pNext      = &props_drm_mod,
            .handleType = ext_img_spec.handleTypes,
        };
        VkPhysicalDeviceImageFormatInfo2 fmt_props = {
            .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
            .pNext  = &props_ext,
            .format = create_info.format,
            .type   = create_info.imageType,
            .tiling = create_info.tiling,
            .usage  = create_info.usage,
            .flags  = create_info.flags,
        };

        /* Check if importing is possible for this combination of parameters */
        ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev,
                                                          &fmt_props, &props_ret);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        /* Set the image width/height */
        get_plane_wh(&create_info.extent.width, &create_info.extent.height,
                     hwfc->sw_format, src->width, src->height, i);

        /* Set the subresource layout based on the layer properties */
        for (int j = 0; j < planes; j++) {
            ext_img_layouts[j].offset     = desc->layers[i].planes[j].offset;
            ext_img_layouts[j].rowPitch   = desc->layers[i].planes[j].pitch;
            ext_img_layouts[j].size       = 0; /* The specs say so for all 3 */
            ext_img_layouts[j].arrayPitch = 0;
            ext_img_layouts[j].depthPitch = 0;
        }

        /* Create image */
        ret = vk->CreateImage(hwctx->act_dev, &create_info,
                              hwctx->alloc, &f->img[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
                   vk_ret2str(ret));
            err = AVERROR(EINVAL);
            goto fail;
        }

        ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
                                  hwctx->alloc, &f->sem[i]);
        if (ret != VK_SUCCESS) {
            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            goto fail;
        }

        /* We'd import a semaphore onto the one we created using
         * vkImportSemaphoreFdKHR, but unfortunately neither DRM nor VAAPI
         * offers us anything we could import and sync with, so instead
         * just signal the semaphore we created. */

        f->layout[i] = create_info.initialLayout;
        f->access[i] = 0x0;
        f->sem_value[i] = 0;
    }

    for (int i = 0; i < desc->nb_objects; i++) {
        /* Memory requirements */
        VkImageMemoryRequirementsInfo2 req_desc = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
            .image = f->img[i],
        };
        VkMemoryDedicatedRequirements ded_req = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
        };
        VkMemoryRequirements2 req2 = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &ded_req,
        };

        /* Allocation/importing */
        VkMemoryFdPropertiesKHR fdmp = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
        };
        VkImportMemoryFdInfoKHR idesc = {
            .sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
            .fd         = dup(desc->objects[i].fd),
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };
        VkMemoryDedicatedAllocateInfo ded_alloc = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
            .pNext = &idesc,
            .image = req_desc.image,
        };

        /* Get object properties */
        ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev,
                                           VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
                                           idesc.fd, &fdmp);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL;
            close(idesc.fd);
            goto fail;
        }

        vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2);

        /* Only a single bit must be set, not a range, and it must match */
        req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;

        err = alloc_mem(ctx, &req2.memoryRequirements,
                        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                        (ded_req.prefersDedicatedAllocation ||
                         ded_req.requiresDedicatedAllocation) ?
                            &ded_alloc : ded_alloc.pNext,
                        &f->flags, &f->mem[i]);
        if (err) {
            close(idesc.fd);
            return err;
        }

        f->size[i] = req2.memoryRequirements.size;
    }

    for (int i = 0; i < desc->nb_layers; i++) {
        const int planes = desc->layers[i].nb_planes;
        for (int j = 0; j < planes; j++) {
            VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
                                           j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
                                                    VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;

            plane_info[bind_counts].sType       = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
            plane_info[bind_counts].pNext       = NULL;
            plane_info[bind_counts].planeAspect = aspect;

            bind_info[bind_counts].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
            bind_info[bind_counts].pNext  = planes > 1 ? &plane_info[bind_counts] : NULL;
            bind_info[bind_counts].image  = f->img[i];
            bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];

            /* Offset is already signalled via pPlaneLayouts above */
            bind_info[bind_counts].memoryOffset = 0;

            bind_counts++;
        }
    }

    /* Bind the allocated memory to the images */
    ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
               vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_IMPORT);
    if (err)
        goto fail;

    *frame = f;

    return 0;

fail:
    for (int i = 0; i < desc->nb_layers; i++) {
        vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
        vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
    }
    for (int i = 0; i < desc->nb_objects; i++)
        vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);

    av_free(f);

    return err;
}

static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err = 0;
    AVVkFrame *f;

    if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src)))
        return err;

    /* The unmapping function will free this */
    dst->data[0] = (uint8_t *)f;
    dst->width   = src->width;
    dst->height  = src->height;

    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
                                &vulkan_unmap_from_drm, f);
    if (err < 0)
        goto fail;

    av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");

    return 0;

fail:
    vulkan_frame_free(hwfc, (uint8_t *)f);
    dst->data[0] = NULL;
    return err;
}

#if CONFIG_VAAPI
static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
                                 AVFrame *dst, const AVFrame *src,
                                 int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx;
    VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3];

    if (!tmp)
        return AVERROR(ENOMEM);

    /* We have to sync explicitly since, as the earlier comment notes, there
     * are no semaphores we could import from VAAPI */
    vaSyncSurface(vaapi_ctx->display, surface_id);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = av_hwframe_map(tmp, src, flags);
    if (err < 0)
        goto fail;

    err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif

#if CONFIG_CUDA
static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                 AVBufferRef *cuda_hwfc,
                                 const AVFrame *frame)
{
    int err;
    VkResult ret;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
                                                     CU_AD_FORMAT_UNSIGNED_INT8;

    dst_f = (AVVkFrame *)frame->data[0];

    dst_int = dst_f->internal;
    if (!dst_int || !dst_int->cuda_fc_ref) {
        if (!dst_f->internal)
            dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));

        if (!dst_int)
            return AVERROR(ENOMEM);

        dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
        if (!dst_int->cuda_fc_ref) {
            av_freep(&dst_f->internal);
            return AVERROR(ENOMEM);
        }

        for (int i = 0; i < planes; i++) {
            CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
                .offset = 0,
                .arrayDesc = {
                    .Depth = 0,
                    .Format = cufmt,
                    .NumChannels = 1 + ((planes == 2) && i),
                    .Flags = 0,
                },
                .numLevels = 1,
            };
            int p_w, p_h;

#ifdef _WIN32
            CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
                .type = IsWindows8OrGreater()
                    ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
                    : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
                .size = dst_f->size[i],
            };
            VkMemoryGetWin32HandleInfoKHR export_info = {
                .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
                .memory     = dst_f->mem[i],
                .handleType = IsWindows8OrGreater()
                    ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                    : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
            };
            VkSemaphoreGetWin32HandleInfoKHR sem_export = {
                .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
                .semaphore  = dst_f->sem[i],
                .handleType = IsWindows8OrGreater()
                    ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
                    : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
            };
            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
                .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */,
            };

            ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info,
                                              &ext_desc.handle.win32.handle);
            if (ret != VK_SUCCESS) {
                av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n",
                       vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
            dst_int->ext_mem_handle[i] = ext_desc.handle.win32.handle;
#else
            CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
                .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
                .size = dst_f->size[i],
            };
            VkMemoryGetFdInfoKHR export_info = {
                .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
                .memory     = dst_f->mem[i],
                .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
            };
            VkSemaphoreGetFdInfoKHR sem_export = {
                .sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
                .semaphore  = dst_f->sem[i],
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
            };
            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
                .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */,
            };

            ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                     &ext_desc.handle.fd);
            if (ret != VK_SUCCESS) {
                av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n",
                       vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
#endif

            ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc));
            if (ret < 0) {
#ifndef _WIN32
                close(ext_desc.handle.fd);
#endif
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);
            tex_desc.arrayDesc.Width  = p_w;
            tex_desc.arrayDesc.Height = p_h;

            ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i],
                                                                       dst_int->ext_mem[i],
                                                                       &tex_desc));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

            ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i],
                                                        dst_int->cu_mma[i], 0));
            if (ret < 0) {
                err = AVERROR_EXTERNAL;
                goto fail;
            }

#ifdef _WIN32
            ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export,
                                                 &ext_sem_desc.handle.win32.handle);
#else
            ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
                                        &ext_sem_desc.handle.fd);
#endif
            if (ret != VK_SUCCESS) {
                av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
                       vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                goto fail;
            }
#ifdef _WIN32
            dst_int->ext_sem_handle[i] = ext_sem_desc.handle.win32.handle;
#endif

            ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
                                                         &ext_sem_desc));
            if (ret < 0) {
#ifndef _WIN32
                close(ext_sem_desc.handle.fd);
#endif
                err = AVERROR_EXTERNAL;
                goto fail;
            }
        }
    }

    return 0;

fail:
    vulkan_free_internal(dst_f);
    return err;
}

static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
                                          AVFrame *dst, const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->internal->priv;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    dst_f = (AVVkFrame *)dst->data[0];

    err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .srcPitch      = src->linesize[i],
            .srcY          = 0,

            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
            .dstArray      = dst_int->cu_array[i],
        };

        int p_w, p_h;
        get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = p_w * desc->comp[i].step;
        cpy.Height = p_h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n");

    return prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    dst_f->internal = NULL;
    av_buffer_unref(&dst->buf[0]);
    return err;
}
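
/* A note on the timeline semaphore arithmetic above: CUDA waits on the
 * current value (sem_value + 0, i.e. whatever Vulkan last signalled), then
 * signals sem_value + 1 when the copies finish; incrementing the host-side
 * counters afterwards keeps Vulkan's next wait in step with CUDA's signal. */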
#endif

static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
                         const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;

    switch (src->format) {
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
            return vulkan_map_from_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    case AV_PIX_FMT_DRM_PRIME:
        if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
            return vulkan_map_from_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
    default:
        return AVERROR(ENOSYS);
    }
}
3268
#if CONFIG_LIBDRM
typedef struct VulkanDRMMapping {
    AVDRMFrameDescriptor drm_desc;
    AVVkFrame *source;
} VulkanDRMMapping;

static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
    AVDRMFrameDescriptor *drm_desc = hwmap->priv;

    for (int i = 0; i < drm_desc->nb_objects; i++)
        close(drm_desc->objects[i].fd);

    av_free(drm_desc);
}

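/* Linear search over the Vulkan <-> DRM format table; returns
 * DRM_FORMAT_INVALID when there is no mapping for the given VkFormat. */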
static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt)
{
    for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++)
        if (vulkan_drm_format_map[i].vk_format == vkfmt)
            return vulkan_drm_format_map[i].drm_fourcc;
    return DRM_FORMAT_INVALID;
}

static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
                             const AVFrame *src, int flags)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f = (AVVkFrame *)src->data[0];
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    VulkanFramesPriv *fp = hwfc->internal->priv;
    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
    AVVulkanFramesContext *hwfctx = hwfc->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkImageDrmFormatModifierPropertiesEXT drm_mod = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
    };
    VkSemaphoreWaitInfo wait_info = {
        .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .flags          = 0x0,
        .semaphoreCount = planes,
    };

    AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
    if (!drm_desc)
        return AVERROR(ENOMEM);

    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        goto end;

    /* Wait for the operation to finish so we can cleanly export it. */
    wait_info.pSemaphores = f->sem;
    wait_info.pValues     = f->sem_value;

    vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);

    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc);
    if (err < 0)
        goto end;

    ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0],
                                                     &drm_mod);
    if (ret != VK_SUCCESS) {
        av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
        err = AVERROR_EXTERNAL;
        goto end;
    }

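    /* Export one dma-buf FD per bound memory object. With contiguous
     * planes only f->mem[0] is set, so a single object backs every layer. */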
    for (int i = 0; (i < planes) && (f->mem[i]); i++) {
        VkMemoryGetFdInfoKHR export_info = {
            .sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
            .memory     = f->mem[i],
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
        };

        ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info,
                                 &drm_desc->objects[i].fd);
        if (ret != VK_SUCCESS) {
            av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
            err = AVERROR_EXTERNAL;
            goto end;
        }

        drm_desc->nb_objects++;
        drm_desc->objects[i].size = f->size[i];
        drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
    }

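    /* Describe one layer per plane. Subresource layouts are only queried
     * for linear images; with optimal tiling the offsets and pitches are
     * implied by the format modifier. */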
    drm_desc->nb_layers = planes;
    for (int i = 0; i < drm_desc->nb_layers; i++) {
        VkSubresourceLayout layout;
        VkImageSubresource sub = {
            .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
        };
        VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i];

        drm_desc->layers[i].format    = vulkan_fmt_to_drm(plane_vkfmt);
        drm_desc->layers[i].nb_planes = 1;

        if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) {
            av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n");
            err = AVERROR_PATCHWELCOME;
            goto end;
        }

        drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);

        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
            continue;

        vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
        drm_desc->layers[i].planes[0].offset = layout.offset;
        drm_desc->layers[i].planes[0].pitch  = layout.rowPitch;

        if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY)
            drm_desc->layers[i].planes[0].offset += f->offset[i];
    }

    dst->width   = src->width;
    dst->height  = src->height;
    dst->data[0] = (uint8_t *)drm_desc;

    av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n");

    return 0;

end:
    av_free(drm_desc);
    return err;
}

#if CONFIG_VAAPI
static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst,
                               const AVFrame *src, int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();
    if (!tmp)
        return AVERROR(ENOMEM);

    tmp->format = AV_PIX_FMT_DRM_PRIME;

    err = vulkan_map_to_drm(hwfc, tmp, src, flags);
    if (err < 0)
        goto fail;

    err = av_hwframe_map(dst, tmp, flags);
    if (err < 0)
        goto fail;

    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}
#endif
#endif

static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
                           const AVFrame *src, int flags)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;

    switch (dst->format) {
#if CONFIG_LIBDRM
    case AV_PIX_FMT_DRM_PRIME:
        if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
            return vulkan_map_to_drm(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#if CONFIG_VAAPI
    case AV_PIX_FMT_VAAPI:
        if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
            return vulkan_map_to_vaapi(hwfc, dst, src, flags);
        else
            return AVERROR(ENOSYS);
#endif
#endif
    default:
        return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
    }
}

typedef struct ImageBuffer {
    VkBuffer buf;
    VkDeviceMemory mem;
    VkMemoryPropertyFlagBits flags;
    int mapped_mem;
} ImageBuffer;

static void free_buf(void *opaque, uint8_t *data)
{
    AVHWDeviceContext *ctx = opaque;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    ImageBuffer *vkbuf = (ImageBuffer *)data;

    if (vkbuf->buf)
        vk->DestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc);
    if (vkbuf->mem)
        vk->FreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc);

    av_free(data);
}

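/* Aligns the stride to the optimal buffer-copy row pitch and rounds the
 * total size up to the minimum memory-map alignment. */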
static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
{
    size_t size;
    *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
    size = height * (*stride);
    size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment);
    return size;
}

static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
                      size_t size, uint32_t req_memory_bits, int host_mapped,
                      void *create_pnext, void *alloc_pnext)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = create_pnext,
        .usage       = usage,
        .size        = size,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = alloc_pnext,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf));
    if (!vkbuf)
        return AVERROR(ENOMEM);

    vkbuf->mapped_mem = host_mapped;

    ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    req_desc.buffer = vkbuf->buf;

    vk->GetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = vkbuf->buf;

    /* Additional requirements imposed on us */
    if (req_memory_bits)
        req.memoryRequirements.memoryTypeBits &= req_memory_bits;

    err = alloc_mem(ctx, &req.memoryRequirements, flags,
                    use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                    &vkbuf->flags, &vkbuf->mem);
    if (err)
        goto fail;

    ret = vk->BindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto fail;
    }

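    /* Wrap the buffer in an AVBufferRef so free_buf() runs once the last
     * reference is unreferenced. */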
    *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
    if (!(*buf)) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    return 0;

fail:
    free_buf(ctx, (uint8_t *)vkbuf);
    return err;
}

/* Skips mapping of host mapped buffers but still invalidates them */
static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
                       int nb_buffers, int invalidate)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
    int invalidate_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
        if (vkbuf->mapped_mem)
            continue;

        ret = vk->MapMemory(hwctx->act_dev, vkbuf->mem, 0,
                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

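    /* Batch up the ranges that actually need an invalidate; host-coherent
     * memory is skipped as it never requires one. */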
    for (int i = 0; i < nb_buffers; i++) {
        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = vkbuf->mem,
            .size   = VK_WHOLE_SIZE,
        };

        /* For host imported memory Vulkan says to use platform-defined
         * sync methods, but doesn't really say not to call flush or invalidate
         * on the original host pointers. It does explicitly allow doing that on
         * host-mapped pointers which are then mapped again using vkMapMemory,
         * and known implementations return the original pointers when mapped
         * again. */
        if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;

        invalidate_ctx[invalidate_count++] = ival_buf;
    }

    if (invalidate_count) {
        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
                                               invalidate_ctx);
        if (ret != VK_SUCCESS)
            av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
                   vk_ret2str(ret));
    }

    return 0;
}

static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
                         int nb_buffers, int flush)
{
    int err = 0;
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanDevicePriv *p = ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;
    VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
    int flush_count = 0;

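    /* Flush non-coherent ranges before unmapping so the device observes
     * any host writes. */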
    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = vkbuf->mem,
                .size   = VK_WHOLE_SIZE,
            };

            if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;

            flush_ctx[flush_count++] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++) {
        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
        if (vkbuf->mapped_mem)
            continue;

        vk->UnmapMemory(hwctx->act_dev, vkbuf->mem);
    }

    return err;
}

static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
                              AVBufferRef **bufs, size_t *buf_offsets,
                              const int *buf_stride, int w,
                              int h, enum AVPixelFormat pix_fmt, int to_buf)
{
    int err;
    AVVkFrame *frame = (AVVkFrame *)f->data[0];
    VulkanFramesPriv *fp = hwfc->internal->priv;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    int bar_num = 0;
    VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];

    const int planes = av_pix_fmt_count_planes(pix_fmt);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
    VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx;
    VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);

    uint64_t sem_signal_values[AV_NUM_DATA_POINTERS];

    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
        .sType                     = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .pWaitSemaphoreValues      = frame->sem_value,
        .pSignalSemaphoreValues    = sem_signal_values,
        .waitSemaphoreValueCount   = planes,
        .signalSemaphoreValueCount = planes,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext                = &s_timeline_sem_info,
        .pSignalSemaphores    = frame->sem,
        .pWaitSemaphores      = frame->sem,
        .pWaitDstStageMask    = sem_wait_dst,
        .signalSemaphoreCount = planes,
        .waitSemaphoreCount   = planes,
    };

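    /* The submission waits on each plane's timeline semaphore at its
     * current value and signals it at value + 1. */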
    for (int i = 0; i < planes; i++)
        sem_signal_values[i] = frame->sem_value[i] + 1;

    if ((err = wait_start_exec_ctx(hwfc, ectx)))
        return err;

    /* Change the image layout to something more optimal for transfers */
    for (int i = 0; i < planes; i++) {
        VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
                                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
                                            VK_ACCESS_TRANSFER_WRITE_BIT;

        sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

        /* If the layout matches and we have read access skip the barrier */
        if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
            continue;

        img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        img_bar[bar_num].srcAccessMask = 0x0;
        img_bar[bar_num].dstAccessMask = new_access;
        img_bar[bar_num].oldLayout = frame->layout[i];
        img_bar[bar_num].newLayout = new_layout;
        img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        img_bar[bar_num].image = frame->img[i];
        img_bar[bar_num].subresourceRange.levelCount = 1;
        img_bar[bar_num].subresourceRange.layerCount = 1;
        img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

        frame->layout[i] = img_bar[bar_num].newLayout;
        frame->access[i] = img_bar[bar_num].dstAccessMask;

        bar_num++;
    }

    if (bar_num)
        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                               0, NULL, 0, NULL, bar_num, img_bar);

    /* Schedule a copy for each plane */
    for (int i = 0; i < planes; i++) {
        ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
        VkBufferImageCopy buf_reg = {
            .bufferOffset = buf_offsets[i],
            .bufferRowLength = buf_stride[i] / desc->comp[i].step,
            .imageSubresource.layerCount = 1,
            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .imageOffset = { 0, 0, 0, },
        };

        int p_w, p_h;
        get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);

        buf_reg.bufferImageHeight = p_h;
        buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };

        if (to_buf)
            vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
                                     vkbuf->buf, 1, &buf_reg);
        else
            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
                                     frame->layout[i], 1, &buf_reg);
    }

    /* When uploading, do this asynchronously if the source is refcounted by
     * keeping the buffers as a submission dependency.
     * The hwcontext is guaranteed to not be freed until all frames are freed
     * in the frames_uninit function.
     * When downloading to buffer, do this synchronously and wait for the
     * queue submission to finish executing */
    if (!to_buf) {
        int ref;
        for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
            if (!f->buf[ref])
                break;
            if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
                return err;
        }
        if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
            return err;
        return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
    } else {
        return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
    }
}

static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
                                const AVFrame *swf, int from)
{
    int err = 0;
    VkResult ret;
    AVVkFrame *f = (AVVkFrame *)vkf->data[0];
    AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
    FFVulkanFunctions *vk = &p->vkfn;

    AVFrame tmp;
    AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
    size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };

    int p_w, p_h;
    const int planes = av_pix_fmt_count_planes(swf->format);

    int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
    const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);

    if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
        av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
        return AVERROR(EINVAL);
    }

    if (swf->width > hwfc->width || swf->height > hwfc->height)
        return AVERROR(EINVAL);

    /* For linear, host visible images */
    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
        AVFrame *map = av_frame_alloc();
        if (!map)
            return AVERROR(ENOMEM);
        map->format = swf->format;

        err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE);
        if (err)
            return err;

        err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf);
        av_frame_free(&map);
        return err;
    }

    /* Create buffers */
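    /* One staging buffer per plane. When the host-memory import extension
     * is available and the plane's data pointer and stride satisfy the
     * alignment rules, the existing host memory is imported directly
     * instead of going through a copy. */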
    for (int i = 0; i < planes; i++) {
        size_t req_size;

        VkExternalMemoryBufferCreateInfo create_desc = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        };

        VkImportMemoryHostPointerInfoEXT import_desc = {
            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
        };

        VkMemoryHostPointerPropertiesEXT p_props = {
            .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
        };

        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

        tmp.linesize[i] = FFABS(swf->linesize[i]);

        /* Do not map images with a negative stride */
        if (map_host && swf->linesize[i] > 0) {
            size_t offs;
            offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
            import_desc.pHostPointer = swf->data[i] - offs;

            /* We have to compensate for the few extra bytes of padding we
             * completely ignore at the start */
            req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
                               p->hprops.minImportedHostPointerAlignment);

            ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
                                                        import_desc.handleType,
                                                        import_desc.pHostPointer,
                                                        &p_props);

            if (ret == VK_SUCCESS) {
                host_mapped[i] = 1;
                buf_offsets[i] = offs;
            }
        }

        if (!host_mapped[i])
            req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);

        err = create_buf(dev_ctx, &bufs[i],
                         from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
                                VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                         req_size, p_props.memoryTypeBits, host_mapped[i],
                         host_mapped[i] ? &create_desc : NULL,
                         host_mapped[i] ? &import_desc : NULL);
        if (err)
            goto end;
    }

    if (!from) {
        /* Map, copy image TO buffer (which then goes to the VkImage), unmap */
        if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
            goto end;

        for (int i = 0; i < planes; i++) {
            if (host_mapped[i])
                continue;

            get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

            av_image_copy_plane(tmp.data[i], tmp.linesize[i],
                                (const uint8_t *)swf->data[i], swf->linesize[i],
                                FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
                                p_h);
        }

        if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
            goto end;
    }

    /* Copy buffers into/from image */
    err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
                             swf->width, swf->height, swf->format, from);

    if (from) {
        /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
        if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
            goto end;

        for (int i = 0; i < planes; i++) {
            if (host_mapped[i])
                continue;

            get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);

            av_image_copy_plane_uc_from(swf->data[i], swf->linesize[i],
                                        (const uint8_t *)tmp.data[i], tmp.linesize[i],
                                        FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
                                        p_h);
        }

        if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
            goto end;
    }

end:
    for (int i = 0; i < planes; i++)
        av_buffer_unref(&bufs[i]);

    return err;
}

static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
                                   const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;

    switch (src->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_from_cuda(hwfc, dst, src);
#endif
    default:
        if (src->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_data(hwfc, dst, src, 0);
    }
}

#if CONFIG_CUDA
static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
                                        const AVFrame *src)
{
    int err;
    CUcontext dummy;
    AVVkFrame *dst_f;
    AVVkFrameInternal *dst_int;
    VulkanFramesPriv *fp = hwfc->internal->priv;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);

    AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data;
    AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
    AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
    AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
    CudaFunctions *cu = cu_internal->cuda_dl;
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };

    /* Here the Vulkan frame is the source; the dst_f name is kept for
     * symmetry with the upload path. */
    dst_f = (AVVkFrame *)src->data[0];

    err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT);
    if (err < 0)
        return err;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
    if (err < 0)
        return err;

    err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src);
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    dst_int = dst_f->internal;

    for (int i = 0; i < planes; i++) {
        s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0;
        s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1;
    }

    err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
                                                     planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++) {
        CUDA_MEMCPY2D cpy = {
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .dstPitch      = dst->linesize[i],
            .dstY          = 0,

            .srcMemoryType = CU_MEMORYTYPE_ARRAY,
            .srcArray      = dst_int->cu_array[i],
        };

        int w, h;
        get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i);

        cpy.WidthInBytes = w * desc->comp[i].step;
        cpy.Height = h;

        err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
        if (err < 0)
            goto fail;
    }

    err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
                                                       planes, cuda_dev->stream));
    if (err < 0)
        goto fail;

    for (int i = 0; i < planes; i++)
        dst_f->sem_value[i]++;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n");

    return prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT);

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    vulkan_free_internal(dst_f);
    dst_f->internal = NULL;
    av_buffer_unref(&dst->buf[0]);
    return err;
}
#endif

static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
                                     const AVFrame *src)
{
    av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;

    switch (dst->format) {
#if CONFIG_CUDA
    case AV_PIX_FMT_CUDA:
#ifdef _WIN32
        if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
            (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
        if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
            (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
            return vulkan_transfer_data_to_cuda(hwfc, dst, src);
#endif
    default:
        if (dst->hw_frames_ctx)
            return AVERROR(ENOSYS);
        else
            return vulkan_transfer_data(hwfc, src, dst, 1);
    }
}

static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
                                   AVHWFramesContext *src_fc, int flags)
{
    return vulkan_frames_init(dst_fc);
}

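/* Public allocator for AVVkFrame. The returned struct is zeroed and must be
 * set up by the caller before use; a minimal, purely illustrative sketch:
 *
 *     AVVkFrame *f = av_vk_frame_alloc();
 *     if (!f)
 *         return AVERROR(ENOMEM);
 *     // ... fill in images, memory and semaphores before handing it out
 */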
AVVkFrame *av_vk_frame_alloc(void)
{
    return av_mallocz(sizeof(AVVkFrame));
}

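/* Entry point table registered with the generic hwcontext framework. */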
const HWContextType ff_hwcontext_type_vulkan = {
    .type                   = AV_HWDEVICE_TYPE_VULKAN,
    .name                   = "Vulkan",

    .device_hwctx_size      = sizeof(AVVulkanDeviceContext),
    .device_priv_size       = sizeof(VulkanDevicePriv),
    .frames_hwctx_size      = sizeof(AVVulkanFramesContext),
    .frames_priv_size       = sizeof(VulkanFramesPriv),

    .device_init            = &vulkan_device_init,
    .device_create          = &vulkan_device_create,
    .device_derive          = &vulkan_device_derive,

    .frames_get_constraints = &vulkan_frames_get_constraints,
    .frames_init            = &vulkan_frames_init,
    .frames_get_buffer      = &vulkan_get_buffer,
    .frames_uninit          = &vulkan_frames_uninit,

    .transfer_get_formats   = &vulkan_transfer_get_formats,
    .transfer_data_to       = &vulkan_transfer_data_to,
    .transfer_data_from     = &vulkan_transfer_data_from,

    .map_to                 = &vulkan_map_to,
    .map_from               = &vulkan_map_from,
    .frames_derive_to       = &vulkan_frames_derive_to,

    .pix_fmts = (const enum AVPixelFormat []) {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE
    },
};