1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#define VK_NO_PROTOTYPES 20#define VK_ENABLE_BETA_EXTENSIONS 21 22#ifdef _WIN32 23#include <windows.h> /* Included to prevent conflicts with CreateSemaphore */ 24#include <versionhelpers.h> 25#include "compat/w32dlfcn.h" 26#else 27#include <dlfcn.h> 28#endif 29 30#include <unistd.h> 31 32#include "config.h" 33#include "pixdesc.h" 34#include "avstring.h" 35#include "imgutils.h" 36#include "hwcontext.h" 37#include "avassert.h" 38#include "hwcontext_internal.h" 39#include "hwcontext_vulkan.h" 40 41#include "vulkan.h" 42#include "vulkan_loader.h" 43 44#if CONFIG_LIBDRM 45#include <xf86drm.h> 46#include <drm_fourcc.h> 47#include "hwcontext_drm.h" 48#if CONFIG_VAAPI 49#include <va/va_drmcommon.h> 50#include "hwcontext_vaapi.h" 51#endif 52#endif 53 54#if CONFIG_CUDA 55#include "hwcontext_cuda_internal.h" 56#include "cuda_check.h" 57#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) 58#endif 59 60typedef struct VulkanQueueCtx { 61 VkFence fence; 62 VkQueue queue; 63 int was_synchronous; 64 65 /* Buffer dependencies */ 66 AVBufferRef **buf_deps; 67 int nb_buf_deps; 68 int buf_deps_alloc_size; 69} VulkanQueueCtx; 70 71typedef struct VulkanExecCtx { 72 VkCommandPool pool; 73 VkCommandBuffer *bufs; 74 VulkanQueueCtx *queues; 75 int nb_queues; 76 int cur_queue_idx; 77} VulkanExecCtx; 78 79typedef struct VulkanDevicePriv { 80 /* Vulkan library and loader functions */ 81 void *libvulkan; 82 FFVulkanFunctions vkfn; 83 84 /* Properties */ 85 VkPhysicalDeviceProperties2 props; 86 VkPhysicalDeviceMemoryProperties mprops; 87 VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; 88 89 /* Features */ 90 VkPhysicalDeviceVulkan11Features device_features_1_1; 91 VkPhysicalDeviceVulkan12Features device_features_1_2; 92 93 /* Queues */ 94 uint32_t qfs[5]; 95 int num_qfs; 96 97 /* Debug callback */ 98 VkDebugUtilsMessengerEXT debug_ctx; 99 100 /* Extensions */ 101 FFVulkanExtensions extensions; 102 103 /* Settings */ 104 int use_linear_images; 105 106 /* Option to allocate all image planes in a single allocation */ 107 int contiguous_planes; 108 109 /* Nvidia */ 110 int dev_is_nvidia; 111 112 /* Intel */ 113 int dev_is_intel; 114} VulkanDevicePriv; 115 116typedef struct VulkanFramesPriv { 117 /* Image conversions */ 118 VulkanExecCtx conv_ctx; 119 120 /* Image transfers */ 121 VulkanExecCtx upload_ctx; 122 VulkanExecCtx download_ctx; 123 124 /* Modifier info list to free at uninit */ 125 VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; 126} VulkanFramesPriv; 127 128typedef struct AVVkFrameInternal { 129#if CONFIG_CUDA 130 /* Importing external memory into cuda is really expensive so we keep the 131 * memory imported all the time */ 132 AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */ 133 CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; 134 CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; 135 CUarray cu_array[AV_NUM_DATA_POINTERS]; 136 CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; 137#ifdef _WIN32 138 HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS]; 139 HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS]; 140#endif 141#endif 142} AVVkFrameInternal; 143 144#define ADD_VAL_TO_LIST(list, count, val) \ 145 do { \ 146 list = av_realloc_array(list, sizeof(*list), ++count); \ 147 if (!list) { \ 148 err = AVERROR(ENOMEM); \ 149 goto fail; \ 150 } \ 151 list[count - 1] = av_strdup(val); \ 152 if (!list[count - 1]) { \ 153 err = AVERROR(ENOMEM); \ 154 goto fail; \ 155 } \ 156 } while(0) 157 158#define RELEASE_PROPS(props, count) \ 159 if (props) { \ 160 for (int i = 0; i < count; i++) \ 161 av_free((void *)((props)[i])); \ 162 av_free((void *)props); \ 163 } 164 165static const struct { 166 enum AVPixelFormat pixfmt; 167 const VkFormat vkfmts[4]; 168} vk_pixfmt_map[] = { 169 { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, 170 { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, 171 { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, 172 173 { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, 174 { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, 175 { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, 176 { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, 177 178 { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, 179 180 { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, 181 { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, 182 183 { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, 184 { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 185 { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 186 { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 187 188 { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, 189 { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 190 { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 191 { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 192 193 { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, 194 { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 195 { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 196 { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 197 198 { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, 199 { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 200 /* There is no AV_PIX_FMT_YUVA420P12 */ 201 { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 202 203 { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, 204 { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 205 { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 206 { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 207 208 { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, 209 { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 210 { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 211 { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 212 213 { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } }, 214 { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } }, 215 { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } }, 216 { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } }, 217 { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } }, 218 { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, 219 { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, 220 { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, 221 { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, 222 { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } }, 223 { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } }, 224 225 /* Lower priority as there's an endianess-dependent overlap between these 226 * and rgba/bgr0, and PACK32 formats are more limited */ 227 { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, 228 { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, 229 230 { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, 231 232 { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, 233 { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, 234 { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, 235 { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, 236}; 237 238const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) 239{ 240 for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++) 241 if (vk_pixfmt_map[i].pixfmt == p) 242 return vk_pixfmt_map[i].vkfmts; 243 return NULL; 244} 245 246static const void *vk_find_struct(const void *chain, VkStructureType stype) 247{ 248 const VkBaseInStructure *in = chain; 249 while (in) { 250 if (in->sType == stype) 251 return in; 252 253 in = in->pNext; 254 } 255 256 return NULL; 257} 258 259static void vk_link_struct(void *chain, void *in) 260{ 261 VkBaseOutStructure *out = chain; 262 if (!in) 263 return; 264 265 while (out->pNext) 266 out = out->pNext; 267 268 out->pNext = in; 269} 270 271static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, 272 int linear) 273{ 274 AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; 275 VulkanDevicePriv *priv = dev_ctx->internal->priv; 276 FFVulkanFunctions *vk = &priv->vkfn; 277 const VkFormat *fmt = av_vkfmt_from_pixfmt(p); 278 int planes = av_pix_fmt_count_planes(p); 279 280 if (!fmt) 281 return 0; 282 283 for (int i = 0; i < planes; i++) { 284 VkFormatFeatureFlags flags; 285 VkFormatProperties2 prop = { 286 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, 287 }; 288 vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop); 289 flags = linear ? prop.formatProperties.linearTilingFeatures : 290 prop.formatProperties.optimalTilingFeatures; 291 if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS)) 292 return 0; 293 } 294 295 return 1; 296} 297 298static int load_libvulkan(AVHWDeviceContext *ctx) 299{ 300 AVVulkanDeviceContext *hwctx = ctx->hwctx; 301 VulkanDevicePriv *p = ctx->internal->priv; 302 303 static const char *lib_names[] = { 304#if defined(_WIN32) 305 "vulkan-1.dll", 306#elif defined(__APPLE__) 307 "libvulkan.dylib", 308 "libvulkan.1.dylib", 309 "libMoltenVK.dylib", 310#else 311 "libvulkan.so.1", 312 "libvulkan.so", 313#endif 314 }; 315 316 for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) { 317 p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL); 318 if (p->libvulkan) 319 break; 320 } 321 322 if (!p->libvulkan) { 323 av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n"); 324 return AVERROR_UNKNOWN; 325 } 326 327 hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr"); 328 329 return 0; 330} 331 332typedef struct VulkanOptExtension { 333 const char *name; 334 FFVulkanExtensions flag; 335} VulkanOptExtension; 336 337static const VulkanOptExtension optional_instance_exts[] = { 338 /* For future use */ 339}; 340 341static const VulkanOptExtension optional_device_exts[] = { 342 /* Misc or required by other extensions */ 343 { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, 344 { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, 345 { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, 346 347 /* Imports/exports */ 348 { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, 349 { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_DMABUF_MEMORY }, 350 { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS }, 351 { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM }, 352 { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY }, 353#ifdef _WIN32 354 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, 355 { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, 356#endif 357}; 358 359/* Converts return values to strings */ 360static const char *vk_ret2str(VkResult res) 361{ 362#define CASE(VAL) case VAL: return #VAL 363 switch (res) { 364 CASE(VK_SUCCESS); 365 CASE(VK_NOT_READY); 366 CASE(VK_TIMEOUT); 367 CASE(VK_EVENT_SET); 368 CASE(VK_EVENT_RESET); 369 CASE(VK_INCOMPLETE); 370 CASE(VK_ERROR_OUT_OF_HOST_MEMORY); 371 CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); 372 CASE(VK_ERROR_INITIALIZATION_FAILED); 373 CASE(VK_ERROR_DEVICE_LOST); 374 CASE(VK_ERROR_MEMORY_MAP_FAILED); 375 CASE(VK_ERROR_LAYER_NOT_PRESENT); 376 CASE(VK_ERROR_EXTENSION_NOT_PRESENT); 377 CASE(VK_ERROR_FEATURE_NOT_PRESENT); 378 CASE(VK_ERROR_INCOMPATIBLE_DRIVER); 379 CASE(VK_ERROR_TOO_MANY_OBJECTS); 380 CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); 381 CASE(VK_ERROR_FRAGMENTED_POOL); 382 CASE(VK_ERROR_SURFACE_LOST_KHR); 383 CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); 384 CASE(VK_SUBOPTIMAL_KHR); 385 CASE(VK_ERROR_OUT_OF_DATE_KHR); 386 CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); 387 CASE(VK_ERROR_VALIDATION_FAILED_EXT); 388 CASE(VK_ERROR_INVALID_SHADER_NV); 389 CASE(VK_ERROR_OUT_OF_POOL_MEMORY); 390 CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); 391 CASE(VK_ERROR_NOT_PERMITTED_EXT); 392 CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); 393 CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT); 394 CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); 395 default: return "Unknown error"; 396 } 397#undef CASE 398} 399 400static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, 401 VkDebugUtilsMessageTypeFlagsEXT messageType, 402 const VkDebugUtilsMessengerCallbackDataEXT *data, 403 void *priv) 404{ 405 int l; 406 AVHWDeviceContext *ctx = priv; 407 408 switch (severity) { 409 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break; 410 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break; 411 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break; 412 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break; 413 default: l = AV_LOG_DEBUG; break; 414 } 415 416 av_log(ctx, l, "%s\n", data->pMessage); 417 for (int i = 0; i < data->cmdBufLabelCount; i++) 418 av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName); 419 420 return 0; 421} 422 423static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, 424 const char * const **dst, uint32_t *num, int debug) 425{ 426 const char *tstr; 427 const char **extension_names = NULL; 428 VulkanDevicePriv *p = ctx->internal->priv; 429 FFVulkanFunctions *vk = &p->vkfn; 430 AVVulkanDeviceContext *hwctx = ctx->hwctx; 431 int err = 0, found, extensions_found = 0; 432 433 const char *mod; 434 int optional_exts_num; 435 uint32_t sup_ext_count; 436 char *user_exts_str = NULL; 437 AVDictionaryEntry *user_exts; 438 VkExtensionProperties *sup_ext; 439 const VulkanOptExtension *optional_exts; 440 441 if (!dev) { 442 mod = "instance"; 443 optional_exts = optional_instance_exts; 444 optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts); 445 user_exts = av_dict_get(opts, "instance_extensions", NULL, 0); 446 if (user_exts) { 447 user_exts_str = av_strdup(user_exts->value); 448 if (!user_exts_str) { 449 err = AVERROR(ENOMEM); 450 goto fail; 451 } 452 } 453 vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL); 454 sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); 455 if (!sup_ext) 456 return AVERROR(ENOMEM); 457 vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext); 458 } else { 459 mod = "device"; 460 optional_exts = optional_device_exts; 461 optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts); 462 user_exts = av_dict_get(opts, "device_extensions", NULL, 0); 463 if (user_exts) { 464 user_exts_str = av_strdup(user_exts->value); 465 if (!user_exts_str) { 466 err = AVERROR(ENOMEM); 467 goto fail; 468 } 469 } 470 vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, 471 &sup_ext_count, NULL); 472 sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); 473 if (!sup_ext) 474 return AVERROR(ENOMEM); 475 vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, 476 &sup_ext_count, sup_ext); 477 } 478 479 for (int i = 0; i < optional_exts_num; i++) { 480 tstr = optional_exts[i].name; 481 found = 0; 482 for (int j = 0; j < sup_ext_count; j++) { 483 if (!strcmp(tstr, sup_ext[j].extensionName)) { 484 found = 1; 485 break; 486 } 487 } 488 if (!found) 489 continue; 490 491 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); 492 p->extensions |= optional_exts[i].flag; 493 ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); 494 } 495 496 if (debug && !dev) { 497 tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; 498 found = 0; 499 for (int j = 0; j < sup_ext_count; j++) { 500 if (!strcmp(tstr, sup_ext[j].extensionName)) { 501 found = 1; 502 break; 503 } 504 } 505 if (found) { 506 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); 507 ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); 508 p->extensions |= FF_VK_EXT_DEBUG_UTILS; 509 } else { 510 av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", 511 tstr); 512 err = AVERROR(EINVAL); 513 goto fail; 514 } 515 } 516 517 if (user_exts_str) { 518 char *save, *token = av_strtok(user_exts_str, "+", &save); 519 while (token) { 520 found = 0; 521 for (int j = 0; j < sup_ext_count; j++) { 522 if (!strcmp(token, sup_ext[j].extensionName)) { 523 found = 1; 524 break; 525 } 526 } 527 if (found) { 528 av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token); 529 ADD_VAL_TO_LIST(extension_names, extensions_found, token); 530 } else { 531 av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n", 532 mod, token); 533 } 534 token = av_strtok(NULL, "+", &save); 535 } 536 } 537 538 *dst = extension_names; 539 *num = extensions_found; 540 541 av_free(user_exts_str); 542 av_free(sup_ext); 543 return 0; 544 545fail: 546 RELEASE_PROPS(extension_names, extensions_found); 547 av_free(user_exts_str); 548 av_free(sup_ext); 549 return err; 550} 551 552static int check_validation_layers(AVHWDeviceContext *ctx, AVDictionary *opts, 553 const char * const **dst, uint32_t *num, 554 int *debug_mode) 555{ 556 static const char default_layer[] = { "VK_LAYER_KHRONOS_validation" }; 557 558 int found = 0, err = 0; 559 VulkanDevicePriv *priv = ctx->internal->priv; 560 FFVulkanFunctions *vk = &priv->vkfn; 561 562 uint32_t sup_layer_count; 563 VkLayerProperties *sup_layers; 564 565 AVDictionaryEntry *user_layers; 566 char *user_layers_str = NULL; 567 char *save, *token; 568 569 const char **enabled_layers = NULL; 570 uint32_t enabled_layers_count = 0; 571 572 AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0); 573 int debug = debug_opt && strtol(debug_opt->value, NULL, 10); 574 575 /* If `debug=0`, enable no layers at all. */ 576 if (debug_opt && !debug) 577 return 0; 578 579 vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL); 580 sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties)); 581 if (!sup_layers) 582 return AVERROR(ENOMEM); 583 vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers); 584 585 av_log(ctx, AV_LOG_VERBOSE, "Supported validation layers:\n"); 586 for (int i = 0; i < sup_layer_count; i++) 587 av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName); 588 589 /* If `debug=1` is specified, enable the standard validation layer extension */ 590 if (debug) { 591 *debug_mode = debug; 592 for (int i = 0; i < sup_layer_count; i++) { 593 if (!strcmp(default_layer, sup_layers[i].layerName)) { 594 found = 1; 595 av_log(ctx, AV_LOG_VERBOSE, "Default validation layer %s is enabled\n", 596 default_layer); 597 ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, default_layer); 598 break; 599 } 600 } 601 } 602 603 user_layers = av_dict_get(opts, "validation_layers", NULL, 0); 604 if (!user_layers) 605 goto end; 606 607 user_layers_str = av_strdup(user_layers->value); 608 if (!user_layers_str) { 609 err = AVERROR(ENOMEM); 610 goto fail; 611 } 612 613 token = av_strtok(user_layers_str, "+", &save); 614 while (token) { 615 found = 0; 616 if (!strcmp(default_layer, token)) { 617 if (debug) { 618 /* if the `debug=1`, default_layer is enabled, skip here */ 619 token = av_strtok(NULL, "+", &save); 620 continue; 621 } else { 622 /* if the `debug=0`, enable debug mode to load its callback properly */ 623 *debug_mode = debug; 624 } 625 } 626 for (int j = 0; j < sup_layer_count; j++) { 627 if (!strcmp(token, sup_layers[j].layerName)) { 628 found = 1; 629 break; 630 } 631 } 632 if (found) { 633 av_log(ctx, AV_LOG_VERBOSE, "Requested Validation Layer: %s\n", token); 634 ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token); 635 } else { 636 av_log(ctx, AV_LOG_ERROR, 637 "Validation Layer \"%s\" not support.\n", token); 638 err = AVERROR(EINVAL); 639 goto fail; 640 } 641 token = av_strtok(NULL, "+", &save); 642 } 643 644 av_free(user_layers_str); 645 646end: 647 av_free(sup_layers); 648 649 *dst = enabled_layers; 650 *num = enabled_layers_count; 651 652 return 0; 653 654fail: 655 RELEASE_PROPS(enabled_layers, enabled_layers_count); 656 av_free(sup_layers); 657 av_free(user_layers_str); 658 return err; 659} 660 661/* Creates a VkInstance */ 662static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) 663{ 664 int err = 0, debug_mode = 0; 665 VkResult ret; 666 VulkanDevicePriv *p = ctx->internal->priv; 667 FFVulkanFunctions *vk = &p->vkfn; 668 AVVulkanDeviceContext *hwctx = ctx->hwctx; 669 VkApplicationInfo application_info = { 670 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, 671 .pEngineName = "libavutil", 672 .apiVersion = VK_API_VERSION_1_2, 673 .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, 674 LIBAVUTIL_VERSION_MINOR, 675 LIBAVUTIL_VERSION_MICRO), 676 }; 677 VkInstanceCreateInfo inst_props = { 678 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, 679 .pApplicationInfo = &application_info, 680 }; 681 682 if (!hwctx->get_proc_addr) { 683 err = load_libvulkan(ctx); 684 if (err < 0) 685 return err; 686 } 687 688 err = ff_vk_load_functions(ctx, vk, p->extensions, 0, 0); 689 if (err < 0) { 690 av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n"); 691 return err; 692 } 693 694 err = check_validation_layers(ctx, opts, &inst_props.ppEnabledLayerNames, 695 &inst_props.enabledLayerCount, &debug_mode); 696 if (err) 697 goto fail; 698 699 /* Check for present/missing extensions */ 700 err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames, 701 &inst_props.enabledExtensionCount, debug_mode); 702 hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames; 703 hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount; 704 if (err < 0) 705 goto fail; 706 707 /* Try to create the instance */ 708 ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); 709 710 /* Check for errors */ 711 if (ret != VK_SUCCESS) { 712 av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n", 713 vk_ret2str(ret)); 714 err = AVERROR_EXTERNAL; 715 goto fail; 716 } 717 718 err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 0); 719 if (err < 0) { 720 av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n"); 721 goto fail; 722 } 723 724 if (debug_mode) { 725 VkDebugUtilsMessengerCreateInfoEXT dbg = { 726 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, 727 .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | 728 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | 729 VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | 730 VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, 731 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | 732 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | 733 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, 734 .pfnUserCallback = vk_dbg_callback, 735 .pUserData = ctx, 736 }; 737 738 vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg, 739 hwctx->alloc, &p->debug_ctx); 740 } 741 742 err = 0; 743 744fail: 745 RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount); 746 return err; 747} 748 749typedef struct VulkanDeviceSelection { 750 uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */ 751 int has_uuid; 752 const char *name; /* Will use this second unless NULL */ 753 uint32_t pci_device; /* Will use this third unless 0x0 */ 754 uint32_t vendor_id; /* Last resort to find something deterministic */ 755 int index; /* Finally fall back to index */ 756} VulkanDeviceSelection; 757 758static const char *vk_dev_type(enum VkPhysicalDeviceType type) 759{ 760 switch (type) { 761 case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated"; 762 case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete"; 763 case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual"; 764 case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software"; 765 default: return "unknown"; 766 } 767} 768 769/* Finds a device */ 770static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) 771{ 772 int err = 0, choice = -1; 773 uint32_t num; 774 VkResult ret; 775 VulkanDevicePriv *p = ctx->internal->priv; 776 FFVulkanFunctions *vk = &p->vkfn; 777 VkPhysicalDevice *devices = NULL; 778 VkPhysicalDeviceIDProperties *idp = NULL; 779 VkPhysicalDeviceProperties2 *prop = NULL; 780 AVVulkanDeviceContext *hwctx = ctx->hwctx; 781 782 ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL); 783 if (ret != VK_SUCCESS || !num) { 784 av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret)); 785 return AVERROR(ENODEV); 786 } 787 788 devices = av_malloc_array(num, sizeof(VkPhysicalDevice)); 789 if (!devices) 790 return AVERROR(ENOMEM); 791 792 ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices); 793 if (ret != VK_SUCCESS) { 794 av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", 795 vk_ret2str(ret)); 796 err = AVERROR(ENODEV); 797 goto end; 798 } 799 800 prop = av_calloc(num, sizeof(*prop)); 801 if (!prop) { 802 err = AVERROR(ENOMEM); 803 goto end; 804 } 805 806 idp = av_calloc(num, sizeof(*idp)); 807 if (!idp) { 808 err = AVERROR(ENOMEM); 809 goto end; 810 } 811 812 av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n"); 813 for (int i = 0; i < num; i++) { 814 idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; 815 prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; 816 prop[i].pNext = &idp[i]; 817 818 vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]); 819 av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, 820 prop[i].properties.deviceName, 821 vk_dev_type(prop[i].properties.deviceType), 822 prop[i].properties.deviceID); 823 } 824 825 if (select->has_uuid) { 826 for (int i = 0; i < num; i++) { 827 if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) { 828 choice = i; 829 goto end; 830 } 831 } 832 av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); 833 err = AVERROR(ENODEV); 834 goto end; 835 } else if (select->name) { 836 av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); 837 for (int i = 0; i < num; i++) { 838 if (strstr(prop[i].properties.deviceName, select->name)) { 839 choice = i; 840 goto end; 841 } 842 } 843 av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n", 844 select->name); 845 err = AVERROR(ENODEV); 846 goto end; 847 } else if (select->pci_device) { 848 av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device); 849 for (int i = 0; i < num; i++) { 850 if (select->pci_device == prop[i].properties.deviceID) { 851 choice = i; 852 goto end; 853 } 854 } 855 av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n", 856 select->pci_device); 857 err = AVERROR(EINVAL); 858 goto end; 859 } else if (select->vendor_id) { 860 av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id); 861 for (int i = 0; i < num; i++) { 862 if (select->vendor_id == prop[i].properties.vendorID) { 863 choice = i; 864 goto end; 865 } 866 } 867 av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n", 868 select->vendor_id); 869 err = AVERROR(ENODEV); 870 goto end; 871 } else { 872 if (select->index < num) { 873 choice = select->index; 874 goto end; 875 } 876 av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n", 877 select->index); 878 err = AVERROR(ENODEV); 879 goto end; 880 } 881 882end: 883 if (choice > -1) { 884 av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n", 885 choice, prop[choice].properties.deviceName, 886 vk_dev_type(prop[choice].properties.deviceType), 887 prop[choice].properties.deviceID); 888 hwctx->phys_dev = devices[choice]; 889 } 890 891 av_free(devices); 892 av_free(prop); 893 av_free(idp); 894 895 return err; 896} 897 898/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */ 899static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf, 900 VkQueueFlagBits flags) 901{ 902 int index = -1; 903 uint32_t min_score = UINT32_MAX; 904 905 for (int i = 0; i < num_qf; i++) { 906 const VkQueueFlagBits qflags = qf[i].queueFlags; 907 if (qflags & flags) { 908 uint32_t score = av_popcount(qflags) + qf[i].timestampValidBits; 909 if (score < min_score) { 910 index = i; 911 min_score = score; 912 } 913 } 914 } 915 916 if (index > -1) 917 qf[index].timestampValidBits++; 918 919 return index; 920} 921 922static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) 923{ 924 uint32_t num; 925 float *weights; 926 VkQueueFamilyProperties *qf = NULL; 927 VulkanDevicePriv *p = ctx->internal->priv; 928 FFVulkanFunctions *vk = &p->vkfn; 929 AVVulkanDeviceContext *hwctx = ctx->hwctx; 930 int graph_index, comp_index, tx_index, enc_index, dec_index; 931 932 /* First get the number of queue families */ 933 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); 934 if (!num) { 935 av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); 936 return AVERROR_EXTERNAL; 937 } 938 939 /* Then allocate memory */ 940 qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties)); 941 if (!qf) 942 return AVERROR(ENOMEM); 943 944 /* Finally retrieve the queue families */ 945 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qf); 946 947 av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n"); 948 for (int i = 0; i < num; i++) { 949 av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s (queues: %i)\n", i, 950 ((qf[i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "", 951 ((qf[i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "", 952 ((qf[i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "", 953 ((qf[i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "", 954 ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "", 955 ((qf[i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "", 956 ((qf[i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "", 957 qf[i].queueCount); 958 959 /* We use this field to keep a score of how many times we've used that 960 * queue family in order to make better choices. */ 961 qf[i].timestampValidBits = 0; 962 } 963 964 /* Pick each queue family to use */ 965 graph_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT); 966 comp_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT); 967 tx_index = pick_queue_family(qf, num, VK_QUEUE_TRANSFER_BIT); 968 enc_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); 969 dec_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_DECODE_BIT_KHR); 970 971 /* Signalling the transfer capabilities on a queue family is optional */ 972 if (tx_index < 0) { 973 tx_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT); 974 if (tx_index < 0) 975 tx_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT); 976 } 977 978 hwctx->queue_family_index = -1; 979 hwctx->queue_family_comp_index = -1; 980 hwctx->queue_family_tx_index = -1; 981 hwctx->queue_family_encode_index = -1; 982 hwctx->queue_family_decode_index = -1; 983 984#define SETUP_QUEUE(qf_idx) \ 985 if (qf_idx > -1) { \ 986 int fidx = qf_idx; \ 987 int qc = qf[fidx].queueCount; \ 988 VkDeviceQueueCreateInfo *pc; \ 989 \ 990 if (fidx == graph_index) { \ 991 hwctx->queue_family_index = fidx; \ 992 hwctx->nb_graphics_queues = qc; \ 993 graph_index = -1; \ 994 } \ 995 if (fidx == comp_index) { \ 996 hwctx->queue_family_comp_index = fidx; \ 997 hwctx->nb_comp_queues = qc; \ 998 comp_index = -1; \ 999 } \ 1000 if (fidx == tx_index) { \ 1001 hwctx->queue_family_tx_index = fidx; \ 1002 hwctx->nb_tx_queues = qc; \ 1003 tx_index = -1; \ 1004 } \ 1005 if (fidx == enc_index) { \ 1006 hwctx->queue_family_encode_index = fidx; \ 1007 hwctx->nb_encode_queues = qc; \ 1008 enc_index = -1; \ 1009 } \ 1010 if (fidx == dec_index) { \ 1011 hwctx->queue_family_decode_index = fidx; \ 1012 hwctx->nb_decode_queues = qc; \ 1013 dec_index = -1; \ 1014 } \ 1015 \ 1016 pc = av_realloc((void *)cd->pQueueCreateInfos, \ 1017 sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \ 1018 if (!pc) { \ 1019 av_free(qf); \ 1020 return AVERROR(ENOMEM); \ 1021 } \ 1022 cd->pQueueCreateInfos = pc; \ 1023 pc = &pc[cd->queueCreateInfoCount]; \ 1024 \ 1025 weights = av_malloc(qc * sizeof(float)); \ 1026 if (!weights) { \ 1027 av_free(qf); \ 1028 return AVERROR(ENOMEM); \ 1029 } \ 1030 \ 1031 memset(pc, 0, sizeof(*pc)); \ 1032 pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \ 1033 pc->queueFamilyIndex = fidx; \ 1034 pc->queueCount = qc; \ 1035 pc->pQueuePriorities = weights; \ 1036 \ 1037 for (int i = 0; i < qc; i++) \ 1038 weights[i] = 1.0f / qc; \ 1039 \ 1040 cd->queueCreateInfoCount++; \ 1041 } 1042 1043 SETUP_QUEUE(graph_index) 1044 SETUP_QUEUE(comp_index) 1045 SETUP_QUEUE(tx_index) 1046 SETUP_QUEUE(enc_index) 1047 SETUP_QUEUE(dec_index) 1048 1049#undef SETUP_QUEUE 1050 1051 av_free(qf); 1052 1053 return 0; 1054} 1055 1056static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, 1057 int queue_family_index, int num_queues) 1058{ 1059 VkResult ret; 1060 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 1061 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 1062 FFVulkanFunctions *vk = &p->vkfn; 1063 1064 VkCommandPoolCreateInfo cqueue_create = { 1065 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, 1066 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 1067 .queueFamilyIndex = queue_family_index, 1068 }; 1069 VkCommandBufferAllocateInfo cbuf_create = { 1070 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 1071 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1072 .commandBufferCount = num_queues, 1073 }; 1074 1075 cmd->nb_queues = num_queues; 1076 1077 /* Create command pool */ 1078 ret = vk->CreateCommandPool(hwctx->act_dev, &cqueue_create, 1079 hwctx->alloc, &cmd->pool); 1080 if (ret != VK_SUCCESS) { 1081 av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n", 1082 vk_ret2str(ret)); 1083 return AVERROR_EXTERNAL; 1084 } 1085 1086 cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs)); 1087 if (!cmd->bufs) 1088 return AVERROR(ENOMEM); 1089 1090 cbuf_create.commandPool = cmd->pool; 1091 1092 /* Allocate command buffer */ 1093 ret = vk->AllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs); 1094 if (ret != VK_SUCCESS) { 1095 av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", 1096 vk_ret2str(ret)); 1097 av_freep(&cmd->bufs); 1098 return AVERROR_EXTERNAL; 1099 } 1100 1101 cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues)); 1102 if (!cmd->queues) 1103 return AVERROR(ENOMEM); 1104 1105 for (int i = 0; i < num_queues; i++) { 1106 VulkanQueueCtx *q = &cmd->queues[i]; 1107 vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue); 1108 q->was_synchronous = 1; 1109 } 1110 1111 return 0; 1112} 1113 1114static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) 1115{ 1116 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 1117 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 1118 FFVulkanFunctions *vk = &p->vkfn; 1119 1120 if (cmd->queues) { 1121 for (int i = 0; i < cmd->nb_queues; i++) { 1122 VulkanQueueCtx *q = &cmd->queues[i]; 1123 1124 /* Make sure all queues have finished executing */ 1125 if (q->fence && !q->was_synchronous) { 1126 vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); 1127 vk->ResetFences(hwctx->act_dev, 1, &q->fence); 1128 } 1129 1130 /* Free the fence */ 1131 if (q->fence) 1132 vk->DestroyFence(hwctx->act_dev, q->fence, hwctx->alloc); 1133 1134 /* Free buffer dependencies */ 1135 for (int j = 0; j < q->nb_buf_deps; j++) 1136 av_buffer_unref(&q->buf_deps[j]); 1137 av_free(q->buf_deps); 1138 } 1139 } 1140 1141 if (cmd->bufs) 1142 vk->FreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs); 1143 if (cmd->pool) 1144 vk->DestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc); 1145 1146 av_freep(&cmd->queues); 1147 av_freep(&cmd->bufs); 1148 cmd->pool = VK_NULL_HANDLE; 1149} 1150 1151static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) 1152{ 1153 return cmd->bufs[cmd->cur_queue_idx]; 1154} 1155 1156static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) 1157{ 1158 VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; 1159 1160 for (int j = 0; j < q->nb_buf_deps; j++) 1161 av_buffer_unref(&q->buf_deps[j]); 1162 q->nb_buf_deps = 0; 1163} 1164 1165static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd) 1166{ 1167 VkResult ret; 1168 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 1169 VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; 1170 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 1171 FFVulkanFunctions *vk = &p->vkfn; 1172 1173 VkCommandBufferBeginInfo cmd_start = { 1174 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 1175 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 1176 }; 1177 1178 /* Create the fence and don't wait for it initially */ 1179 if (!q->fence) { 1180 VkFenceCreateInfo fence_spawn = { 1181 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, 1182 }; 1183 ret = vk->CreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc, 1184 &q->fence); 1185 if (ret != VK_SUCCESS) { 1186 av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", 1187 vk_ret2str(ret)); 1188 return AVERROR_EXTERNAL; 1189 } 1190 } else if (!q->was_synchronous) { 1191 vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); 1192 vk->ResetFences(hwctx->act_dev, 1, &q->fence); 1193 } 1194 1195 /* Discard queue dependencies */ 1196 unref_exec_ctx_deps(hwfc, cmd); 1197 1198 ret = vk->BeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start); 1199 if (ret != VK_SUCCESS) { 1200 av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n", 1201 vk_ret2str(ret)); 1202 return AVERROR_EXTERNAL; 1203 } 1204 1205 return 0; 1206} 1207 1208static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, 1209 AVBufferRef * const *deps, int nb_deps) 1210{ 1211 AVBufferRef **dst; 1212 VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; 1213 1214 if (!deps || !nb_deps) 1215 return 0; 1216 1217 dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, 1218 (q->nb_buf_deps + nb_deps) * sizeof(*dst)); 1219 if (!dst) 1220 goto err; 1221 1222 q->buf_deps = dst; 1223 1224 for (int i = 0; i < nb_deps; i++) { 1225 q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]); 1226 if (!q->buf_deps[q->nb_buf_deps]) 1227 goto err; 1228 q->nb_buf_deps++; 1229 } 1230 1231 return 0; 1232 1233err: 1234 unref_exec_ctx_deps(hwfc, cmd); 1235 return AVERROR(ENOMEM); 1236} 1237 1238static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, 1239 VkSubmitInfo *s_info, AVVkFrame *f, int synchronous) 1240{ 1241 VkResult ret; 1242 VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; 1243 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 1244 FFVulkanFunctions *vk = &p->vkfn; 1245 1246 ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]); 1247 if (ret != VK_SUCCESS) { 1248 av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", 1249 vk_ret2str(ret)); 1250 unref_exec_ctx_deps(hwfc, cmd); 1251 return AVERROR_EXTERNAL; 1252 } 1253 1254 s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx]; 1255 s_info->commandBufferCount = 1; 1256 1257 ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence); 1258 if (ret != VK_SUCCESS) { 1259 av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n", 1260 vk_ret2str(ret)); 1261 unref_exec_ctx_deps(hwfc, cmd); 1262 return AVERROR_EXTERNAL; 1263 } 1264 1265 if (f) 1266 for (int i = 0; i < s_info->signalSemaphoreCount; i++) 1267 f->sem_value[i]++; 1268 1269 q->was_synchronous = synchronous; 1270 1271 if (synchronous) { 1272 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 1273 vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); 1274 vk->ResetFences(hwctx->act_dev, 1, &q->fence); 1275 unref_exec_ctx_deps(hwfc, cmd); 1276 } else { /* Rotate queues */ 1277 cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues; 1278 } 1279 1280 return 0; 1281} 1282 1283static void vulkan_device_free(AVHWDeviceContext *ctx) 1284{ 1285 VulkanDevicePriv *p = ctx->internal->priv; 1286 FFVulkanFunctions *vk = &p->vkfn; 1287 AVVulkanDeviceContext *hwctx = ctx->hwctx; 1288 1289 if (hwctx->act_dev) 1290 vk->DestroyDevice(hwctx->act_dev, hwctx->alloc); 1291 1292 if (p->debug_ctx) 1293 vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx, 1294 hwctx->alloc); 1295 1296 if (hwctx->inst) 1297 vk->DestroyInstance(hwctx->inst, hwctx->alloc); 1298 1299 if (p->libvulkan) 1300 dlclose(p->libvulkan); 1301 1302 RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions); 1303 RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); 1304} 1305 1306static int vulkan_device_create_internal(AVHWDeviceContext *ctx, 1307 VulkanDeviceSelection *dev_select, 1308 AVDictionary *opts, int flags) 1309{ 1310 int err = 0; 1311 VkResult ret; 1312 AVDictionaryEntry *opt_d; 1313 VulkanDevicePriv *p = ctx->internal->priv; 1314 FFVulkanFunctions *vk = &p->vkfn; 1315 AVVulkanDeviceContext *hwctx = ctx->hwctx; 1316 1317 /* 1318 * VkPhysicalDeviceVulkan12Features has a timelineSemaphore field, but 1319 * MoltenVK doesn't implement VkPhysicalDeviceVulkan12Features yet, so we 1320 * use VkPhysicalDeviceTimelineSemaphoreFeatures directly. 1321 */ 1322 VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { 1323 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, 1324 }; 1325 VkPhysicalDeviceVulkan12Features dev_features_1_2 = { 1326 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, 1327 .pNext = &timeline_features, 1328 }; 1329 VkPhysicalDeviceVulkan11Features dev_features_1_1 = { 1330 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, 1331 .pNext = &dev_features_1_2, 1332 }; 1333 VkPhysicalDeviceFeatures2 dev_features = { 1334 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, 1335 .pNext = &dev_features_1_1, 1336 }; 1337 1338 VkDeviceCreateInfo dev_info = { 1339 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, 1340 .pNext = &hwctx->device_features, 1341 }; 1342 1343 hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; 1344 hwctx->device_features.pNext = &p->device_features_1_1; 1345 p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; 1346 p->device_features_1_1.pNext = &p->device_features_1_2; 1347 p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; 1348 ctx->free = vulkan_device_free; 1349 1350 /* Create an instance if not given one */ 1351 if ((err = create_instance(ctx, opts))) 1352 goto end; 1353 1354 /* Find a device (if not given one) */ 1355 if ((err = find_device(ctx, dev_select))) 1356 goto end; 1357 1358 vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &dev_features); 1359 1360 /* Try to keep in sync with libplacebo */ 1361#define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.features.NAME; 1362 COPY_FEATURE(hwctx->device_features, shaderImageGatherExtended) 1363 COPY_FEATURE(hwctx->device_features, shaderStorageImageReadWithoutFormat) 1364 COPY_FEATURE(hwctx->device_features, shaderStorageImageWriteWithoutFormat) 1365 COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics) 1366 COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics) 1367 COPY_FEATURE(hwctx->device_features, shaderInt64) 1368#undef COPY_FEATURE 1369 1370 /* We require timeline semaphores */ 1371 if (!timeline_features.timelineSemaphore) { 1372 av_log(ctx, AV_LOG_ERROR, "Device does not support timeline semaphores!\n"); 1373 err = AVERROR(ENOSYS); 1374 goto end; 1375 } 1376 p->device_features_1_2.timelineSemaphore = 1; 1377 1378 /* Setup queue family */ 1379 if ((err = setup_queue_families(ctx, &dev_info))) 1380 goto end; 1381 1382 if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames, 1383 &dev_info.enabledExtensionCount, 0))) { 1384 for (int i = 0; i < dev_info.queueCreateInfoCount; i++) 1385 av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities); 1386 av_free((void *)dev_info.pQueueCreateInfos); 1387 goto end; 1388 } 1389 1390 ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, 1391 &hwctx->act_dev); 1392 1393 for (int i = 0; i < dev_info.queueCreateInfoCount; i++) 1394 av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities); 1395 av_free((void *)dev_info.pQueueCreateInfos); 1396 1397 if (ret != VK_SUCCESS) { 1398 av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", 1399 vk_ret2str(ret)); 1400 for (int i = 0; i < dev_info.enabledExtensionCount; i++) 1401 av_free((void *)dev_info.ppEnabledExtensionNames[i]); 1402 av_free((void *)dev_info.ppEnabledExtensionNames); 1403 err = AVERROR_EXTERNAL; 1404 goto end; 1405 } 1406 1407 /* Tiled images setting, use them by default */ 1408 opt_d = av_dict_get(opts, "linear_images", NULL, 0); 1409 if (opt_d) 1410 p->use_linear_images = strtol(opt_d->value, NULL, 10); 1411 1412 opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0); 1413 if (opt_d) 1414 p->contiguous_planes = strtol(opt_d->value, NULL, 10); 1415 else 1416 p->contiguous_planes = -1; 1417 1418 hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames; 1419 hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount; 1420 1421end: 1422 return err; 1423} 1424 1425static int vulkan_device_init(AVHWDeviceContext *ctx) 1426{ 1427 int err; 1428 uint32_t queue_num; 1429 AVVulkanDeviceContext *hwctx = ctx->hwctx; 1430 VulkanDevicePriv *p = ctx->internal->priv; 1431 FFVulkanFunctions *vk = &p->vkfn; 1432 int graph_index, comp_index, tx_index, enc_index, dec_index; 1433 1434 /* Set device extension flags */ 1435 for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) { 1436 for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) { 1437 if (!strcmp(hwctx->enabled_dev_extensions[i], 1438 optional_device_exts[j].name)) { 1439 p->extensions |= optional_device_exts[j].flag; 1440 break; 1441 } 1442 } 1443 } 1444 1445 err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 1); 1446 if (err < 0) { 1447 av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n"); 1448 return err; 1449 } 1450 1451 p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; 1452 p->props.pNext = &p->hprops; 1453 p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT; 1454 1455 vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props); 1456 av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", 1457 p->props.properties.deviceName); 1458 av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n"); 1459 av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %"PRIu64"\n", 1460 p->props.properties.limits.optimalBufferCopyRowPitchAlignment); 1461 av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n", 1462 p->props.properties.limits.minMemoryMapAlignment); 1463 if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) 1464 av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", 1465 p->hprops.minImportedHostPointerAlignment); 1466 1467 p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); 1468 p->dev_is_intel = (p->props.properties.vendorID == 0x8086); 1469 1470 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); 1471 if (!queue_num) { 1472 av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); 1473 return AVERROR_EXTERNAL; 1474 } 1475 1476 graph_index = hwctx->queue_family_index; 1477 comp_index = hwctx->queue_family_comp_index; 1478 tx_index = hwctx->queue_family_tx_index; 1479 enc_index = hwctx->queue_family_encode_index; 1480 dec_index = hwctx->queue_family_decode_index; 1481 1482#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \ 1483 do { \ 1484 if (ctx_qf < 0 && required) { \ 1485 av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \ 1486 " in the context!\n", type); \ 1487 return AVERROR(EINVAL); \ 1488 } else if (fidx < 0 || ctx_qf < 0) { \ 1489 break; \ 1490 } else if (ctx_qf >= queue_num) { \ 1491 av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \ 1492 type, ctx_qf, queue_num); \ 1493 return AVERROR(EINVAL); \ 1494 } \ 1495 \ 1496 av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \ 1497 " for%s%s%s%s%s\n", \ 1498 ctx_qf, qc, \ 1499 ctx_qf == graph_index ? " graphics" : "", \ 1500 ctx_qf == comp_index ? " compute" : "", \ 1501 ctx_qf == tx_index ? " transfers" : "", \ 1502 ctx_qf == enc_index ? " encode" : "", \ 1503 ctx_qf == dec_index ? " decode" : ""); \ 1504 graph_index = (ctx_qf == graph_index) ? -1 : graph_index; \ 1505 comp_index = (ctx_qf == comp_index) ? -1 : comp_index; \ 1506 tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \ 1507 enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \ 1508 dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \ 1509 p->qfs[p->num_qfs++] = ctx_qf; \ 1510 } while (0) 1511 1512 CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues); 1513 CHECK_QUEUE("upload", 1, tx_index, hwctx->queue_family_tx_index, hwctx->nb_tx_queues); 1514 CHECK_QUEUE("compute", 1, comp_index, hwctx->queue_family_comp_index, hwctx->nb_comp_queues); 1515 CHECK_QUEUE("encode", 0, enc_index, hwctx->queue_family_encode_index, hwctx->nb_encode_queues); 1516 CHECK_QUEUE("decode", 0, dec_index, hwctx->queue_family_decode_index, hwctx->nb_decode_queues); 1517 1518#undef CHECK_QUEUE 1519 1520 /* Get device capabilities */ 1521 vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); 1522 1523 return 0; 1524} 1525 1526static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device, 1527 AVDictionary *opts, int flags) 1528{ 1529 VulkanDeviceSelection dev_select = { 0 }; 1530 if (device && device[0]) { 1531 char *end = NULL; 1532 dev_select.index = strtol(device, &end, 10); 1533 if (end == device) { 1534 dev_select.index = 0; 1535 dev_select.name = device; 1536 } 1537 } 1538 1539 return vulkan_device_create_internal(ctx, &dev_select, opts, flags); 1540} 1541 1542static int vulkan_device_derive(AVHWDeviceContext *ctx, 1543 AVHWDeviceContext *src_ctx, 1544 AVDictionary *opts, int flags) 1545{ 1546 av_unused VulkanDeviceSelection dev_select = { 0 }; 1547 1548 /* If there's only one device on the system, then even if its not covered 1549 * by the following checks (e.g. non-PCIe ARM GPU), having an empty 1550 * dev_select will mean it'll get picked. */ 1551 switch(src_ctx->type) { 1552#if CONFIG_LIBDRM 1553#if CONFIG_VAAPI 1554 case AV_HWDEVICE_TYPE_VAAPI: { 1555 AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; 1556 1557 const char *vendor = vaQueryVendorString(src_hwctx->display); 1558 if (!vendor) { 1559 av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n"); 1560 return AVERROR_EXTERNAL; 1561 } 1562 1563 if (strstr(vendor, "Intel")) 1564 dev_select.vendor_id = 0x8086; 1565 if (strstr(vendor, "AMD")) 1566 dev_select.vendor_id = 0x1002; 1567 1568 return vulkan_device_create_internal(ctx, &dev_select, opts, flags); 1569 } 1570#endif 1571 case AV_HWDEVICE_TYPE_DRM: { 1572 AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; 1573 1574 drmDevice *drm_dev_info; 1575 int err = drmGetDevice(src_hwctx->fd, &drm_dev_info); 1576 if (err) { 1577 av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n"); 1578 return AVERROR_EXTERNAL; 1579 } 1580 1581 if (drm_dev_info->bustype == DRM_BUS_PCI) 1582 dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id; 1583 1584 drmFreeDevice(&drm_dev_info); 1585 1586 return vulkan_device_create_internal(ctx, &dev_select, opts, flags); 1587 } 1588#endif 1589#if CONFIG_CUDA 1590 case AV_HWDEVICE_TYPE_CUDA: { 1591 AVHWDeviceContext *cuda_cu = src_ctx; 1592 AVCUDADeviceContext *src_hwctx = src_ctx->hwctx; 1593 AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal; 1594 CudaFunctions *cu = cu_internal->cuda_dl; 1595 1596 int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid, 1597 cu_internal->cuda_device)); 1598 if (ret < 0) { 1599 av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n"); 1600 return AVERROR_EXTERNAL; 1601 } 1602 1603 dev_select.has_uuid = 1; 1604 1605 return vulkan_device_create_internal(ctx, &dev_select, opts, flags); 1606 } 1607#endif 1608 default: 1609 return AVERROR(ENOSYS); 1610 } 1611} 1612 1613static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, 1614 const void *hwconfig, 1615 AVHWFramesConstraints *constraints) 1616{ 1617 int count = 0; 1618 VulkanDevicePriv *p = ctx->internal->priv; 1619 1620 for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) 1621 count += pixfmt_is_supported(ctx, i, p->use_linear_images); 1622 1623#if CONFIG_CUDA 1624 if (p->dev_is_nvidia) 1625 count++; 1626#endif 1627 1628 constraints->valid_sw_formats = av_malloc_array(count + 1, 1629 sizeof(enum AVPixelFormat)); 1630 if (!constraints->valid_sw_formats) 1631 return AVERROR(ENOMEM); 1632 1633 count = 0; 1634 for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) 1635 if (pixfmt_is_supported(ctx, i, p->use_linear_images)) 1636 constraints->valid_sw_formats[count++] = i; 1637 1638#if CONFIG_CUDA 1639 if (p->dev_is_nvidia) 1640 constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA; 1641#endif 1642 constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; 1643 1644 constraints->min_width = 0; 1645 constraints->min_height = 0; 1646 constraints->max_width = p->props.properties.limits.maxImageDimension2D; 1647 constraints->max_height = p->props.properties.limits.maxImageDimension2D; 1648 1649 constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat)); 1650 if (!constraints->valid_hw_formats) 1651 return AVERROR(ENOMEM); 1652 1653 constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN; 1654 constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; 1655 1656 return 0; 1657} 1658 1659static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, 1660 VkMemoryPropertyFlagBits req_flags, const void *alloc_extension, 1661 VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) 1662{ 1663 VkResult ret; 1664 int index = -1; 1665 VulkanDevicePriv *p = ctx->internal->priv; 1666 FFVulkanFunctions *vk = &p->vkfn; 1667 AVVulkanDeviceContext *dev_hwctx = ctx->hwctx; 1668 VkMemoryAllocateInfo alloc_info = { 1669 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, 1670 .pNext = alloc_extension, 1671 .allocationSize = req->size, 1672 }; 1673 1674 /* The vulkan spec requires memory types to be sorted in the "optimal" 1675 * order, so the first matching type we find will be the best/fastest one */ 1676 for (int i = 0; i < p->mprops.memoryTypeCount; i++) { 1677 const VkMemoryType *type = &p->mprops.memoryTypes[i]; 1678 1679 /* The memory type must be supported by the requirements (bitfield) */ 1680 if (!(req->memoryTypeBits & (1 << i))) 1681 continue; 1682 1683 /* The memory type flags must include our properties */ 1684 if ((type->propertyFlags & req_flags) != req_flags) 1685 continue; 1686 1687 /* The memory type must be large enough */ 1688 if (req->size > p->mprops.memoryHeaps[type->heapIndex].size) 1689 continue; 1690 1691 /* Found a suitable memory type */ 1692 index = i; 1693 break; 1694 } 1695 1696 if (index < 0) { 1697 av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", 1698 req_flags); 1699 return AVERROR(EINVAL); 1700 } 1701 1702 alloc_info.memoryTypeIndex = index; 1703 1704 ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info, 1705 dev_hwctx->alloc, mem); 1706 if (ret != VK_SUCCESS) { 1707 av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", 1708 vk_ret2str(ret)); 1709 return AVERROR(ENOMEM); 1710 } 1711 1712 *mem_flags |= p->mprops.memoryTypes[index].propertyFlags; 1713 1714 return 0; 1715} 1716 1717static void vulkan_free_internal(AVVkFrame *f) 1718{ 1719 AVVkFrameInternal *internal = f->internal; 1720 1721 if (!internal) 1722 return; 1723 1724#if CONFIG_CUDA 1725 if (internal->cuda_fc_ref) { 1726 AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; 1727 int planes = av_pix_fmt_count_planes(cuda_fc->sw_format); 1728 AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; 1729 AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; 1730 AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; 1731 CudaFunctions *cu = cu_internal->cuda_dl; 1732 1733 for (int i = 0; i < planes; i++) { 1734 if (internal->cu_sem[i]) 1735 CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); 1736 if (internal->cu_mma[i]) 1737 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); 1738 if (internal->ext_mem[i]) 1739 CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i])); 1740#ifdef _WIN32 1741 if (internal->ext_sem_handle[i]) 1742 CloseHandle(internal->ext_sem_handle[i]); 1743 if (internal->ext_mem_handle[i]) 1744 CloseHandle(internal->ext_mem_handle[i]); 1745#endif 1746 } 1747 1748 av_buffer_unref(&internal->cuda_fc_ref); 1749 } 1750#endif 1751 1752 av_freep(&f->internal); 1753} 1754 1755static void vulkan_frame_free(void *opaque, uint8_t *data) 1756{ 1757 AVVkFrame *f = (AVVkFrame *)data; 1758 AVHWFramesContext *hwfc = opaque; 1759 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 1760 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 1761 FFVulkanFunctions *vk = &p->vkfn; 1762 int planes = av_pix_fmt_count_planes(hwfc->sw_format); 1763 1764 /* We could use vkWaitSemaphores, but the validation layer seems to have 1765 * issues tracking command buffer execution state on uninit. */ 1766 vk->DeviceWaitIdle(hwctx->act_dev); 1767 1768 vulkan_free_internal(f); 1769 1770 for (int i = 0; i < planes; i++) { 1771 vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); 1772 vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); 1773 vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); 1774 } 1775 1776 av_free(f); 1777} 1778 1779static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, 1780 void *alloc_pnext, size_t alloc_pnext_stride) 1781{ 1782 int err; 1783 VkResult ret; 1784 AVHWDeviceContext *ctx = hwfc->device_ctx; 1785 VulkanDevicePriv *p = ctx->internal->priv; 1786 FFVulkanFunctions *vk = &p->vkfn; 1787 AVVulkanFramesContext *hwfctx = hwfc->hwctx; 1788 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 1789 VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; 1790 1791 VkMemoryRequirements cont_memory_requirements = { 0 }; 1792 int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 }; 1793 int cont_mem_size = 0; 1794 1795 AVVulkanDeviceContext *hwctx = ctx->hwctx; 1796 1797 for (int i = 0; i < planes; i++) { 1798 int use_ded_mem; 1799 VkImageMemoryRequirementsInfo2 req_desc = { 1800 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, 1801 .image = f->img[i], 1802 }; 1803 VkMemoryDedicatedAllocateInfo ded_alloc = { 1804 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 1805 .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride), 1806 }; 1807 VkMemoryDedicatedRequirements ded_req = { 1808 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, 1809 }; 1810 VkMemoryRequirements2 req = { 1811 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, 1812 .pNext = &ded_req, 1813 }; 1814 1815 vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); 1816 1817 if (f->tiling == VK_IMAGE_TILING_LINEAR) 1818 req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size, 1819 p->props.properties.limits.minMemoryMapAlignment); 1820 1821 if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { 1822 if (ded_req.requiresDedicatedAllocation) { 1823 av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, " 1824 "device requires dedicated image allocation!\n"); 1825 return AVERROR(EINVAL); 1826 } else if (!i) { 1827 cont_memory_requirements = req.memoryRequirements; 1828 } else if (cont_memory_requirements.memoryTypeBits != 1829 req.memoryRequirements.memoryTypeBits) { 1830 av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 " 1831 "and %i, cannot allocate in a single region!\n", 1832 i); 1833 return AVERROR(EINVAL); 1834 } 1835 1836 cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size, 1837 req.memoryRequirements.alignment); 1838 cont_mem_size += cont_mem_size_list[i]; 1839 continue; 1840 } 1841 1842 /* In case the implementation prefers/requires dedicated allocation */ 1843 use_ded_mem = ded_req.prefersDedicatedAllocation | 1844 ded_req.requiresDedicatedAllocation; 1845 if (use_ded_mem) 1846 ded_alloc.image = f->img[i]; 1847 1848 /* Allocate memory */ 1849 if ((err = alloc_mem(ctx, &req.memoryRequirements, 1850 f->tiling == VK_IMAGE_TILING_LINEAR ? 1851 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : 1852 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 1853 use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, 1854 &f->flags, &f->mem[i]))) 1855 return err; 1856 1857 f->size[i] = req.memoryRequirements.size; 1858 bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; 1859 bind_info[i].image = f->img[i]; 1860 bind_info[i].memory = f->mem[i]; 1861 } 1862 1863 if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { 1864 cont_memory_requirements.size = cont_mem_size; 1865 1866 /* Allocate memory */ 1867 if ((err = alloc_mem(ctx, &cont_memory_requirements, 1868 f->tiling == VK_IMAGE_TILING_LINEAR ? 1869 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : 1870 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 1871 (void *)(((uint8_t *)alloc_pnext)), 1872 &f->flags, &f->mem[0]))) 1873 return err; 1874 1875 f->size[0] = cont_memory_requirements.size; 1876 1877 for (int i = 0, offset = 0; i < planes; i++) { 1878 bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; 1879 bind_info[i].image = f->img[i]; 1880 bind_info[i].memory = f->mem[0]; 1881 bind_info[i].memoryOffset = offset; 1882 1883 f->offset[i] = bind_info[i].memoryOffset; 1884 offset += cont_mem_size_list[i]; 1885 } 1886 } 1887 1888 /* Bind the allocated memory to the images */ 1889 ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info); 1890 if (ret != VK_SUCCESS) { 1891 av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", 1892 vk_ret2str(ret)); 1893 return AVERROR_EXTERNAL; 1894 } 1895 1896 return 0; 1897} 1898 1899enum PrepMode { 1900 PREP_MODE_WRITE, 1901 PREP_MODE_EXTERNAL_EXPORT, 1902 PREP_MODE_EXTERNAL_IMPORT 1903}; 1904 1905static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, 1906 AVVkFrame *frame, enum PrepMode pmode) 1907{ 1908 int err; 1909 uint32_t src_qf, dst_qf; 1910 VkImageLayout new_layout; 1911 VkAccessFlags new_access; 1912 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 1913 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 1914 FFVulkanFunctions *vk = &p->vkfn; 1915 uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; 1916 1917 VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; 1918 1919 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { 1920 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, 1921 .pSignalSemaphoreValues = sem_sig_val, 1922 .signalSemaphoreValueCount = planes, 1923 }; 1924 1925 VkSubmitInfo s_info = { 1926 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, 1927 .pNext = &s_timeline_sem_info, 1928 .pSignalSemaphores = frame->sem, 1929 .signalSemaphoreCount = planes, 1930 }; 1931 1932 VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; 1933 for (int i = 0; i < planes; i++) { 1934 wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; 1935 sem_sig_val[i] = frame->sem_value[i] + 1; 1936 } 1937 1938 switch (pmode) { 1939 case PREP_MODE_WRITE: 1940 new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; 1941 new_access = VK_ACCESS_TRANSFER_WRITE_BIT; 1942 src_qf = VK_QUEUE_FAMILY_IGNORED; 1943 dst_qf = VK_QUEUE_FAMILY_IGNORED; 1944 break; 1945 case PREP_MODE_EXTERNAL_IMPORT: 1946 new_layout = VK_IMAGE_LAYOUT_GENERAL; 1947 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; 1948 src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; 1949 dst_qf = VK_QUEUE_FAMILY_IGNORED; 1950 s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; 1951 s_timeline_sem_info.waitSemaphoreValueCount = planes; 1952 s_info.pWaitSemaphores = frame->sem; 1953 s_info.pWaitDstStageMask = wait_st; 1954 s_info.waitSemaphoreCount = planes; 1955 break; 1956 case PREP_MODE_EXTERNAL_EXPORT: 1957 new_layout = VK_IMAGE_LAYOUT_GENERAL; 1958 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; 1959 src_qf = VK_QUEUE_FAMILY_IGNORED; 1960 dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; 1961 s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; 1962 s_timeline_sem_info.waitSemaphoreValueCount = planes; 1963 s_info.pWaitSemaphores = frame->sem; 1964 s_info.pWaitDstStageMask = wait_st; 1965 s_info.waitSemaphoreCount = planes; 1966 break; 1967 } 1968 1969 if ((err = wait_start_exec_ctx(hwfc, ectx))) 1970 return err; 1971 1972 /* Change the image layout to something more optimal for writes. 1973 * This also signals the newly created semaphore, making it usable 1974 * for synchronization */ 1975 for (int i = 0; i < planes; i++) { 1976 img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 1977 img_bar[i].srcAccessMask = 0x0; 1978 img_bar[i].dstAccessMask = new_access; 1979 img_bar[i].oldLayout = frame->layout[i]; 1980 img_bar[i].newLayout = new_layout; 1981 img_bar[i].srcQueueFamilyIndex = src_qf; 1982 img_bar[i].dstQueueFamilyIndex = dst_qf; 1983 img_bar[i].image = frame->img[i]; 1984 img_bar[i].subresourceRange.levelCount = 1; 1985 img_bar[i].subresourceRange.layerCount = 1; 1986 img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1987 1988 frame->layout[i] = img_bar[i].newLayout; 1989 frame->access[i] = img_bar[i].dstAccessMask; 1990 } 1991 1992 vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx), 1993 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 1994 VK_PIPELINE_STAGE_TRANSFER_BIT, 1995 0, 0, NULL, 0, NULL, planes, img_bar); 1996 1997 return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); 1998} 1999 2000static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, 2001 int frame_w, int frame_h, int plane) 2002{ 2003 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); 2004 2005 /* Currently always true unless gray + alpha support is added */ 2006 if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB || 2007 !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) { 2008 *w = frame_w; 2009 *h = frame_h; 2010 return; 2011 } 2012 2013 *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w); 2014 *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h); 2015} 2016 2017static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, 2018 VkImageTiling tiling, VkImageUsageFlagBits usage, 2019 void *create_pnext) 2020{ 2021 int err; 2022 VkResult ret; 2023 AVHWDeviceContext *ctx = hwfc->device_ctx; 2024 VulkanDevicePriv *p = ctx->internal->priv; 2025 FFVulkanFunctions *vk = &p->vkfn; 2026 AVVulkanDeviceContext *hwctx = ctx->hwctx; 2027 enum AVPixelFormat format = hwfc->sw_format; 2028 const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format); 2029 const int planes = av_pix_fmt_count_planes(format); 2030 2031 VkExportSemaphoreCreateInfo ext_sem_info = { 2032 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, 2033#ifdef _WIN32 2034 .handleTypes = IsWindows8OrGreater() 2035 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT 2036 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, 2037#else 2038 .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 2039#endif 2040 }; 2041 2042 VkSemaphoreTypeCreateInfo sem_type_info = { 2043 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, 2044#ifdef _WIN32 2045 .pNext = p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, 2046#else 2047 .pNext = p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, 2048#endif 2049 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, 2050 .initialValue = 0, 2051 }; 2052 2053 VkSemaphoreCreateInfo sem_spawn = { 2054 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, 2055 .pNext = &sem_type_info, 2056 }; 2057 2058 AVVkFrame *f = av_vk_frame_alloc(); 2059 if (!f) { 2060 av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); 2061 return AVERROR(ENOMEM); 2062 } 2063 2064 /* Create the images */ 2065 for (int i = 0; i < planes; i++) { 2066 VkImageCreateInfo create_info = { 2067 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 2068 .pNext = create_pnext, 2069 .imageType = VK_IMAGE_TYPE_2D, 2070 .format = img_fmts[i], 2071 .extent.depth = 1, 2072 .mipLevels = 1, 2073 .arrayLayers = 1, 2074 .flags = VK_IMAGE_CREATE_ALIAS_BIT, 2075 .tiling = tiling, 2076 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2077 .usage = usage, 2078 .samples = VK_SAMPLE_COUNT_1_BIT, 2079 .pQueueFamilyIndices = p->qfs, 2080 .queueFamilyIndexCount = p->num_qfs, 2081 .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : 2082 VK_SHARING_MODE_EXCLUSIVE, 2083 }; 2084 2085 get_plane_wh(&create_info.extent.width, &create_info.extent.height, 2086 format, hwfc->width, hwfc->height, i); 2087 2088 ret = vk->CreateImage(hwctx->act_dev, &create_info, 2089 hwctx->alloc, &f->img[i]); 2090 if (ret != VK_SUCCESS) { 2091 av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", 2092 vk_ret2str(ret)); 2093 err = AVERROR(EINVAL); 2094 goto fail; 2095 } 2096 2097 /* Create semaphore */ 2098 ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, 2099 hwctx->alloc, &f->sem[i]); 2100 if (ret != VK_SUCCESS) { 2101 av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", 2102 vk_ret2str(ret)); 2103 return AVERROR_EXTERNAL; 2104 } 2105 2106 f->layout[i] = create_info.initialLayout; 2107 f->access[i] = 0x0; 2108 f->sem_value[i] = 0; 2109 } 2110 2111 f->flags = 0x0; 2112 f->tiling = tiling; 2113 2114 *frame = f; 2115 return 0; 2116 2117fail: 2118 vulkan_frame_free(hwfc, (uint8_t *)f); 2119 return err; 2120} 2121 2122/* Checks if an export flag is enabled, and if it is ORs it with *iexp */ 2123static void try_export_flags(AVHWFramesContext *hwfc, 2124 VkExternalMemoryHandleTypeFlags *comp_handle_types, 2125 VkExternalMemoryHandleTypeFlagBits *iexp, 2126 VkExternalMemoryHandleTypeFlagBits exp) 2127{ 2128 VkResult ret; 2129 AVVulkanFramesContext *hwctx = hwfc->hwctx; 2130 AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; 2131 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 2132 FFVulkanFunctions *vk = &p->vkfn; 2133 2134 const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info = 2135 vk_find_struct(hwctx->create_pnext, 2136 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); 2137 int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info; 2138 int nb_mods; 2139 2140 VkExternalImageFormatProperties eprops = { 2141 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, 2142 }; 2143 VkImageFormatProperties2 props = { 2144 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, 2145 .pNext = &eprops, 2146 }; 2147 VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = { 2148 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, 2149 .pNext = NULL, 2150 .pQueueFamilyIndices = p->qfs, 2151 .queueFamilyIndexCount = p->num_qfs, 2152 .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : 2153 VK_SHARING_MODE_EXCLUSIVE, 2154 }; 2155 VkPhysicalDeviceExternalImageFormatInfo enext = { 2156 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, 2157 .handleType = exp, 2158 .pNext = has_mods ? &phy_dev_mod_info : NULL, 2159 }; 2160 VkPhysicalDeviceImageFormatInfo2 pinfo = { 2161 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, 2162 .pNext = !exp ? NULL : &enext, 2163 .format = av_vkfmt_from_pixfmt(hwfc->sw_format)[0], 2164 .type = VK_IMAGE_TYPE_2D, 2165 .tiling = hwctx->tiling, 2166 .usage = hwctx->usage, 2167 .flags = VK_IMAGE_CREATE_ALIAS_BIT, 2168 }; 2169 2170 nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1; 2171 for (int i = 0; i < nb_mods; i++) { 2172 if (has_mods) 2173 phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i]; 2174 2175 ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev, 2176 &pinfo, &props); 2177 2178 if (ret == VK_SUCCESS) { 2179 *iexp |= exp; 2180 *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes; 2181 } 2182 } 2183} 2184 2185static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) 2186{ 2187 int err; 2188 AVVkFrame *f; 2189 AVBufferRef *avbuf = NULL; 2190 AVHWFramesContext *hwfc = opaque; 2191 AVVulkanFramesContext *hwctx = hwfc->hwctx; 2192 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 2193 VulkanFramesPriv *fp = hwfc->internal->priv; 2194 VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; 2195 VkExternalMemoryHandleTypeFlags e = 0x0; 2196 2197 VkExternalMemoryImageCreateInfo eiinfo = { 2198 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, 2199 .pNext = hwctx->create_pnext, 2200 }; 2201 2202#ifdef _WIN32 2203 if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) 2204 try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater() 2205 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 2206 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); 2207#else 2208 if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) 2209 try_export_flags(hwfc, &eiinfo.handleTypes, &e, 2210 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); 2211 2212 if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) 2213 try_export_flags(hwfc, &eiinfo.handleTypes, &e, 2214 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); 2215#endif 2216 2217 for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { 2218 eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; 2219 eminfo[i].pNext = hwctx->alloc_pnext[i]; 2220 eminfo[i].handleTypes = e; 2221 } 2222 2223 err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, 2224 eiinfo.handleTypes ? &eiinfo : NULL); 2225 if (err) 2226 return NULL; 2227 2228 err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo)); 2229 if (err) 2230 goto fail; 2231 2232 err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); 2233 if (err) 2234 goto fail; 2235 2236 avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame), 2237 vulkan_frame_free, hwfc, 0); 2238 if (!avbuf) 2239 goto fail; 2240 2241 return avbuf; 2242 2243fail: 2244 vulkan_frame_free(hwfc, (uint8_t *)f); 2245 return NULL; 2246} 2247 2248static void vulkan_frames_uninit(AVHWFramesContext *hwfc) 2249{ 2250 VulkanFramesPriv *fp = hwfc->internal->priv; 2251 2252 if (fp->modifier_info) { 2253 if (fp->modifier_info->pDrmFormatModifiers) 2254 av_freep(&fp->modifier_info->pDrmFormatModifiers); 2255 av_freep(&fp->modifier_info); 2256 } 2257 2258 free_exec_ctx(hwfc, &fp->conv_ctx); 2259 free_exec_ctx(hwfc, &fp->upload_ctx); 2260 free_exec_ctx(hwfc, &fp->download_ctx); 2261} 2262 2263static int vulkan_frames_init(AVHWFramesContext *hwfc) 2264{ 2265 int err; 2266 AVVkFrame *f; 2267 AVVulkanFramesContext *hwctx = hwfc->hwctx; 2268 VulkanFramesPriv *fp = hwfc->internal->priv; 2269 AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; 2270 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 2271 const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; 2272 const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS); 2273 2274 /* Default tiling flags */ 2275 hwctx->tiling = hwctx->tiling ? hwctx->tiling : 2276 has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : 2277 p->use_linear_images ? VK_IMAGE_TILING_LINEAR : 2278 VK_IMAGE_TILING_OPTIMAL; 2279 2280 if (!hwctx->usage) 2281 hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS; 2282 2283 if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) { 2284 if (p->contiguous_planes == 1 || 2285 ((p->contiguous_planes == -1) && p->dev_is_intel)) 2286 hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY; 2287 } 2288 2289 modifier_info = vk_find_struct(hwctx->create_pnext, 2290 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); 2291 2292 /* Get the supported modifiers if the user has not given any. */ 2293 if (has_modifiers && !modifier_info) { 2294 const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); 2295 VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; 2296 FFVulkanFunctions *vk = &p->vkfn; 2297 VkDrmFormatModifierPropertiesEXT *mod_props; 2298 uint64_t *modifiers; 2299 int modifier_count = 0; 2300 2301 VkDrmFormatModifierPropertiesListEXT mod_props_list = { 2302 .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, 2303 .pNext = NULL, 2304 .drmFormatModifierCount = 0, 2305 .pDrmFormatModifierProperties = NULL, 2306 }; 2307 VkFormatProperties2 prop = { 2308 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, 2309 .pNext = &mod_props_list, 2310 }; 2311 2312 /* Get all supported modifiers */ 2313 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); 2314 2315 if (!mod_props_list.drmFormatModifierCount) { 2316 av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n"); 2317 return AVERROR(EINVAL); 2318 } 2319 2320 /* Createa structure to hold the modifier list info */ 2321 modifier_info = av_mallocz(sizeof(*modifier_info)); 2322 if (!modifier_info) 2323 return AVERROR(ENOMEM); 2324 2325 modifier_info->pNext = NULL; 2326 modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT; 2327 2328 /* Add structure to the image creation pNext chain */ 2329 if (!hwctx->create_pnext) 2330 hwctx->create_pnext = modifier_info; 2331 else 2332 vk_link_struct(hwctx->create_pnext, (void *)modifier_info); 2333 2334 /* Backup the allocated struct to be freed later */ 2335 fp->modifier_info = modifier_info; 2336 2337 /* Allocate list of modifiers */ 2338 modifiers = av_mallocz(mod_props_list.drmFormatModifierCount * 2339 sizeof(*modifiers)); 2340 if (!modifiers) 2341 return AVERROR(ENOMEM); 2342 2343 modifier_info->pDrmFormatModifiers = modifiers; 2344 2345 /* Allocate a temporary list to hold all modifiers supported */ 2346 mod_props = av_mallocz(mod_props_list.drmFormatModifierCount * 2347 sizeof(*mod_props)); 2348 if (!mod_props) 2349 return AVERROR(ENOMEM); 2350 2351 mod_props_list.pDrmFormatModifierProperties = mod_props; 2352 2353 /* Finally get all modifiers from the device */ 2354 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); 2355 2356 /* Reject any modifiers that don't match our requirements */ 2357 for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) { 2358 if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage)) 2359 continue; 2360 2361 modifiers[modifier_count++] = mod_props[i].drmFormatModifier; 2362 } 2363 2364 if (!modifier_count) { 2365 av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports" 2366 " the usage flags!\n"); 2367 av_freep(&mod_props); 2368 return AVERROR(EINVAL); 2369 } 2370 2371 modifier_info->drmFormatModifierCount = modifier_count; 2372 av_freep(&mod_props); 2373 } 2374 2375 err = create_exec_ctx(hwfc, &fp->conv_ctx, 2376 dev_hwctx->queue_family_comp_index, 2377 dev_hwctx->nb_comp_queues); 2378 if (err) 2379 return err; 2380 2381 err = create_exec_ctx(hwfc, &fp->upload_ctx, 2382 dev_hwctx->queue_family_tx_index, 2383 dev_hwctx->nb_tx_queues); 2384 if (err) 2385 return err; 2386 2387 err = create_exec_ctx(hwfc, &fp->download_ctx, 2388 dev_hwctx->queue_family_tx_index, 1); 2389 if (err) 2390 return err; 2391 2392 /* Test to see if allocation will fail */ 2393 err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, 2394 hwctx->create_pnext); 2395 if (err) 2396 return err; 2397 2398 vulkan_frame_free(hwfc, (uint8_t *)f); 2399 2400 /* If user did not specify a pool, hwfc->pool will be set to the internal one 2401 * in hwcontext.c just after this gets called */ 2402 if (!hwfc->pool) { 2403 hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame), 2404 hwfc, vulkan_pool_alloc, 2405 NULL); 2406 if (!hwfc->internal->pool_internal) 2407 return AVERROR(ENOMEM); 2408 } 2409 2410 return 0; 2411} 2412 2413static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) 2414{ 2415 frame->buf[0] = av_buffer_pool_get(hwfc->pool); 2416 if (!frame->buf[0]) 2417 return AVERROR(ENOMEM); 2418 2419 frame->data[0] = frame->buf[0]->data; 2420 frame->format = AV_PIX_FMT_VULKAN; 2421 frame->width = hwfc->width; 2422 frame->height = hwfc->height; 2423 2424 return 0; 2425} 2426 2427static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc, 2428 enum AVHWFrameTransferDirection dir, 2429 enum AVPixelFormat **formats) 2430{ 2431 enum AVPixelFormat *fmts = av_malloc_array(2, sizeof(*fmts)); 2432 if (!fmts) 2433 return AVERROR(ENOMEM); 2434 2435 fmts[0] = hwfc->sw_format; 2436 fmts[1] = AV_PIX_FMT_NONE; 2437 2438 *formats = fmts; 2439 return 0; 2440} 2441 2442typedef struct VulkanMapping { 2443 AVVkFrame *frame; 2444 int flags; 2445} VulkanMapping; 2446 2447static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) 2448{ 2449 VulkanMapping *map = hwmap->priv; 2450 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 2451 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 2452 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 2453 FFVulkanFunctions *vk = &p->vkfn; 2454 2455 /* Check if buffer needs flushing */ 2456 if ((map->flags & AV_HWFRAME_MAP_WRITE) && 2457 !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { 2458 VkResult ret; 2459 VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; 2460 2461 for (int i = 0; i < planes; i++) { 2462 flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 2463 flush_ranges[i].memory = map->frame->mem[i]; 2464 flush_ranges[i].size = VK_WHOLE_SIZE; 2465 } 2466 2467 ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, planes, 2468 flush_ranges); 2469 if (ret != VK_SUCCESS) { 2470 av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n", 2471 vk_ret2str(ret)); 2472 } 2473 } 2474 2475 for (int i = 0; i < planes; i++) 2476 vk->UnmapMemory(hwctx->act_dev, map->frame->mem[i]); 2477 2478 av_free(map); 2479} 2480 2481static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, 2482 const AVFrame *src, int flags) 2483{ 2484 VkResult ret; 2485 int err, mapped_mem_count = 0, mem_planes = 0; 2486 AVVkFrame *f = (AVVkFrame *)src->data[0]; 2487 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 2488 AVVulkanFramesContext *hwfctx = hwfc->hwctx; 2489 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 2490 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 2491 FFVulkanFunctions *vk = &p->vkfn; 2492 2493 VulkanMapping *map = av_mallocz(sizeof(VulkanMapping)); 2494 if (!map) 2495 return AVERROR(EINVAL); 2496 2497 if (src->format != AV_PIX_FMT_VULKAN) { 2498 av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n", 2499 av_get_pix_fmt_name(src->format)); 2500 err = AVERROR(EINVAL); 2501 goto fail; 2502 } 2503 2504 if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) || 2505 !(f->tiling == VK_IMAGE_TILING_LINEAR)) { 2506 av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible " 2507 "and linear!\n"); 2508 err = AVERROR(EINVAL); 2509 goto fail; 2510 } 2511 2512 dst->width = src->width; 2513 dst->height = src->height; 2514 2515 mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes; 2516 for (int i = 0; i < mem_planes; i++) { 2517 ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0, 2518 VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); 2519 if (ret != VK_SUCCESS) { 2520 av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n", 2521 vk_ret2str(ret)); 2522 err = AVERROR_EXTERNAL; 2523 goto fail; 2524 } 2525 mapped_mem_count++; 2526 } 2527 2528 if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { 2529 for (int i = 0; i < planes; i++) 2530 dst->data[i] = dst->data[0] + f->offset[i]; 2531 } 2532 2533 /* Check if the memory contents matter */ 2534 if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) && 2535 !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { 2536 VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; 2537 for (int i = 0; i < planes; i++) { 2538 map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; 2539 map_mem_ranges[i].size = VK_WHOLE_SIZE; 2540 map_mem_ranges[i].memory = f->mem[i]; 2541 } 2542 2543 ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes, 2544 map_mem_ranges); 2545 if (ret != VK_SUCCESS) { 2546 av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", 2547 vk_ret2str(ret)); 2548 err = AVERROR_EXTERNAL; 2549 goto fail; 2550 } 2551 } 2552 2553 for (int i = 0; i < planes; i++) { 2554 VkImageSubresource sub = { 2555 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2556 }; 2557 VkSubresourceLayout layout; 2558 vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); 2559 dst->linesize[i] = layout.rowPitch; 2560 } 2561 2562 map->frame = f; 2563 map->flags = flags; 2564 2565 err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, 2566 &vulkan_unmap_frame, map); 2567 if (err < 0) 2568 goto fail; 2569 2570 return 0; 2571 2572fail: 2573 for (int i = 0; i < mapped_mem_count; i++) 2574 vk->UnmapMemory(hwctx->act_dev, f->mem[i]); 2575 2576 av_free(map); 2577 return err; 2578} 2579 2580#if CONFIG_LIBDRM 2581static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) 2582{ 2583 AVVkFrame *f = hwmap->priv; 2584 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 2585 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 2586 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 2587 FFVulkanFunctions *vk = &p->vkfn; 2588 2589 VkSemaphoreWaitInfo wait_info = { 2590 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, 2591 .flags = 0x0, 2592 .pSemaphores = f->sem, 2593 .pValues = f->sem_value, 2594 .semaphoreCount = planes, 2595 }; 2596 2597 vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX); 2598 2599 vulkan_free_internal(f); 2600 2601 for (int i = 0; i < planes; i++) { 2602 vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); 2603 vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); 2604 vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); 2605 } 2606 2607 av_free(f); 2608} 2609 2610static const struct { 2611 uint32_t drm_fourcc; 2612 VkFormat vk_format; 2613} vulkan_drm_format_map[] = { 2614 { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM }, 2615 { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM }, 2616 { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM }, 2617 { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM }, 2618 { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM }, 2619 { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM }, 2620 { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM }, 2621 { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM }, 2622 { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM }, 2623 { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM }, 2624}; 2625 2626static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc) 2627{ 2628 for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) 2629 if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc) 2630 return vulkan_drm_format_map[i].vk_format; 2631 return VK_FORMAT_UNDEFINED; 2632} 2633 2634static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame, 2635 const AVFrame *src) 2636{ 2637 int err = 0; 2638 VkResult ret; 2639 AVVkFrame *f; 2640 int bind_counts = 0; 2641 AVHWDeviceContext *ctx = hwfc->device_ctx; 2642 AVVulkanDeviceContext *hwctx = ctx->hwctx; 2643 VulkanDevicePriv *p = ctx->internal->priv; 2644 FFVulkanFunctions *vk = &p->vkfn; 2645 VulkanFramesPriv *fp = hwfc->internal->priv; 2646 const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; 2647 VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; 2648 VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES]; 2649 2650 for (int i = 0; i < desc->nb_layers; i++) { 2651 if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) { 2652 av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n", 2653 desc->layers[i].format); 2654 return AVERROR(EINVAL); 2655 } 2656 } 2657 2658 if (!(f = av_vk_frame_alloc())) { 2659 av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); 2660 err = AVERROR(ENOMEM); 2661 goto fail; 2662 } 2663 2664 f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; 2665 2666 for (int i = 0; i < desc->nb_layers; i++) { 2667 const int planes = desc->layers[i].nb_planes; 2668 2669 /* Semaphore */ 2670 VkSemaphoreTypeCreateInfo sem_type_info = { 2671 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, 2672 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, 2673 .initialValue = 0, 2674 }; 2675 VkSemaphoreCreateInfo sem_spawn = { 2676 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, 2677 .pNext = &sem_type_info, 2678 }; 2679 2680 /* Image creation */ 2681 VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES]; 2682 VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = { 2683 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, 2684 .drmFormatModifier = desc->objects[0].format_modifier, 2685 .drmFormatModifierPlaneCount = planes, 2686 .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts, 2687 }; 2688 VkExternalMemoryImageCreateInfo ext_img_spec = { 2689 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, 2690 .pNext = &ext_img_mod_spec, 2691 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, 2692 }; 2693 VkImageCreateInfo create_info = { 2694 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 2695 .pNext = &ext_img_spec, 2696 .imageType = VK_IMAGE_TYPE_2D, 2697 .format = drm_to_vulkan_fmt(desc->layers[i].format), 2698 .extent.depth = 1, 2699 .mipLevels = 1, 2700 .arrayLayers = 1, 2701 .flags = 0x0, /* ALIAS flag is implicit for imported images */ 2702 .tiling = f->tiling, 2703 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ 2704 .usage = VK_IMAGE_USAGE_SAMPLED_BIT | 2705 VK_IMAGE_USAGE_TRANSFER_SRC_BIT, 2706 .samples = VK_SAMPLE_COUNT_1_BIT, 2707 .pQueueFamilyIndices = p->qfs, 2708 .queueFamilyIndexCount = p->num_qfs, 2709 .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : 2710 VK_SHARING_MODE_EXCLUSIVE, 2711 }; 2712 2713 /* Image format verification */ 2714 VkExternalImageFormatProperties ext_props = { 2715 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, 2716 }; 2717 VkImageFormatProperties2 props_ret = { 2718 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, 2719 .pNext = &ext_props, 2720 }; 2721 VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = { 2722 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, 2723 .drmFormatModifier = ext_img_mod_spec.drmFormatModifier, 2724 .pQueueFamilyIndices = create_info.pQueueFamilyIndices, 2725 .queueFamilyIndexCount = create_info.queueFamilyIndexCount, 2726 .sharingMode = create_info.sharingMode, 2727 }; 2728 VkPhysicalDeviceExternalImageFormatInfo props_ext = { 2729 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, 2730 .pNext = &props_drm_mod, 2731 .handleType = ext_img_spec.handleTypes, 2732 }; 2733 VkPhysicalDeviceImageFormatInfo2 fmt_props = { 2734 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, 2735 .pNext = &props_ext, 2736 .format = create_info.format, 2737 .type = create_info.imageType, 2738 .tiling = create_info.tiling, 2739 .usage = create_info.usage, 2740 .flags = create_info.flags, 2741 }; 2742 2743 /* Check if importing is possible for this combination of parameters */ 2744 ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev, 2745 &fmt_props, &props_ret); 2746 if (ret != VK_SUCCESS) { 2747 av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n", 2748 vk_ret2str(ret)); 2749 err = AVERROR_EXTERNAL; 2750 goto fail; 2751 } 2752 2753 /* Set the image width/height */ 2754 get_plane_wh(&create_info.extent.width, &create_info.extent.height, 2755 hwfc->sw_format, src->width, src->height, i); 2756 2757 /* Set the subresource layout based on the layer properties */ 2758 for (int j = 0; j < planes; j++) { 2759 ext_img_layouts[j].offset = desc->layers[i].planes[j].offset; 2760 ext_img_layouts[j].rowPitch = desc->layers[i].planes[j].pitch; 2761 ext_img_layouts[j].size = 0; /* The specs say so for all 3 */ 2762 ext_img_layouts[j].arrayPitch = 0; 2763 ext_img_layouts[j].depthPitch = 0; 2764 } 2765 2766 /* Create image */ 2767 ret = vk->CreateImage(hwctx->act_dev, &create_info, 2768 hwctx->alloc, &f->img[i]); 2769 if (ret != VK_SUCCESS) { 2770 av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", 2771 vk_ret2str(ret)); 2772 err = AVERROR(EINVAL); 2773 goto fail; 2774 } 2775 2776 ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, 2777 hwctx->alloc, &f->sem[i]); 2778 if (ret != VK_SUCCESS) { 2779 av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", 2780 vk_ret2str(ret)); 2781 return AVERROR_EXTERNAL; 2782 } 2783 2784 /* We'd import a semaphore onto the one we created using 2785 * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI 2786 * offer us anything we could import and sync with, so instead 2787 * just signal the semaphore we created. */ 2788 2789 f->layout[i] = create_info.initialLayout; 2790 f->access[i] = 0x0; 2791 f->sem_value[i] = 0; 2792 } 2793 2794 for (int i = 0; i < desc->nb_objects; i++) { 2795 /* Memory requirements */ 2796 VkImageMemoryRequirementsInfo2 req_desc = { 2797 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, 2798 .image = f->img[i], 2799 }; 2800 VkMemoryDedicatedRequirements ded_req = { 2801 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, 2802 }; 2803 VkMemoryRequirements2 req2 = { 2804 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, 2805 .pNext = &ded_req, 2806 }; 2807 2808 /* Allocation/importing */ 2809 VkMemoryFdPropertiesKHR fdmp = { 2810 .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, 2811 }; 2812 VkImportMemoryFdInfoKHR idesc = { 2813 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, 2814 .fd = dup(desc->objects[i].fd), 2815 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, 2816 }; 2817 VkMemoryDedicatedAllocateInfo ded_alloc = { 2818 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 2819 .pNext = &idesc, 2820 .image = req_desc.image, 2821 }; 2822 2823 /* Get object properties */ 2824 ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev, 2825 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, 2826 idesc.fd, &fdmp); 2827 if (ret != VK_SUCCESS) { 2828 av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", 2829 vk_ret2str(ret)); 2830 err = AVERROR_EXTERNAL; 2831 close(idesc.fd); 2832 goto fail; 2833 } 2834 2835 vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2); 2836 2837 /* Only a single bit must be set, not a range, and it must match */ 2838 req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits; 2839 2840 err = alloc_mem(ctx, &req2.memoryRequirements, 2841 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 2842 (ded_req.prefersDedicatedAllocation || 2843 ded_req.requiresDedicatedAllocation) ? 2844 &ded_alloc : ded_alloc.pNext, 2845 &f->flags, &f->mem[i]); 2846 if (err) { 2847 close(idesc.fd); 2848 return err; 2849 } 2850 2851 f->size[i] = req2.memoryRequirements.size; 2852 } 2853 2854 for (int i = 0; i < desc->nb_layers; i++) { 2855 const int planes = desc->layers[i].nb_planes; 2856 for (int j = 0; j < planes; j++) { 2857 VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT : 2858 j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT : 2859 VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT; 2860 2861 plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO; 2862 plane_info[bind_counts].pNext = NULL; 2863 plane_info[bind_counts].planeAspect = aspect; 2864 2865 bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; 2866 bind_info[bind_counts].pNext = planes > 1 ? &plane_info[bind_counts] : NULL; 2867 bind_info[bind_counts].image = f->img[i]; 2868 bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index]; 2869 2870 /* Offset is already signalled via pPlaneLayouts above */ 2871 bind_info[bind_counts].memoryOffset = 0; 2872 2873 bind_counts++; 2874 } 2875 } 2876 2877 /* Bind the allocated memory to the images */ 2878 ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info); 2879 if (ret != VK_SUCCESS) { 2880 av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", 2881 vk_ret2str(ret)); 2882 err = AVERROR_EXTERNAL; 2883 goto fail; 2884 } 2885 2886 err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_IMPORT); 2887 if (err) 2888 goto fail; 2889 2890 *frame = f; 2891 2892 return 0; 2893 2894fail: 2895 for (int i = 0; i < desc->nb_layers; i++) { 2896 vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); 2897 vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); 2898 } 2899 for (int i = 0; i < desc->nb_objects; i++) 2900 vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); 2901 2902 av_free(f); 2903 2904 return err; 2905} 2906 2907static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst, 2908 const AVFrame *src, int flags) 2909{ 2910 int err = 0; 2911 AVVkFrame *f; 2912 2913 if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src))) 2914 return err; 2915 2916 /* The unmapping function will free this */ 2917 dst->data[0] = (uint8_t *)f; 2918 dst->width = src->width; 2919 dst->height = src->height; 2920 2921 err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, 2922 &vulkan_unmap_from_drm, f); 2923 if (err < 0) 2924 goto fail; 2925 2926 av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n"); 2927 2928 return 0; 2929 2930fail: 2931 vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f); 2932 dst->data[0] = NULL; 2933 return err; 2934} 2935 2936#if CONFIG_VAAPI 2937static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc, 2938 AVFrame *dst, const AVFrame *src, 2939 int flags) 2940{ 2941 int err; 2942 AVFrame *tmp = av_frame_alloc(); 2943 AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; 2944 AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx; 2945 VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3]; 2946 2947 if (!tmp) 2948 return AVERROR(ENOMEM); 2949 2950 /* We have to sync since like the previous comment said, no semaphores */ 2951 vaSyncSurface(vaapi_ctx->display, surface_id); 2952 2953 tmp->format = AV_PIX_FMT_DRM_PRIME; 2954 2955 err = av_hwframe_map(tmp, src, flags); 2956 if (err < 0) 2957 goto fail; 2958 2959 err = vulkan_map_from_drm(dst_fc, dst, tmp, flags); 2960 if (err < 0) 2961 goto fail; 2962 2963 err = ff_hwframe_map_replace(dst, src); 2964 2965fail: 2966 av_frame_free(&tmp); 2967 return err; 2968} 2969#endif 2970#endif 2971 2972#if CONFIG_CUDA 2973static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, 2974 AVBufferRef *cuda_hwfc, 2975 const AVFrame *frame) 2976{ 2977 int err; 2978 VkResult ret; 2979 AVVkFrame *dst_f; 2980 AVVkFrameInternal *dst_int; 2981 AVHWDeviceContext *ctx = hwfc->device_ctx; 2982 AVVulkanDeviceContext *hwctx = ctx->hwctx; 2983 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 2984 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); 2985 VulkanDevicePriv *p = ctx->internal->priv; 2986 FFVulkanFunctions *vk = &p->vkfn; 2987 2988 AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; 2989 AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; 2990 AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; 2991 AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; 2992 CudaFunctions *cu = cu_internal->cuda_dl; 2993 CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 : 2994 CU_AD_FORMAT_UNSIGNED_INT8; 2995 2996 dst_f = (AVVkFrame *)frame->data[0]; 2997 2998 dst_int = dst_f->internal; 2999 if (!dst_int || !dst_int->cuda_fc_ref) { 3000 if (!dst_f->internal) 3001 dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); 3002 3003 if (!dst_int) 3004 return AVERROR(ENOMEM); 3005 3006 dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); 3007 if (!dst_int->cuda_fc_ref) { 3008 av_freep(&dst_f->internal); 3009 return AVERROR(ENOMEM); 3010 } 3011 3012 for (int i = 0; i < planes; i++) { 3013 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { 3014 .offset = 0, 3015 .arrayDesc = { 3016 .Depth = 0, 3017 .Format = cufmt, 3018 .NumChannels = 1 + ((planes == 2) && i), 3019 .Flags = 0, 3020 }, 3021 .numLevels = 1, 3022 }; 3023 int p_w, p_h; 3024 3025#ifdef _WIN32 3026 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { 3027 .type = IsWindows8OrGreater() 3028 ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 3029 : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT, 3030 .size = dst_f->size[i], 3031 }; 3032 VkMemoryGetWin32HandleInfoKHR export_info = { 3033 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, 3034 .memory = dst_f->mem[i], 3035 .handleType = IsWindows8OrGreater() 3036 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 3037 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, 3038 }; 3039 VkSemaphoreGetWin32HandleInfoKHR sem_export = { 3040 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, 3041 .semaphore = dst_f->sem[i], 3042 .handleType = IsWindows8OrGreater() 3043 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT 3044 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, 3045 }; 3046 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { 3047 .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */, 3048 }; 3049 3050 ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info, 3051 &ext_desc.handle.win32.handle); 3052 if (ret != VK_SUCCESS) { 3053 av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n", 3054 vk_ret2str(ret)); 3055 err = AVERROR_EXTERNAL; 3056 goto fail; 3057 } 3058 dst_int->ext_mem_handle[i] = ext_desc.handle.win32.handle; 3059#else 3060 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { 3061 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, 3062 .size = dst_f->size[i], 3063 }; 3064 VkMemoryGetFdInfoKHR export_info = { 3065 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, 3066 .memory = dst_f->mem[i], 3067 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, 3068 }; 3069 VkSemaphoreGetFdInfoKHR sem_export = { 3070 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, 3071 .semaphore = dst_f->sem[i], 3072 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 3073 }; 3074 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { 3075 .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */, 3076 }; 3077 3078 ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, 3079 &ext_desc.handle.fd); 3080 if (ret != VK_SUCCESS) { 3081 av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n", 3082 vk_ret2str(ret)); 3083 err = AVERROR_EXTERNAL; 3084 goto fail; 3085 } 3086#endif 3087 3088 ret = CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[i], &ext_desc)); 3089 if (ret < 0) { 3090#ifndef _WIN32 3091 close(ext_desc.handle.fd); 3092#endif 3093 err = AVERROR_EXTERNAL; 3094 goto fail; 3095 } 3096 3097 get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); 3098 tex_desc.arrayDesc.Width = p_w; 3099 tex_desc.arrayDesc.Height = p_h; 3100 3101 ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i], 3102 dst_int->ext_mem[i], 3103 &tex_desc)); 3104 if (ret < 0) { 3105 err = AVERROR_EXTERNAL; 3106 goto fail; 3107 } 3108 3109 ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i], 3110 dst_int->cu_mma[i], 0)); 3111 if (ret < 0) { 3112 err = AVERROR_EXTERNAL; 3113 goto fail; 3114 } 3115 3116#ifdef _WIN32 3117 ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export, 3118 &ext_sem_desc.handle.win32.handle); 3119#else 3120 ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export, 3121 &ext_sem_desc.handle.fd); 3122#endif 3123 if (ret != VK_SUCCESS) { 3124 av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", 3125 vk_ret2str(ret)); 3126 err = AVERROR_EXTERNAL; 3127 goto fail; 3128 } 3129#ifdef _WIN32 3130 dst_int->ext_sem_handle[i] = ext_sem_desc.handle.win32.handle; 3131#endif 3132 3133 ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i], 3134 &ext_sem_desc)); 3135 if (ret < 0) { 3136#ifndef _WIN32 3137 close(ext_sem_desc.handle.fd); 3138#endif 3139 err = AVERROR_EXTERNAL; 3140 goto fail; 3141 } 3142 } 3143 } 3144 3145 return 0; 3146 3147fail: 3148 vulkan_free_internal(dst_f); 3149 return err; 3150} 3151 3152static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, 3153 AVFrame *dst, const AVFrame *src) 3154{ 3155 int err; 3156 CUcontext dummy; 3157 AVVkFrame *dst_f; 3158 AVVkFrameInternal *dst_int; 3159 VulkanFramesPriv *fp = hwfc->internal->priv; 3160 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 3161 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); 3162 3163 AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; 3164 AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; 3165 AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; 3166 AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; 3167 CudaFunctions *cu = cu_internal->cuda_dl; 3168 CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; 3169 CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; 3170 3171 dst_f = (AVVkFrame *)dst->data[0]; 3172 3173 err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT); 3174 if (err < 0) 3175 return err; 3176 3177 err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); 3178 if (err < 0) 3179 return err; 3180 3181 err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst); 3182 if (err < 0) { 3183 CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 3184 return err; 3185 } 3186 3187 dst_int = dst_f->internal; 3188 3189 for (int i = 0; i < planes; i++) { 3190 s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0; 3191 s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1; 3192 } 3193 3194 err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, 3195 planes, cuda_dev->stream)); 3196 if (err < 0) 3197 goto fail; 3198 3199 for (int i = 0; i < planes; i++) { 3200 CUDA_MEMCPY2D cpy = { 3201 .srcMemoryType = CU_MEMORYTYPE_DEVICE, 3202 .srcDevice = (CUdeviceptr)src->data[i], 3203 .srcPitch = src->linesize[i], 3204 .srcY = 0, 3205 3206 .dstMemoryType = CU_MEMORYTYPE_ARRAY, 3207 .dstArray = dst_int->cu_array[i], 3208 }; 3209 3210 int p_w, p_h; 3211 get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); 3212 3213 cpy.WidthInBytes = p_w * desc->comp[i].step; 3214 cpy.Height = p_h; 3215 3216 err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); 3217 if (err < 0) 3218 goto fail; 3219 } 3220 3221 err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, 3222 planes, cuda_dev->stream)); 3223 if (err < 0) 3224 goto fail; 3225 3226 for (int i = 0; i < planes; i++) 3227 dst_f->sem_value[i]++; 3228 3229 CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 3230 3231 av_log(hwfc, AV_LOG_VERBOSE, "Transfered CUDA image to Vulkan!\n"); 3232 3233 return err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT); 3234 3235fail: 3236 CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 3237 vulkan_free_internal(dst_f); 3238 dst_f->internal = NULL; 3239 av_buffer_unref(&dst->buf[0]); 3240 return err; 3241} 3242#endif 3243 3244static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, 3245 const AVFrame *src, int flags) 3246{ 3247 av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 3248 3249 switch (src->format) { 3250#if CONFIG_LIBDRM 3251#if CONFIG_VAAPI 3252 case AV_PIX_FMT_VAAPI: 3253 if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) 3254 return vulkan_map_from_vaapi(hwfc, dst, src, flags); 3255 else 3256 return AVERROR(ENOSYS); 3257#endif 3258 case AV_PIX_FMT_DRM_PRIME: 3259 if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) 3260 return vulkan_map_from_drm(hwfc, dst, src, flags); 3261 else 3262 return AVERROR(ENOSYS); 3263#endif 3264 default: 3265 return AVERROR(ENOSYS); 3266 } 3267} 3268 3269#if CONFIG_LIBDRM 3270typedef struct VulkanDRMMapping { 3271 AVDRMFrameDescriptor drm_desc; 3272 AVVkFrame *source; 3273} VulkanDRMMapping; 3274 3275static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) 3276{ 3277 AVDRMFrameDescriptor *drm_desc = hwmap->priv; 3278 3279 for (int i = 0; i < drm_desc->nb_objects; i++) 3280 close(drm_desc->objects[i].fd); 3281 3282 av_free(drm_desc); 3283} 3284 3285static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt) 3286{ 3287 for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) 3288 if (vulkan_drm_format_map[i].vk_format == vkfmt) 3289 return vulkan_drm_format_map[i].drm_fourcc; 3290 return DRM_FORMAT_INVALID; 3291} 3292 3293static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, 3294 const AVFrame *src, int flags) 3295{ 3296 int err = 0; 3297 VkResult ret; 3298 AVVkFrame *f = (AVVkFrame *)src->data[0]; 3299 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 3300 FFVulkanFunctions *vk = &p->vkfn; 3301 VulkanFramesPriv *fp = hwfc->internal->priv; 3302 AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; 3303 AVVulkanFramesContext *hwfctx = hwfc->hwctx; 3304 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 3305 VkImageDrmFormatModifierPropertiesEXT drm_mod = { 3306 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, 3307 }; 3308 VkSemaphoreWaitInfo wait_info = { 3309 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, 3310 .flags = 0x0, 3311 .semaphoreCount = planes, 3312 }; 3313 3314 AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc)); 3315 if (!drm_desc) 3316 return AVERROR(ENOMEM); 3317 3318 err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT); 3319 if (err < 0) 3320 goto end; 3321 3322 /* Wait for the operation to finish so we can cleanly export it. */ 3323 wait_info.pSemaphores = f->sem; 3324 wait_info.pValues = f->sem_value; 3325 3326 vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX); 3327 3328 err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc); 3329 if (err < 0) 3330 goto end; 3331 3332 ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], 3333 &drm_mod); 3334 if (ret != VK_SUCCESS) { 3335 av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); 3336 err = AVERROR_EXTERNAL; 3337 goto end; 3338 } 3339 3340 for (int i = 0; (i < planes) && (f->mem[i]); i++) { 3341 VkMemoryGetFdInfoKHR export_info = { 3342 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, 3343 .memory = f->mem[i], 3344 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, 3345 }; 3346 3347 ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, 3348 &drm_desc->objects[i].fd); 3349 if (ret != VK_SUCCESS) { 3350 av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); 3351 err = AVERROR_EXTERNAL; 3352 goto end; 3353 } 3354 3355 drm_desc->nb_objects++; 3356 drm_desc->objects[i].size = f->size[i]; 3357 drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier; 3358 } 3359 3360 drm_desc->nb_layers = planes; 3361 for (int i = 0; i < drm_desc->nb_layers; i++) { 3362 VkSubresourceLayout layout; 3363 VkImageSubresource sub = { 3364 .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT, 3365 }; 3366 VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i]; 3367 3368 drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt); 3369 drm_desc->layers[i].nb_planes = 1; 3370 3371 if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) { 3372 av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n"); 3373 err = AVERROR_PATCHWELCOME; 3374 goto end; 3375 } 3376 3377 drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1); 3378 3379 if (f->tiling == VK_IMAGE_TILING_OPTIMAL) 3380 continue; 3381 3382 vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); 3383 drm_desc->layers[i].planes[0].offset = layout.offset; 3384 drm_desc->layers[i].planes[0].pitch = layout.rowPitch; 3385 3386 if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) 3387 drm_desc->layers[i].planes[0].offset += f->offset[i]; 3388 } 3389 3390 dst->width = src->width; 3391 dst->height = src->height; 3392 dst->data[0] = (uint8_t *)drm_desc; 3393 3394 av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n"); 3395 3396 return 0; 3397 3398end: 3399 av_free(drm_desc); 3400 return err; 3401} 3402 3403#if CONFIG_VAAPI 3404static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst, 3405 const AVFrame *src, int flags) 3406{ 3407 int err; 3408 AVFrame *tmp = av_frame_alloc(); 3409 if (!tmp) 3410 return AVERROR(ENOMEM); 3411 3412 tmp->format = AV_PIX_FMT_DRM_PRIME; 3413 3414 err = vulkan_map_to_drm(hwfc, tmp, src, flags); 3415 if (err < 0) 3416 goto fail; 3417 3418 err = av_hwframe_map(dst, tmp, flags); 3419 if (err < 0) 3420 goto fail; 3421 3422 err = ff_hwframe_map_replace(dst, src); 3423 3424fail: 3425 av_frame_free(&tmp); 3426 return err; 3427} 3428#endif 3429#endif 3430 3431static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, 3432 const AVFrame *src, int flags) 3433{ 3434 av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 3435 3436 switch (dst->format) { 3437#if CONFIG_LIBDRM 3438 case AV_PIX_FMT_DRM_PRIME: 3439 if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) 3440 return vulkan_map_to_drm(hwfc, dst, src, flags); 3441 else 3442 return AVERROR(ENOSYS); 3443#if CONFIG_VAAPI 3444 case AV_PIX_FMT_VAAPI: 3445 if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) 3446 return vulkan_map_to_vaapi(hwfc, dst, src, flags); 3447 else 3448 return AVERROR(ENOSYS); 3449#endif 3450#endif 3451 default: 3452 return vulkan_map_frame_to_mem(hwfc, dst, src, flags); 3453 } 3454} 3455 3456typedef struct ImageBuffer { 3457 VkBuffer buf; 3458 VkDeviceMemory mem; 3459 VkMemoryPropertyFlagBits flags; 3460 int mapped_mem; 3461} ImageBuffer; 3462 3463static void free_buf(void *opaque, uint8_t *data) 3464{ 3465 AVHWDeviceContext *ctx = opaque; 3466 AVVulkanDeviceContext *hwctx = ctx->hwctx; 3467 VulkanDevicePriv *p = ctx->internal->priv; 3468 FFVulkanFunctions *vk = &p->vkfn; 3469 ImageBuffer *vkbuf = (ImageBuffer *)data; 3470 3471 if (vkbuf->buf) 3472 vk->DestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc); 3473 if (vkbuf->mem) 3474 vk->FreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc); 3475 3476 av_free(data); 3477} 3478 3479static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) 3480{ 3481 size_t size; 3482 *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment); 3483 size = height*(*stride); 3484 size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment); 3485 return size; 3486} 3487 3488static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf, 3489 VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags, 3490 size_t size, uint32_t req_memory_bits, int host_mapped, 3491 void *create_pnext, void *alloc_pnext) 3492{ 3493 int err; 3494 VkResult ret; 3495 int use_ded_mem; 3496 AVVulkanDeviceContext *hwctx = ctx->hwctx; 3497 VulkanDevicePriv *p = ctx->internal->priv; 3498 FFVulkanFunctions *vk = &p->vkfn; 3499 3500 VkBufferCreateInfo buf_spawn = { 3501 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 3502 .pNext = create_pnext, 3503 .usage = usage, 3504 .size = size, 3505 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 3506 }; 3507 3508 VkBufferMemoryRequirementsInfo2 req_desc = { 3509 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, 3510 }; 3511 VkMemoryDedicatedAllocateInfo ded_alloc = { 3512 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 3513 .pNext = alloc_pnext, 3514 }; 3515 VkMemoryDedicatedRequirements ded_req = { 3516 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, 3517 }; 3518 VkMemoryRequirements2 req = { 3519 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, 3520 .pNext = &ded_req, 3521 }; 3522 3523 ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf)); 3524 if (!vkbuf) 3525 return AVERROR(ENOMEM); 3526 3527 vkbuf->mapped_mem = host_mapped; 3528 3529 ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf); 3530 if (ret != VK_SUCCESS) { 3531 av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", 3532 vk_ret2str(ret)); 3533 err = AVERROR_EXTERNAL; 3534 goto fail; 3535 } 3536 3537 req_desc.buffer = vkbuf->buf; 3538 3539 vk->GetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req); 3540 3541 /* In case the implementation prefers/requires dedicated allocation */ 3542 use_ded_mem = ded_req.prefersDedicatedAllocation | 3543 ded_req.requiresDedicatedAllocation; 3544 if (use_ded_mem) 3545 ded_alloc.buffer = vkbuf->buf; 3546 3547 /* Additional requirements imposed on us */ 3548 if (req_memory_bits) 3549 req.memoryRequirements.memoryTypeBits &= req_memory_bits; 3550 3551 err = alloc_mem(ctx, &req.memoryRequirements, flags, 3552 use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, 3553 &vkbuf->flags, &vkbuf->mem); 3554 if (err) 3555 goto fail; 3556 3557 ret = vk->BindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0); 3558 if (ret != VK_SUCCESS) { 3559 av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", 3560 vk_ret2str(ret)); 3561 err = AVERROR_EXTERNAL; 3562 goto fail; 3563 } 3564 3565 *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0); 3566 if (!(*buf)) { 3567 err = AVERROR(ENOMEM); 3568 goto fail; 3569 } 3570 3571 return 0; 3572 3573fail: 3574 free_buf(ctx, (uint8_t *)vkbuf); 3575 return err; 3576} 3577 3578/* Skips mapping of host mapped buffers but still invalidates them */ 3579static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[], 3580 int nb_buffers, int invalidate) 3581{ 3582 VkResult ret; 3583 AVVulkanDeviceContext *hwctx = ctx->hwctx; 3584 VulkanDevicePriv *p = ctx->internal->priv; 3585 FFVulkanFunctions *vk = &p->vkfn; 3586 VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS]; 3587 int invalidate_count = 0; 3588 3589 for (int i = 0; i < nb_buffers; i++) { 3590 ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; 3591 if (vkbuf->mapped_mem) 3592 continue; 3593 3594 ret = vk->MapMemory(hwctx->act_dev, vkbuf->mem, 0, 3595 VK_WHOLE_SIZE, 0, (void **)&mem[i]); 3596 if (ret != VK_SUCCESS) { 3597 av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", 3598 vk_ret2str(ret)); 3599 return AVERROR_EXTERNAL; 3600 } 3601 } 3602 3603 if (!invalidate) 3604 return 0; 3605 3606 for (int i = 0; i < nb_buffers; i++) { 3607 ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; 3608 const VkMappedMemoryRange ival_buf = { 3609 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 3610 .memory = vkbuf->mem, 3611 .size = VK_WHOLE_SIZE, 3612 }; 3613 3614 /* For host imported memory Vulkan says to use platform-defined 3615 * sync methods, but doesn't really say not to call flush or invalidate 3616 * on original host pointers. It does explicitly allow to do that on 3617 * host-mapped pointers which are then mapped again using vkMapMemory, 3618 * but known implementations return the original pointers when mapped 3619 * again. */ 3620 if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) 3621 continue; 3622 3623 invalidate_ctx[invalidate_count++] = ival_buf; 3624 } 3625 3626 if (invalidate_count) { 3627 ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count, 3628 invalidate_ctx); 3629 if (ret != VK_SUCCESS) 3630 av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n", 3631 vk_ret2str(ret)); 3632 } 3633 3634 return 0; 3635} 3636 3637static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, 3638 int nb_buffers, int flush) 3639{ 3640 int err = 0; 3641 VkResult ret; 3642 AVVulkanDeviceContext *hwctx = ctx->hwctx; 3643 VulkanDevicePriv *p = ctx->internal->priv; 3644 FFVulkanFunctions *vk = &p->vkfn; 3645 VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS]; 3646 int flush_count = 0; 3647 3648 if (flush) { 3649 for (int i = 0; i < nb_buffers; i++) { 3650 ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; 3651 const VkMappedMemoryRange flush_buf = { 3652 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 3653 .memory = vkbuf->mem, 3654 .size = VK_WHOLE_SIZE, 3655 }; 3656 3657 if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) 3658 continue; 3659 3660 flush_ctx[flush_count++] = flush_buf; 3661 } 3662 } 3663 3664 if (flush_count) { 3665 ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx); 3666 if (ret != VK_SUCCESS) { 3667 av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", 3668 vk_ret2str(ret)); 3669 err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ 3670 } 3671 } 3672 3673 for (int i = 0; i < nb_buffers; i++) { 3674 ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; 3675 if (vkbuf->mapped_mem) 3676 continue; 3677 3678 vk->UnmapMemory(hwctx->act_dev, vkbuf->mem); 3679 } 3680 3681 return err; 3682} 3683 3684static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, 3685 AVBufferRef **bufs, size_t *buf_offsets, 3686 const int *buf_stride, int w, 3687 int h, enum AVPixelFormat pix_fmt, int to_buf) 3688{ 3689 int err; 3690 AVVkFrame *frame = (AVVkFrame *)f->data[0]; 3691 VulkanFramesPriv *fp = hwfc->internal->priv; 3692 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 3693 FFVulkanFunctions *vk = &p->vkfn; 3694 3695 int bar_num = 0; 3696 VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; 3697 3698 const int planes = av_pix_fmt_count_planes(pix_fmt); 3699 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); 3700 3701 VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; 3702 VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx; 3703 VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx); 3704 3705 uint64_t sem_signal_values[AV_NUM_DATA_POINTERS]; 3706 3707 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { 3708 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, 3709 .pWaitSemaphoreValues = frame->sem_value, 3710 .pSignalSemaphoreValues = sem_signal_values, 3711 .waitSemaphoreValueCount = planes, 3712 .signalSemaphoreValueCount = planes, 3713 }; 3714 3715 VkSubmitInfo s_info = { 3716 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, 3717 .pNext = &s_timeline_sem_info, 3718 .pSignalSemaphores = frame->sem, 3719 .pWaitSemaphores = frame->sem, 3720 .pWaitDstStageMask = sem_wait_dst, 3721 .signalSemaphoreCount = planes, 3722 .waitSemaphoreCount = planes, 3723 }; 3724 3725 for (int i = 0; i < planes; i++) 3726 sem_signal_values[i] = frame->sem_value[i] + 1; 3727 3728 if ((err = wait_start_exec_ctx(hwfc, ectx))) 3729 return err; 3730 3731 /* Change the image layout to something more optimal for transfers */ 3732 for (int i = 0; i < planes; i++) { 3733 VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : 3734 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; 3735 VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT : 3736 VK_ACCESS_TRANSFER_WRITE_BIT; 3737 3738 sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; 3739 3740 /* If the layout matches and we have read access skip the barrier */ 3741 if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access)) 3742 continue; 3743 3744 img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 3745 img_bar[bar_num].srcAccessMask = 0x0; 3746 img_bar[bar_num].dstAccessMask = new_access; 3747 img_bar[bar_num].oldLayout = frame->layout[i]; 3748 img_bar[bar_num].newLayout = new_layout; 3749 img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 3750 img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 3751 img_bar[bar_num].image = frame->img[i]; 3752 img_bar[bar_num].subresourceRange.levelCount = 1; 3753 img_bar[bar_num].subresourceRange.layerCount = 1; 3754 img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 3755 3756 frame->layout[i] = img_bar[bar_num].newLayout; 3757 frame->access[i] = img_bar[bar_num].dstAccessMask; 3758 3759 bar_num++; 3760 } 3761 3762 if (bar_num) 3763 vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 3764 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 3765 0, NULL, 0, NULL, bar_num, img_bar); 3766 3767 /* Schedule a copy for each plane */ 3768 for (int i = 0; i < planes; i++) { 3769 ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; 3770 VkBufferImageCopy buf_reg = { 3771 .bufferOffset = buf_offsets[i], 3772 .bufferRowLength = buf_stride[i] / desc->comp[i].step, 3773 .imageSubresource.layerCount = 1, 3774 .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 3775 .imageOffset = { 0, 0, 0, }, 3776 }; 3777 3778 int p_w, p_h; 3779 get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i); 3780 3781 buf_reg.bufferImageHeight = p_h; 3782 buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; 3783 3784 if (to_buf) 3785 vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i], 3786 vkbuf->buf, 1, &buf_reg); 3787 else 3788 vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i], 3789 frame->layout[i], 1, &buf_reg); 3790 } 3791 3792 /* When uploading, do this asynchronously if the source is refcounted by 3793 * keeping the buffers as a submission dependency. 3794 * The hwcontext is guaranteed to not be freed until all frames are freed 3795 * in the frames_unint function. 3796 * When downloading to buffer, do this synchronously and wait for the 3797 * queue submission to finish executing */ 3798 if (!to_buf) { 3799 int ref; 3800 for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) { 3801 if (!f->buf[ref]) 3802 break; 3803 if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) 3804 return err; 3805 } 3806 if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) 3807 return err; 3808 return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); 3809 } else { 3810 return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); 3811 } 3812} 3813 3814static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, 3815 const AVFrame *swf, int from) 3816{ 3817 int err = 0; 3818 VkResult ret; 3819 AVVkFrame *f = (AVVkFrame *)vkf->data[0]; 3820 AVHWDeviceContext *dev_ctx = hwfc->device_ctx; 3821 AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; 3822 VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 3823 FFVulkanFunctions *vk = &p->vkfn; 3824 3825 AVFrame tmp; 3826 AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; 3827 size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 }; 3828 3829 int p_w, p_h; 3830 const int planes = av_pix_fmt_count_planes(swf->format); 3831 3832 int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; 3833 const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); 3834 3835 if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { 3836 av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); 3837 return AVERROR(EINVAL); 3838 } 3839 3840 if (swf->width > hwfc->width || swf->height > hwfc->height) 3841 return AVERROR(EINVAL); 3842 3843 /* For linear, host visiable images */ 3844 if (f->tiling == VK_IMAGE_TILING_LINEAR && 3845 f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { 3846 AVFrame *map = av_frame_alloc(); 3847 if (!map) 3848 return AVERROR(ENOMEM); 3849 map->format = swf->format; 3850 3851 err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE); 3852 if (err) 3853 return err; 3854 3855 err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf); 3856 av_frame_free(&map); 3857 return err; 3858 } 3859 3860 /* Create buffers */ 3861 for (int i = 0; i < planes; i++) { 3862 size_t req_size; 3863 3864 VkExternalMemoryBufferCreateInfo create_desc = { 3865 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, 3866 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, 3867 }; 3868 3869 VkImportMemoryHostPointerInfoEXT import_desc = { 3870 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, 3871 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, 3872 }; 3873 3874 VkMemoryHostPointerPropertiesEXT p_props = { 3875 .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, 3876 }; 3877 3878 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); 3879 3880 tmp.linesize[i] = FFABS(swf->linesize[i]); 3881 3882 /* Do not map images with a negative stride */ 3883 if (map_host && swf->linesize[i] > 0) { 3884 size_t offs; 3885 offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; 3886 import_desc.pHostPointer = swf->data[i] - offs; 3887 3888 /* We have to compensate for the few extra bytes of padding we 3889 * completely ignore at the start */ 3890 req_size = FFALIGN(offs + tmp.linesize[i] * p_h, 3891 p->hprops.minImportedHostPointerAlignment); 3892 3893 ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, 3894 import_desc.handleType, 3895 import_desc.pHostPointer, 3896 &p_props); 3897 3898 if (ret == VK_SUCCESS) { 3899 host_mapped[i] = 1; 3900 buf_offsets[i] = offs; 3901 } 3902 } 3903 3904 if (!host_mapped[i]) 3905 req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h); 3906 3907 err = create_buf(dev_ctx, &bufs[i], 3908 from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : 3909 VK_BUFFER_USAGE_TRANSFER_SRC_BIT, 3910 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 3911 req_size, p_props.memoryTypeBits, host_mapped[i], 3912 host_mapped[i] ? &create_desc : NULL, 3913 host_mapped[i] ? &import_desc : NULL); 3914 if (err) 3915 goto end; 3916 } 3917 3918 if (!from) { 3919 /* Map, copy image TO buffer (which then goes to the VkImage), unmap */ 3920 if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) 3921 goto end; 3922 3923 for (int i = 0; i < planes; i++) { 3924 if (host_mapped[i]) 3925 continue; 3926 3927 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); 3928 3929 av_image_copy_plane(tmp.data[i], tmp.linesize[i], 3930 (const uint8_t *)swf->data[i], swf->linesize[i], 3931 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), 3932 p_h); 3933 } 3934 3935 if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) 3936 goto end; 3937 } 3938 3939 /* Copy buffers into/from image */ 3940 err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize, 3941 swf->width, swf->height, swf->format, from); 3942 3943 if (from) { 3944 /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ 3945 if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) 3946 goto end; 3947 3948 for (int i = 0; i < planes; i++) { 3949 if (host_mapped[i]) 3950 continue; 3951 3952 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); 3953 3954 av_image_copy_plane_uc_from(swf->data[i], swf->linesize[i], 3955 (const uint8_t *)tmp.data[i], tmp.linesize[i], 3956 FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), 3957 p_h); 3958 } 3959 3960 if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) 3961 goto end; 3962 } 3963 3964end: 3965 for (int i = 0; i < planes; i++) 3966 av_buffer_unref(&bufs[i]); 3967 3968 return err; 3969} 3970 3971static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, 3972 const AVFrame *src) 3973{ 3974 av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 3975 3976 switch (src->format) { 3977#if CONFIG_CUDA 3978 case AV_PIX_FMT_CUDA: 3979#ifdef _WIN32 3980 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && 3981 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) 3982#else 3983 if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && 3984 (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) 3985#endif 3986 return vulkan_transfer_data_from_cuda(hwfc, dst, src); 3987#endif 3988 default: 3989 if (src->hw_frames_ctx) 3990 return AVERROR(ENOSYS); 3991 else 3992 return vulkan_transfer_data(hwfc, dst, src, 0); 3993 } 3994} 3995 3996#if CONFIG_CUDA 3997static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, 3998 const AVFrame *src) 3999{ 4000 int err; 4001 CUcontext dummy; 4002 AVVkFrame *dst_f; 4003 AVVkFrameInternal *dst_int; 4004 VulkanFramesPriv *fp = hwfc->internal->priv; 4005 const int planes = av_pix_fmt_count_planes(hwfc->sw_format); 4006 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); 4007 4008 AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data; 4009 AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; 4010 AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; 4011 AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; 4012 CudaFunctions *cu = cu_internal->cuda_dl; 4013 CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; 4014 CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; 4015 4016 dst_f = (AVVkFrame *)src->data[0]; 4017 4018 err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT); 4019 if (err < 0) 4020 return err; 4021 4022 err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); 4023 if (err < 0) 4024 return err; 4025 4026 err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src); 4027 if (err < 0) { 4028 CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 4029 return err; 4030 } 4031 4032 dst_int = dst_f->internal; 4033 4034 for (int i = 0; i < planes; i++) { 4035 s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0; 4036 s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1; 4037 } 4038 4039 err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, 4040 planes, cuda_dev->stream)); 4041 if (err < 0) 4042 goto fail; 4043 4044 for (int i = 0; i < planes; i++) { 4045 CUDA_MEMCPY2D cpy = { 4046 .dstMemoryType = CU_MEMORYTYPE_DEVICE, 4047 .dstDevice = (CUdeviceptr)dst->data[i], 4048 .dstPitch = dst->linesize[i], 4049 .dstY = 0, 4050 4051 .srcMemoryType = CU_MEMORYTYPE_ARRAY, 4052 .srcArray = dst_int->cu_array[i], 4053 }; 4054 4055 int w, h; 4056 get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, hwfc->height, i); 4057 4058 cpy.WidthInBytes = w * desc->comp[i].step; 4059 cpy.Height = h; 4060 4061 err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); 4062 if (err < 0) 4063 goto fail; 4064 } 4065 4066 err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, 4067 planes, cuda_dev->stream)); 4068 if (err < 0) 4069 goto fail; 4070 4071 for (int i = 0; i < planes; i++) 4072 dst_f->sem_value[i]++; 4073 4074 CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 4075 4076 av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n"); 4077 4078 return prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT); 4079 4080fail: 4081 CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 4082 vulkan_free_internal(dst_f); 4083 dst_f->internal = NULL; 4084 av_buffer_unref(&dst->buf[0]); 4085 return err; 4086} 4087#endif 4088 4089static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, 4090 const AVFrame *src) 4091{ 4092 av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; 4093 4094 switch (dst->format) { 4095#if CONFIG_CUDA 4096 case AV_PIX_FMT_CUDA: 4097#ifdef _WIN32 4098 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && 4099 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) 4100#else 4101 if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && 4102 (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) 4103#endif 4104 return vulkan_transfer_data_to_cuda(hwfc, dst, src); 4105#endif 4106 default: 4107 if (dst->hw_frames_ctx) 4108 return AVERROR(ENOSYS); 4109 else 4110 return vulkan_transfer_data(hwfc, src, dst, 1); 4111 } 4112} 4113 4114static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, 4115 AVHWFramesContext *src_fc, int flags) 4116{ 4117 return vulkan_frames_init(dst_fc); 4118} 4119 4120AVVkFrame *av_vk_frame_alloc(void) 4121{ 4122 return av_mallocz(sizeof(AVVkFrame)); 4123} 4124 4125const HWContextType ff_hwcontext_type_vulkan = { 4126 .type = AV_HWDEVICE_TYPE_VULKAN, 4127 .name = "Vulkan", 4128 4129 .device_hwctx_size = sizeof(AVVulkanDeviceContext), 4130 .device_priv_size = sizeof(VulkanDevicePriv), 4131 .frames_hwctx_size = sizeof(AVVulkanFramesContext), 4132 .frames_priv_size = sizeof(VulkanFramesPriv), 4133 4134 .device_init = &vulkan_device_init, 4135 .device_create = &vulkan_device_create, 4136 .device_derive = &vulkan_device_derive, 4137 4138 .frames_get_constraints = &vulkan_frames_get_constraints, 4139 .frames_init = vulkan_frames_init, 4140 .frames_get_buffer = vulkan_get_buffer, 4141 .frames_uninit = vulkan_frames_uninit, 4142 4143 .transfer_get_formats = vulkan_transfer_get_formats, 4144 .transfer_data_to = vulkan_transfer_data_to, 4145 .transfer_data_from = vulkan_transfer_data_from, 4146 4147 .map_to = vulkan_map_to, 4148 .map_from = vulkan_map_from, 4149 .frames_derive_to = &vulkan_frames_derive_to, 4150 4151 .pix_fmts = (const enum AVPixelFormat []) { 4152 AV_PIX_FMT_VULKAN, 4153 AV_PIX_FMT_NONE 4154 }, 4155}; 4156