1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * This file is part of FFmpeg. 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 5cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 6cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 7cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 10cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 11cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12cabdff1aSopenharmony_ci * Lesser General Public License for more details. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 15cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 16cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17cabdff1aSopenharmony_ci */ 18cabdff1aSopenharmony_ci 19cabdff1aSopenharmony_ci#include "buffer.h" 20cabdff1aSopenharmony_ci#include "common.h" 21cabdff1aSopenharmony_ci#include "hwcontext.h" 22cabdff1aSopenharmony_ci#include "hwcontext_internal.h" 23cabdff1aSopenharmony_ci#include "hwcontext_cuda_internal.h" 24cabdff1aSopenharmony_ci#if CONFIG_VULKAN 25cabdff1aSopenharmony_ci#include "hwcontext_vulkan.h" 26cabdff1aSopenharmony_ci#endif 27cabdff1aSopenharmony_ci#include "cuda_check.h" 28cabdff1aSopenharmony_ci#include "mem.h" 29cabdff1aSopenharmony_ci#include "pixdesc.h" 30cabdff1aSopenharmony_ci#include "pixfmt.h" 31cabdff1aSopenharmony_ci#include "imgutils.h" 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_citypedef struct CUDAFramesContext { 34cabdff1aSopenharmony_ci int shift_width, shift_height; 35cabdff1aSopenharmony_ci int tex_alignment; 36cabdff1aSopenharmony_ci} CUDAFramesContext; 37cabdff1aSopenharmony_ci 38cabdff1aSopenharmony_cistatic const enum AVPixelFormat supported_formats[] = { 39cabdff1aSopenharmony_ci AV_PIX_FMT_NV12, 40cabdff1aSopenharmony_ci AV_PIX_FMT_YUV420P, 41cabdff1aSopenharmony_ci AV_PIX_FMT_YUVA420P, 42cabdff1aSopenharmony_ci AV_PIX_FMT_YUV444P, 43cabdff1aSopenharmony_ci AV_PIX_FMT_P010, 44cabdff1aSopenharmony_ci AV_PIX_FMT_P016, 45cabdff1aSopenharmony_ci AV_PIX_FMT_YUV444P16, 46cabdff1aSopenharmony_ci AV_PIX_FMT_0RGB32, 47cabdff1aSopenharmony_ci AV_PIX_FMT_0BGR32, 48cabdff1aSopenharmony_ci#if CONFIG_VULKAN 49cabdff1aSopenharmony_ci AV_PIX_FMT_VULKAN, 50cabdff1aSopenharmony_ci#endif 51cabdff1aSopenharmony_ci}; 52cabdff1aSopenharmony_ci 53cabdff1aSopenharmony_ci#define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x) 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_cistatic int cuda_frames_get_constraints(AVHWDeviceContext *ctx, 56cabdff1aSopenharmony_ci const void *hwconfig, 57cabdff1aSopenharmony_ci AVHWFramesConstraints *constraints) 58cabdff1aSopenharmony_ci{ 59cabdff1aSopenharmony_ci int i; 60cabdff1aSopenharmony_ci 61cabdff1aSopenharmony_ci constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1, 62cabdff1aSopenharmony_ci sizeof(*constraints->valid_sw_formats)); 63cabdff1aSopenharmony_ci if (!constraints->valid_sw_formats) 64cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 65cabdff1aSopenharmony_ci 66cabdff1aSopenharmony_ci for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) 67cabdff1aSopenharmony_ci constraints->valid_sw_formats[i] = supported_formats[i]; 68cabdff1aSopenharmony_ci constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE; 69cabdff1aSopenharmony_ci 70cabdff1aSopenharmony_ci constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats)); 71cabdff1aSopenharmony_ci if (!constraints->valid_hw_formats) 72cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 73cabdff1aSopenharmony_ci 74cabdff1aSopenharmony_ci constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA; 75cabdff1aSopenharmony_ci constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; 76cabdff1aSopenharmony_ci 77cabdff1aSopenharmony_ci return 0; 78cabdff1aSopenharmony_ci} 79cabdff1aSopenharmony_ci 80cabdff1aSopenharmony_cistatic void cuda_buffer_free(void *opaque, uint8_t *data) 81cabdff1aSopenharmony_ci{ 82cabdff1aSopenharmony_ci AVHWFramesContext *ctx = opaque; 83cabdff1aSopenharmony_ci AVHWDeviceContext *device_ctx = ctx->device_ctx; 84cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 85cabdff1aSopenharmony_ci CudaFunctions *cu = hwctx->internal->cuda_dl; 86cabdff1aSopenharmony_ci 87cabdff1aSopenharmony_ci CUcontext dummy; 88cabdff1aSopenharmony_ci 89cabdff1aSopenharmony_ci CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); 90cabdff1aSopenharmony_ci 91cabdff1aSopenharmony_ci CHECK_CU(cu->cuMemFree((CUdeviceptr)data)); 92cabdff1aSopenharmony_ci 93cabdff1aSopenharmony_ci CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 94cabdff1aSopenharmony_ci} 95cabdff1aSopenharmony_ci 96cabdff1aSopenharmony_cistatic AVBufferRef *cuda_pool_alloc(void *opaque, size_t size) 97cabdff1aSopenharmony_ci{ 98cabdff1aSopenharmony_ci AVHWFramesContext *ctx = opaque; 99cabdff1aSopenharmony_ci AVHWDeviceContext *device_ctx = ctx->device_ctx; 100cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 101cabdff1aSopenharmony_ci CudaFunctions *cu = hwctx->internal->cuda_dl; 102cabdff1aSopenharmony_ci 103cabdff1aSopenharmony_ci AVBufferRef *ret = NULL; 104cabdff1aSopenharmony_ci CUcontext dummy = NULL; 105cabdff1aSopenharmony_ci CUdeviceptr data; 106cabdff1aSopenharmony_ci int err; 107cabdff1aSopenharmony_ci 108cabdff1aSopenharmony_ci err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); 109cabdff1aSopenharmony_ci if (err < 0) 110cabdff1aSopenharmony_ci return NULL; 111cabdff1aSopenharmony_ci 112cabdff1aSopenharmony_ci err = CHECK_CU(cu->cuMemAlloc(&data, size)); 113cabdff1aSopenharmony_ci if (err < 0) 114cabdff1aSopenharmony_ci goto fail; 115cabdff1aSopenharmony_ci 116cabdff1aSopenharmony_ci ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); 117cabdff1aSopenharmony_ci if (!ret) { 118cabdff1aSopenharmony_ci CHECK_CU(cu->cuMemFree(data)); 119cabdff1aSopenharmony_ci goto fail; 120cabdff1aSopenharmony_ci } 121cabdff1aSopenharmony_ci 122cabdff1aSopenharmony_cifail: 123cabdff1aSopenharmony_ci CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 124cabdff1aSopenharmony_ci return ret; 125cabdff1aSopenharmony_ci} 126cabdff1aSopenharmony_ci 127cabdff1aSopenharmony_cistatic int cuda_frames_init(AVHWFramesContext *ctx) 128cabdff1aSopenharmony_ci{ 129cabdff1aSopenharmony_ci AVHWDeviceContext *device_ctx = ctx->device_ctx; 130cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 131cabdff1aSopenharmony_ci CUDAFramesContext *priv = ctx->internal->priv; 132cabdff1aSopenharmony_ci CudaFunctions *cu = hwctx->internal->cuda_dl; 133cabdff1aSopenharmony_ci int err, i; 134cabdff1aSopenharmony_ci 135cabdff1aSopenharmony_ci for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { 136cabdff1aSopenharmony_ci if (ctx->sw_format == supported_formats[i]) 137cabdff1aSopenharmony_ci break; 138cabdff1aSopenharmony_ci } 139cabdff1aSopenharmony_ci if (i == FF_ARRAY_ELEMS(supported_formats)) { 140cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n", 141cabdff1aSopenharmony_ci av_get_pix_fmt_name(ctx->sw_format)); 142cabdff1aSopenharmony_ci return AVERROR(ENOSYS); 143cabdff1aSopenharmony_ci } 144cabdff1aSopenharmony_ci 145cabdff1aSopenharmony_ci err = CHECK_CU(cu->cuDeviceGetAttribute(&priv->tex_alignment, 146cabdff1aSopenharmony_ci 14 /* CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT */, 147cabdff1aSopenharmony_ci hwctx->internal->cuda_device)); 148cabdff1aSopenharmony_ci if (err < 0) 149cabdff1aSopenharmony_ci return err; 150cabdff1aSopenharmony_ci 151cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_DEBUG, "CUDA texture alignment: %d\n", priv->tex_alignment); 152cabdff1aSopenharmony_ci 153cabdff1aSopenharmony_ci // YUV420P is a special case. 154cabdff1aSopenharmony_ci // Since nvenc expects the U/V planes to have half the linesize of the Y plane 155cabdff1aSopenharmony_ci // alignment has to be doubled to ensure the U/V planes still end up aligned. 156cabdff1aSopenharmony_ci if (ctx->sw_format == AV_PIX_FMT_YUV420P) 157cabdff1aSopenharmony_ci priv->tex_alignment *= 2; 158cabdff1aSopenharmony_ci 159cabdff1aSopenharmony_ci av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height); 160cabdff1aSopenharmony_ci 161cabdff1aSopenharmony_ci if (!ctx->pool) { 162cabdff1aSopenharmony_ci int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, priv->tex_alignment); 163cabdff1aSopenharmony_ci if (size < 0) 164cabdff1aSopenharmony_ci return size; 165cabdff1aSopenharmony_ci 166cabdff1aSopenharmony_ci ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL); 167cabdff1aSopenharmony_ci if (!ctx->internal->pool_internal) 168cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 169cabdff1aSopenharmony_ci } 170cabdff1aSopenharmony_ci 171cabdff1aSopenharmony_ci return 0; 172cabdff1aSopenharmony_ci} 173cabdff1aSopenharmony_ci 174cabdff1aSopenharmony_cistatic int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) 175cabdff1aSopenharmony_ci{ 176cabdff1aSopenharmony_ci CUDAFramesContext *priv = ctx->internal->priv; 177cabdff1aSopenharmony_ci int res; 178cabdff1aSopenharmony_ci 179cabdff1aSopenharmony_ci frame->buf[0] = av_buffer_pool_get(ctx->pool); 180cabdff1aSopenharmony_ci if (!frame->buf[0]) 181cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 182cabdff1aSopenharmony_ci 183cabdff1aSopenharmony_ci res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data, 184cabdff1aSopenharmony_ci ctx->sw_format, ctx->width, ctx->height, priv->tex_alignment); 185cabdff1aSopenharmony_ci if (res < 0) 186cabdff1aSopenharmony_ci return res; 187cabdff1aSopenharmony_ci 188cabdff1aSopenharmony_ci // YUV420P is a special case. 189cabdff1aSopenharmony_ci // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them, also chroma is half-aligned 190cabdff1aSopenharmony_ci if (ctx->sw_format == AV_PIX_FMT_YUV420P) { 191cabdff1aSopenharmony_ci frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2; 192cabdff1aSopenharmony_ci frame->data[2] = frame->data[1]; 193cabdff1aSopenharmony_ci frame->data[1] = frame->data[2] + frame->linesize[2] * (ctx->height / 2); 194cabdff1aSopenharmony_ci } 195cabdff1aSopenharmony_ci 196cabdff1aSopenharmony_ci frame->format = AV_PIX_FMT_CUDA; 197cabdff1aSopenharmony_ci frame->width = ctx->width; 198cabdff1aSopenharmony_ci frame->height = ctx->height; 199cabdff1aSopenharmony_ci 200cabdff1aSopenharmony_ci return 0; 201cabdff1aSopenharmony_ci} 202cabdff1aSopenharmony_ci 203cabdff1aSopenharmony_cistatic int cuda_transfer_get_formats(AVHWFramesContext *ctx, 204cabdff1aSopenharmony_ci enum AVHWFrameTransferDirection dir, 205cabdff1aSopenharmony_ci enum AVPixelFormat **formats) 206cabdff1aSopenharmony_ci{ 207cabdff1aSopenharmony_ci enum AVPixelFormat *fmts; 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_ci fmts = av_malloc_array(2, sizeof(*fmts)); 210cabdff1aSopenharmony_ci if (!fmts) 211cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_ci fmts[0] = ctx->sw_format; 214cabdff1aSopenharmony_ci fmts[1] = AV_PIX_FMT_NONE; 215cabdff1aSopenharmony_ci 216cabdff1aSopenharmony_ci *formats = fmts; 217cabdff1aSopenharmony_ci 218cabdff1aSopenharmony_ci return 0; 219cabdff1aSopenharmony_ci} 220cabdff1aSopenharmony_ci 221cabdff1aSopenharmony_cistatic int cuda_transfer_data(AVHWFramesContext *ctx, AVFrame *dst, 222cabdff1aSopenharmony_ci const AVFrame *src) 223cabdff1aSopenharmony_ci{ 224cabdff1aSopenharmony_ci CUDAFramesContext *priv = ctx->internal->priv; 225cabdff1aSopenharmony_ci AVHWDeviceContext *device_ctx = ctx->device_ctx; 226cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 227cabdff1aSopenharmony_ci CudaFunctions *cu = hwctx->internal->cuda_dl; 228cabdff1aSopenharmony_ci 229cabdff1aSopenharmony_ci CUcontext dummy; 230cabdff1aSopenharmony_ci int i, ret; 231cabdff1aSopenharmony_ci 232cabdff1aSopenharmony_ci if ((src->hw_frames_ctx && ((AVHWFramesContext*)src->hw_frames_ctx->data)->format != AV_PIX_FMT_CUDA) || 233cabdff1aSopenharmony_ci (dst->hw_frames_ctx && ((AVHWFramesContext*)dst->hw_frames_ctx->data)->format != AV_PIX_FMT_CUDA)) 234cabdff1aSopenharmony_ci return AVERROR(ENOSYS); 235cabdff1aSopenharmony_ci 236cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx)); 237cabdff1aSopenharmony_ci if (ret < 0) 238cabdff1aSopenharmony_ci return ret; 239cabdff1aSopenharmony_ci 240cabdff1aSopenharmony_ci for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) { 241cabdff1aSopenharmony_ci CUDA_MEMCPY2D cpy = { 242cabdff1aSopenharmony_ci .srcPitch = src->linesize[i], 243cabdff1aSopenharmony_ci .dstPitch = dst->linesize[i], 244cabdff1aSopenharmony_ci .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]), 245cabdff1aSopenharmony_ci .Height = src->height >> ((i == 0 || i == 3) ? 0 : priv->shift_height), 246cabdff1aSopenharmony_ci }; 247cabdff1aSopenharmony_ci 248cabdff1aSopenharmony_ci if (src->hw_frames_ctx) { 249cabdff1aSopenharmony_ci cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE; 250cabdff1aSopenharmony_ci cpy.srcDevice = (CUdeviceptr)src->data[i]; 251cabdff1aSopenharmony_ci } else { 252cabdff1aSopenharmony_ci cpy.srcMemoryType = CU_MEMORYTYPE_HOST; 253cabdff1aSopenharmony_ci cpy.srcHost = src->data[i]; 254cabdff1aSopenharmony_ci } 255cabdff1aSopenharmony_ci 256cabdff1aSopenharmony_ci if (dst->hw_frames_ctx) { 257cabdff1aSopenharmony_ci cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE; 258cabdff1aSopenharmony_ci cpy.dstDevice = (CUdeviceptr)dst->data[i]; 259cabdff1aSopenharmony_ci } else { 260cabdff1aSopenharmony_ci cpy.dstMemoryType = CU_MEMORYTYPE_HOST; 261cabdff1aSopenharmony_ci cpy.dstHost = dst->data[i]; 262cabdff1aSopenharmony_ci } 263cabdff1aSopenharmony_ci 264cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream)); 265cabdff1aSopenharmony_ci if (ret < 0) 266cabdff1aSopenharmony_ci goto exit; 267cabdff1aSopenharmony_ci } 268cabdff1aSopenharmony_ci 269cabdff1aSopenharmony_ci if (!dst->hw_frames_ctx) { 270cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream)); 271cabdff1aSopenharmony_ci if (ret < 0) 272cabdff1aSopenharmony_ci goto exit; 273cabdff1aSopenharmony_ci } 274cabdff1aSopenharmony_ci 275cabdff1aSopenharmony_ciexit: 276cabdff1aSopenharmony_ci CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 277cabdff1aSopenharmony_ci 278cabdff1aSopenharmony_ci return 0; 279cabdff1aSopenharmony_ci} 280cabdff1aSopenharmony_ci 281cabdff1aSopenharmony_cistatic void cuda_device_uninit(AVHWDeviceContext *device_ctx) 282cabdff1aSopenharmony_ci{ 283cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 284cabdff1aSopenharmony_ci 285cabdff1aSopenharmony_ci if (hwctx->internal) { 286cabdff1aSopenharmony_ci CudaFunctions *cu = hwctx->internal->cuda_dl; 287cabdff1aSopenharmony_ci 288cabdff1aSopenharmony_ci if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { 289cabdff1aSopenharmony_ci if (hwctx->internal->flags & AV_CUDA_USE_PRIMARY_CONTEXT) 290cabdff1aSopenharmony_ci CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->internal->cuda_device)); 291cabdff1aSopenharmony_ci else 292cabdff1aSopenharmony_ci CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); 293cabdff1aSopenharmony_ci 294cabdff1aSopenharmony_ci hwctx->cuda_ctx = NULL; 295cabdff1aSopenharmony_ci } 296cabdff1aSopenharmony_ci 297cabdff1aSopenharmony_ci cuda_free_functions(&hwctx->internal->cuda_dl); 298cabdff1aSopenharmony_ci } 299cabdff1aSopenharmony_ci 300cabdff1aSopenharmony_ci av_freep(&hwctx->internal); 301cabdff1aSopenharmony_ci} 302cabdff1aSopenharmony_ci 303cabdff1aSopenharmony_cistatic int cuda_device_init(AVHWDeviceContext *ctx) 304cabdff1aSopenharmony_ci{ 305cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = ctx->hwctx; 306cabdff1aSopenharmony_ci int ret; 307cabdff1aSopenharmony_ci 308cabdff1aSopenharmony_ci if (!hwctx->internal) { 309cabdff1aSopenharmony_ci hwctx->internal = av_mallocz(sizeof(*hwctx->internal)); 310cabdff1aSopenharmony_ci if (!hwctx->internal) 311cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 312cabdff1aSopenharmony_ci } 313cabdff1aSopenharmony_ci 314cabdff1aSopenharmony_ci if (!hwctx->internal->cuda_dl) { 315cabdff1aSopenharmony_ci ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx); 316cabdff1aSopenharmony_ci if (ret < 0) { 317cabdff1aSopenharmony_ci av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n"); 318cabdff1aSopenharmony_ci goto error; 319cabdff1aSopenharmony_ci } 320cabdff1aSopenharmony_ci } 321cabdff1aSopenharmony_ci 322cabdff1aSopenharmony_ci return 0; 323cabdff1aSopenharmony_ci 324cabdff1aSopenharmony_cierror: 325cabdff1aSopenharmony_ci cuda_device_uninit(ctx); 326cabdff1aSopenharmony_ci return ret; 327cabdff1aSopenharmony_ci} 328cabdff1aSopenharmony_ci 329cabdff1aSopenharmony_cistatic int cuda_context_init(AVHWDeviceContext *device_ctx, int flags) { 330cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 331cabdff1aSopenharmony_ci CudaFunctions *cu; 332cabdff1aSopenharmony_ci CUcontext dummy; 333cabdff1aSopenharmony_ci int ret, dev_active = 0; 334cabdff1aSopenharmony_ci unsigned int dev_flags = 0; 335cabdff1aSopenharmony_ci 336cabdff1aSopenharmony_ci const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC; 337cabdff1aSopenharmony_ci 338cabdff1aSopenharmony_ci cu = hwctx->internal->cuda_dl; 339cabdff1aSopenharmony_ci 340cabdff1aSopenharmony_ci hwctx->internal->flags = flags; 341cabdff1aSopenharmony_ci 342cabdff1aSopenharmony_ci if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) { 343cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device, 344cabdff1aSopenharmony_ci &dev_flags, &dev_active)); 345cabdff1aSopenharmony_ci if (ret < 0) 346cabdff1aSopenharmony_ci return ret; 347cabdff1aSopenharmony_ci 348cabdff1aSopenharmony_ci if (dev_active && dev_flags != desired_flags) { 349cabdff1aSopenharmony_ci av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n"); 350cabdff1aSopenharmony_ci return AVERROR(ENOTSUP); 351cabdff1aSopenharmony_ci } else if (dev_flags != desired_flags) { 352cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device, 353cabdff1aSopenharmony_ci desired_flags)); 354cabdff1aSopenharmony_ci if (ret < 0) 355cabdff1aSopenharmony_ci return ret; 356cabdff1aSopenharmony_ci } 357cabdff1aSopenharmony_ci 358cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, 359cabdff1aSopenharmony_ci hwctx->internal->cuda_device)); 360cabdff1aSopenharmony_ci if (ret < 0) 361cabdff1aSopenharmony_ci return ret; 362cabdff1aSopenharmony_ci } else { 363cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags, 364cabdff1aSopenharmony_ci hwctx->internal->cuda_device)); 365cabdff1aSopenharmony_ci if (ret < 0) 366cabdff1aSopenharmony_ci return ret; 367cabdff1aSopenharmony_ci 368cabdff1aSopenharmony_ci CHECK_CU(cu->cuCtxPopCurrent(&dummy)); 369cabdff1aSopenharmony_ci } 370cabdff1aSopenharmony_ci 371cabdff1aSopenharmony_ci hwctx->internal->is_allocated = 1; 372cabdff1aSopenharmony_ci 373cabdff1aSopenharmony_ci // Setting stream to NULL will make functions automatically use the default CUstream 374cabdff1aSopenharmony_ci hwctx->stream = NULL; 375cabdff1aSopenharmony_ci 376cabdff1aSopenharmony_ci return 0; 377cabdff1aSopenharmony_ci} 378cabdff1aSopenharmony_ci 379cabdff1aSopenharmony_cistatic int cuda_flags_from_opts(AVHWDeviceContext *device_ctx, 380cabdff1aSopenharmony_ci AVDictionary *opts, int *flags) 381cabdff1aSopenharmony_ci{ 382cabdff1aSopenharmony_ci AVDictionaryEntry *primary_ctx_opt = av_dict_get(opts, "primary_ctx", NULL, 0); 383cabdff1aSopenharmony_ci 384cabdff1aSopenharmony_ci if (primary_ctx_opt && strtol(primary_ctx_opt->value, NULL, 10)) { 385cabdff1aSopenharmony_ci av_log(device_ctx, AV_LOG_VERBOSE, "Using CUDA primary device context\n"); 386cabdff1aSopenharmony_ci *flags |= AV_CUDA_USE_PRIMARY_CONTEXT; 387cabdff1aSopenharmony_ci } else if (primary_ctx_opt) { 388cabdff1aSopenharmony_ci av_log(device_ctx, AV_LOG_VERBOSE, "Disabling use of CUDA primary device context\n"); 389cabdff1aSopenharmony_ci *flags &= ~AV_CUDA_USE_PRIMARY_CONTEXT; 390cabdff1aSopenharmony_ci } 391cabdff1aSopenharmony_ci 392cabdff1aSopenharmony_ci return 0; 393cabdff1aSopenharmony_ci} 394cabdff1aSopenharmony_ci 395cabdff1aSopenharmony_cistatic int cuda_device_create(AVHWDeviceContext *device_ctx, 396cabdff1aSopenharmony_ci const char *device, 397cabdff1aSopenharmony_ci AVDictionary *opts, int flags) 398cabdff1aSopenharmony_ci{ 399cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 400cabdff1aSopenharmony_ci CudaFunctions *cu; 401cabdff1aSopenharmony_ci int ret, device_idx = 0; 402cabdff1aSopenharmony_ci 403cabdff1aSopenharmony_ci ret = cuda_flags_from_opts(device_ctx, opts, &flags); 404cabdff1aSopenharmony_ci if (ret < 0) 405cabdff1aSopenharmony_ci goto error; 406cabdff1aSopenharmony_ci 407cabdff1aSopenharmony_ci if (device) 408cabdff1aSopenharmony_ci device_idx = strtol(device, NULL, 0); 409cabdff1aSopenharmony_ci 410cabdff1aSopenharmony_ci ret = cuda_device_init(device_ctx); 411cabdff1aSopenharmony_ci if (ret < 0) 412cabdff1aSopenharmony_ci goto error; 413cabdff1aSopenharmony_ci 414cabdff1aSopenharmony_ci cu = hwctx->internal->cuda_dl; 415cabdff1aSopenharmony_ci 416cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuInit(0)); 417cabdff1aSopenharmony_ci if (ret < 0) 418cabdff1aSopenharmony_ci goto error; 419cabdff1aSopenharmony_ci 420cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx)); 421cabdff1aSopenharmony_ci if (ret < 0) 422cabdff1aSopenharmony_ci goto error; 423cabdff1aSopenharmony_ci 424cabdff1aSopenharmony_ci ret = cuda_context_init(device_ctx, flags); 425cabdff1aSopenharmony_ci if (ret < 0) 426cabdff1aSopenharmony_ci goto error; 427cabdff1aSopenharmony_ci 428cabdff1aSopenharmony_ci return 0; 429cabdff1aSopenharmony_ci 430cabdff1aSopenharmony_cierror: 431cabdff1aSopenharmony_ci cuda_device_uninit(device_ctx); 432cabdff1aSopenharmony_ci return ret; 433cabdff1aSopenharmony_ci} 434cabdff1aSopenharmony_ci 435cabdff1aSopenharmony_cistatic int cuda_device_derive(AVHWDeviceContext *device_ctx, 436cabdff1aSopenharmony_ci AVHWDeviceContext *src_ctx, AVDictionary *opts, 437cabdff1aSopenharmony_ci int flags) { 438cabdff1aSopenharmony_ci AVCUDADeviceContext *hwctx = device_ctx->hwctx; 439cabdff1aSopenharmony_ci CudaFunctions *cu; 440cabdff1aSopenharmony_ci const char *src_uuid = NULL; 441cabdff1aSopenharmony_ci int ret, i, device_count; 442cabdff1aSopenharmony_ci 443cabdff1aSopenharmony_ci ret = cuda_flags_from_opts(device_ctx, opts, &flags); 444cabdff1aSopenharmony_ci if (ret < 0) 445cabdff1aSopenharmony_ci goto error; 446cabdff1aSopenharmony_ci 447cabdff1aSopenharmony_ci#if CONFIG_VULKAN 448cabdff1aSopenharmony_ci VkPhysicalDeviceIDProperties vk_idp = { 449cabdff1aSopenharmony_ci .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES, 450cabdff1aSopenharmony_ci }; 451cabdff1aSopenharmony_ci#endif 452cabdff1aSopenharmony_ci 453cabdff1aSopenharmony_ci switch (src_ctx->type) { 454cabdff1aSopenharmony_ci#if CONFIG_VULKAN 455cabdff1aSopenharmony_ci#define TYPE PFN_vkGetPhysicalDeviceProperties2 456cabdff1aSopenharmony_ci case AV_HWDEVICE_TYPE_VULKAN: { 457cabdff1aSopenharmony_ci AVVulkanDeviceContext *vkctx = src_ctx->hwctx; 458cabdff1aSopenharmony_ci TYPE prop_fn = (TYPE)vkctx->get_proc_addr(vkctx->inst, "vkGetPhysicalDeviceProperties2"); 459cabdff1aSopenharmony_ci VkPhysicalDeviceProperties2 vk_dev_props = { 460cabdff1aSopenharmony_ci .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, 461cabdff1aSopenharmony_ci .pNext = &vk_idp, 462cabdff1aSopenharmony_ci }; 463cabdff1aSopenharmony_ci prop_fn(vkctx->phys_dev, &vk_dev_props); 464cabdff1aSopenharmony_ci src_uuid = vk_idp.deviceUUID; 465cabdff1aSopenharmony_ci break; 466cabdff1aSopenharmony_ci } 467cabdff1aSopenharmony_ci#undef TYPE 468cabdff1aSopenharmony_ci#endif 469cabdff1aSopenharmony_ci default: 470cabdff1aSopenharmony_ci ret = AVERROR(ENOSYS); 471cabdff1aSopenharmony_ci goto error; 472cabdff1aSopenharmony_ci } 473cabdff1aSopenharmony_ci 474cabdff1aSopenharmony_ci if (!src_uuid) { 475cabdff1aSopenharmony_ci av_log(device_ctx, AV_LOG_ERROR, 476cabdff1aSopenharmony_ci "Failed to get UUID of source device.\n"); 477cabdff1aSopenharmony_ci ret = AVERROR(EINVAL); 478cabdff1aSopenharmony_ci goto error; 479cabdff1aSopenharmony_ci } 480cabdff1aSopenharmony_ci 481cabdff1aSopenharmony_ci ret = cuda_device_init(device_ctx); 482cabdff1aSopenharmony_ci if (ret < 0) 483cabdff1aSopenharmony_ci goto error; 484cabdff1aSopenharmony_ci 485cabdff1aSopenharmony_ci cu = hwctx->internal->cuda_dl; 486cabdff1aSopenharmony_ci 487cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuInit(0)); 488cabdff1aSopenharmony_ci if (ret < 0) 489cabdff1aSopenharmony_ci goto error; 490cabdff1aSopenharmony_ci 491cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuDeviceGetCount(&device_count)); 492cabdff1aSopenharmony_ci if (ret < 0) 493cabdff1aSopenharmony_ci goto error; 494cabdff1aSopenharmony_ci 495cabdff1aSopenharmony_ci hwctx->internal->cuda_device = -1; 496cabdff1aSopenharmony_ci for (i = 0; i < device_count; i++) { 497cabdff1aSopenharmony_ci CUdevice dev; 498cabdff1aSopenharmony_ci CUuuid uuid; 499cabdff1aSopenharmony_ci 500cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuDeviceGet(&dev, i)); 501cabdff1aSopenharmony_ci if (ret < 0) 502cabdff1aSopenharmony_ci goto error; 503cabdff1aSopenharmony_ci 504cabdff1aSopenharmony_ci ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev)); 505cabdff1aSopenharmony_ci if (ret < 0) 506cabdff1aSopenharmony_ci goto error; 507cabdff1aSopenharmony_ci 508cabdff1aSopenharmony_ci if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) { 509cabdff1aSopenharmony_ci hwctx->internal->cuda_device = dev; 510cabdff1aSopenharmony_ci break; 511cabdff1aSopenharmony_ci } 512cabdff1aSopenharmony_ci } 513cabdff1aSopenharmony_ci 514cabdff1aSopenharmony_ci if (hwctx->internal->cuda_device == -1) { 515cabdff1aSopenharmony_ci av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n"); 516cabdff1aSopenharmony_ci goto error; 517cabdff1aSopenharmony_ci } 518cabdff1aSopenharmony_ci 519cabdff1aSopenharmony_ci ret = cuda_context_init(device_ctx, flags); 520cabdff1aSopenharmony_ci if (ret < 0) 521cabdff1aSopenharmony_ci goto error; 522cabdff1aSopenharmony_ci 523cabdff1aSopenharmony_ci return 0; 524cabdff1aSopenharmony_ci 525cabdff1aSopenharmony_cierror: 526cabdff1aSopenharmony_ci cuda_device_uninit(device_ctx); 527cabdff1aSopenharmony_ci return ret; 528cabdff1aSopenharmony_ci} 529cabdff1aSopenharmony_ci 530cabdff1aSopenharmony_ciconst HWContextType ff_hwcontext_type_cuda = { 531cabdff1aSopenharmony_ci .type = AV_HWDEVICE_TYPE_CUDA, 532cabdff1aSopenharmony_ci .name = "CUDA", 533cabdff1aSopenharmony_ci 534cabdff1aSopenharmony_ci .device_hwctx_size = sizeof(AVCUDADeviceContext), 535cabdff1aSopenharmony_ci .frames_priv_size = sizeof(CUDAFramesContext), 536cabdff1aSopenharmony_ci 537cabdff1aSopenharmony_ci .device_create = cuda_device_create, 538cabdff1aSopenharmony_ci .device_derive = cuda_device_derive, 539cabdff1aSopenharmony_ci .device_init = cuda_device_init, 540cabdff1aSopenharmony_ci .device_uninit = cuda_device_uninit, 541cabdff1aSopenharmony_ci .frames_get_constraints = cuda_frames_get_constraints, 542cabdff1aSopenharmony_ci .frames_init = cuda_frames_init, 543cabdff1aSopenharmony_ci .frames_get_buffer = cuda_get_buffer, 544cabdff1aSopenharmony_ci .transfer_get_formats = cuda_transfer_get_formats, 545cabdff1aSopenharmony_ci .transfer_data_to = cuda_transfer_data, 546cabdff1aSopenharmony_ci .transfer_data_from = cuda_transfer_data, 547cabdff1aSopenharmony_ci 548cabdff1aSopenharmony_ci .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE }, 549cabdff1aSopenharmony_ci}; 550