/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
18cabdff1aSopenharmony_ci
19cabdff1aSopenharmony_ci#include "buffer.h"
20cabdff1aSopenharmony_ci#include "common.h"
21cabdff1aSopenharmony_ci#include "hwcontext.h"
22cabdff1aSopenharmony_ci#include "hwcontext_internal.h"
23cabdff1aSopenharmony_ci#include "hwcontext_cuda_internal.h"
24cabdff1aSopenharmony_ci#if CONFIG_VULKAN
25cabdff1aSopenharmony_ci#include "hwcontext_vulkan.h"
26cabdff1aSopenharmony_ci#endif
27cabdff1aSopenharmony_ci#include "cuda_check.h"
28cabdff1aSopenharmony_ci#include "mem.h"
29cabdff1aSopenharmony_ci#include "pixdesc.h"
30cabdff1aSopenharmony_ci#include "pixfmt.h"
31cabdff1aSopenharmony_ci#include "imgutils.h"
32cabdff1aSopenharmony_ci
/**
 * Private per-frames-context state of the CUDA hwcontext.
 */
typedef struct CUDAFramesContext {
    /* Horizontal chroma shift of sw_format, filled in by cuda_frames_init(). */
    int shift_width;
    /* Vertical chroma shift of sw_format; used as a right-shift count on the
     * frame height when copying the chroma planes. */
    int shift_height;
    /* Alignment handed to the av_image_*() helpers when sizing and laying out
     * frames. Queried from the device in cuda_frames_init() and doubled there
     * for AV_PIX_FMT_YUV420P. */
    int tex_alignment;
} CUDAFramesContext;
37cabdff1aSopenharmony_ci
38cabdff1aSopenharmony_cistatic const enum AVPixelFormat supported_formats[] = {
39cabdff1aSopenharmony_ci    AV_PIX_FMT_NV12,
40cabdff1aSopenharmony_ci    AV_PIX_FMT_YUV420P,
41cabdff1aSopenharmony_ci    AV_PIX_FMT_YUVA420P,
42cabdff1aSopenharmony_ci    AV_PIX_FMT_YUV444P,
43cabdff1aSopenharmony_ci    AV_PIX_FMT_P010,
44cabdff1aSopenharmony_ci    AV_PIX_FMT_P016,
45cabdff1aSopenharmony_ci    AV_PIX_FMT_YUV444P16,
46cabdff1aSopenharmony_ci    AV_PIX_FMT_0RGB32,
47cabdff1aSopenharmony_ci    AV_PIX_FMT_0BGR32,
48cabdff1aSopenharmony_ci#if CONFIG_VULKAN
49cabdff1aSopenharmony_ci    AV_PIX_FMT_VULKAN,
50cabdff1aSopenharmony_ci#endif
51cabdff1aSopenharmony_ci};
52cabdff1aSopenharmony_ci
/* Shorthand around FF_CUDA_CHECK_DL(). The expansion refers to the identifiers
 * `device_ctx` and `cu`, so both must be visible wherever the macro is used. */
#define CHECK_CU(call) FF_CUDA_CHECK_DL(device_ctx, cu, call)
54cabdff1aSopenharmony_ci
55cabdff1aSopenharmony_cistatic int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
56cabdff1aSopenharmony_ci                                       const void *hwconfig,
57cabdff1aSopenharmony_ci                                       AVHWFramesConstraints *constraints)
58cabdff1aSopenharmony_ci{
59cabdff1aSopenharmony_ci    int i;
60cabdff1aSopenharmony_ci
61cabdff1aSopenharmony_ci    constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
62cabdff1aSopenharmony_ci                                                    sizeof(*constraints->valid_sw_formats));
63cabdff1aSopenharmony_ci    if (!constraints->valid_sw_formats)
64cabdff1aSopenharmony_ci        return AVERROR(ENOMEM);
65cabdff1aSopenharmony_ci
66cabdff1aSopenharmony_ci    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
67cabdff1aSopenharmony_ci        constraints->valid_sw_formats[i] = supported_formats[i];
68cabdff1aSopenharmony_ci    constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
69cabdff1aSopenharmony_ci
70cabdff1aSopenharmony_ci    constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
71cabdff1aSopenharmony_ci    if (!constraints->valid_hw_formats)
72cabdff1aSopenharmony_ci        return AVERROR(ENOMEM);
73cabdff1aSopenharmony_ci
74cabdff1aSopenharmony_ci    constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
75cabdff1aSopenharmony_ci    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
76cabdff1aSopenharmony_ci
77cabdff1aSopenharmony_ci    return 0;
78cabdff1aSopenharmony_ci}
79cabdff1aSopenharmony_ci
80cabdff1aSopenharmony_cistatic void cuda_buffer_free(void *opaque, uint8_t *data)
81cabdff1aSopenharmony_ci{
82cabdff1aSopenharmony_ci    AVHWFramesContext        *ctx = opaque;
83cabdff1aSopenharmony_ci    AVHWDeviceContext *device_ctx = ctx->device_ctx;
84cabdff1aSopenharmony_ci    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
85cabdff1aSopenharmony_ci    CudaFunctions             *cu = hwctx->internal->cuda_dl;
86cabdff1aSopenharmony_ci
87cabdff1aSopenharmony_ci    CUcontext dummy;
88cabdff1aSopenharmony_ci
89cabdff1aSopenharmony_ci    CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
90cabdff1aSopenharmony_ci
91cabdff1aSopenharmony_ci    CHECK_CU(cu->cuMemFree((CUdeviceptr)data));
92cabdff1aSopenharmony_ci
93cabdff1aSopenharmony_ci    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
94cabdff1aSopenharmony_ci}
95cabdff1aSopenharmony_ci
96cabdff1aSopenharmony_cistatic AVBufferRef *cuda_pool_alloc(void *opaque, size_t size)
97cabdff1aSopenharmony_ci{
98cabdff1aSopenharmony_ci    AVHWFramesContext        *ctx = opaque;
99cabdff1aSopenharmony_ci    AVHWDeviceContext *device_ctx = ctx->device_ctx;
100cabdff1aSopenharmony_ci    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
101cabdff1aSopenharmony_ci    CudaFunctions             *cu = hwctx->internal->cuda_dl;
102cabdff1aSopenharmony_ci
103cabdff1aSopenharmony_ci    AVBufferRef *ret = NULL;
104cabdff1aSopenharmony_ci    CUcontext dummy = NULL;
105cabdff1aSopenharmony_ci    CUdeviceptr data;
106cabdff1aSopenharmony_ci    int err;
107cabdff1aSopenharmony_ci
108cabdff1aSopenharmony_ci    err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
109cabdff1aSopenharmony_ci    if (err < 0)
110cabdff1aSopenharmony_ci        return NULL;
111cabdff1aSopenharmony_ci
112cabdff1aSopenharmony_ci    err = CHECK_CU(cu->cuMemAlloc(&data, size));
113cabdff1aSopenharmony_ci    if (err < 0)
114cabdff1aSopenharmony_ci        goto fail;
115cabdff1aSopenharmony_ci
116cabdff1aSopenharmony_ci    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
117cabdff1aSopenharmony_ci    if (!ret) {
118cabdff1aSopenharmony_ci        CHECK_CU(cu->cuMemFree(data));
119cabdff1aSopenharmony_ci        goto fail;
120cabdff1aSopenharmony_ci    }
121cabdff1aSopenharmony_ci
122cabdff1aSopenharmony_cifail:
123cabdff1aSopenharmony_ci    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
124cabdff1aSopenharmony_ci    return ret;
125cabdff1aSopenharmony_ci}
126cabdff1aSopenharmony_ci
127cabdff1aSopenharmony_cistatic int cuda_frames_init(AVHWFramesContext *ctx)
128cabdff1aSopenharmony_ci{
129cabdff1aSopenharmony_ci    AVHWDeviceContext *device_ctx = ctx->device_ctx;
130cabdff1aSopenharmony_ci    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
131cabdff1aSopenharmony_ci    CUDAFramesContext       *priv = ctx->internal->priv;
132cabdff1aSopenharmony_ci    CudaFunctions             *cu = hwctx->internal->cuda_dl;
133cabdff1aSopenharmony_ci    int err, i;
134cabdff1aSopenharmony_ci
135cabdff1aSopenharmony_ci    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
136cabdff1aSopenharmony_ci        if (ctx->sw_format == supported_formats[i])
137cabdff1aSopenharmony_ci            break;
138cabdff1aSopenharmony_ci    }
139cabdff1aSopenharmony_ci    if (i == FF_ARRAY_ELEMS(supported_formats)) {
140cabdff1aSopenharmony_ci        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
141cabdff1aSopenharmony_ci               av_get_pix_fmt_name(ctx->sw_format));
142cabdff1aSopenharmony_ci        return AVERROR(ENOSYS);
143cabdff1aSopenharmony_ci    }
144cabdff1aSopenharmony_ci
145cabdff1aSopenharmony_ci    err = CHECK_CU(cu->cuDeviceGetAttribute(&priv->tex_alignment,
146cabdff1aSopenharmony_ci                                            14 /* CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT */,
147cabdff1aSopenharmony_ci                                            hwctx->internal->cuda_device));
148cabdff1aSopenharmony_ci    if (err < 0)
149cabdff1aSopenharmony_ci        return err;
150cabdff1aSopenharmony_ci
151cabdff1aSopenharmony_ci    av_log(ctx, AV_LOG_DEBUG, "CUDA texture alignment: %d\n", priv->tex_alignment);
152cabdff1aSopenharmony_ci
153cabdff1aSopenharmony_ci    // YUV420P is a special case.
154cabdff1aSopenharmony_ci    // Since nvenc expects the U/V planes to have half the linesize of the Y plane
155cabdff1aSopenharmony_ci    // alignment has to be doubled to ensure the U/V planes still end up aligned.
156cabdff1aSopenharmony_ci    if (ctx->sw_format == AV_PIX_FMT_YUV420P)
157cabdff1aSopenharmony_ci        priv->tex_alignment *= 2;
158cabdff1aSopenharmony_ci
159cabdff1aSopenharmony_ci    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
160cabdff1aSopenharmony_ci
161cabdff1aSopenharmony_ci    if (!ctx->pool) {
162cabdff1aSopenharmony_ci        int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, priv->tex_alignment);
163cabdff1aSopenharmony_ci        if (size < 0)
164cabdff1aSopenharmony_ci            return size;
165cabdff1aSopenharmony_ci
166cabdff1aSopenharmony_ci        ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
167cabdff1aSopenharmony_ci        if (!ctx->internal->pool_internal)
168cabdff1aSopenharmony_ci            return AVERROR(ENOMEM);
169cabdff1aSopenharmony_ci    }
170cabdff1aSopenharmony_ci
171cabdff1aSopenharmony_ci    return 0;
172cabdff1aSopenharmony_ci}
173cabdff1aSopenharmony_ci
174cabdff1aSopenharmony_cistatic int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
175cabdff1aSopenharmony_ci{
176cabdff1aSopenharmony_ci    CUDAFramesContext *priv = ctx->internal->priv;
177cabdff1aSopenharmony_ci    int res;
178cabdff1aSopenharmony_ci
179cabdff1aSopenharmony_ci    frame->buf[0] = av_buffer_pool_get(ctx->pool);
180cabdff1aSopenharmony_ci    if (!frame->buf[0])
181cabdff1aSopenharmony_ci        return AVERROR(ENOMEM);
182cabdff1aSopenharmony_ci
183cabdff1aSopenharmony_ci    res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data,
184cabdff1aSopenharmony_ci                               ctx->sw_format, ctx->width, ctx->height, priv->tex_alignment);
185cabdff1aSopenharmony_ci    if (res < 0)
186cabdff1aSopenharmony_ci        return res;
187cabdff1aSopenharmony_ci
188cabdff1aSopenharmony_ci    // YUV420P is a special case.
189cabdff1aSopenharmony_ci    // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them, also chroma is half-aligned
190cabdff1aSopenharmony_ci    if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
191cabdff1aSopenharmony_ci        frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2;
192cabdff1aSopenharmony_ci        frame->data[2]     = frame->data[1];
193cabdff1aSopenharmony_ci        frame->data[1]     = frame->data[2] + frame->linesize[2] * (ctx->height / 2);
194cabdff1aSopenharmony_ci    }
195cabdff1aSopenharmony_ci
196cabdff1aSopenharmony_ci    frame->format = AV_PIX_FMT_CUDA;
197cabdff1aSopenharmony_ci    frame->width  = ctx->width;
198cabdff1aSopenharmony_ci    frame->height = ctx->height;
199cabdff1aSopenharmony_ci
200cabdff1aSopenharmony_ci    return 0;
201cabdff1aSopenharmony_ci}
202cabdff1aSopenharmony_ci
203cabdff1aSopenharmony_cistatic int cuda_transfer_get_formats(AVHWFramesContext *ctx,
204cabdff1aSopenharmony_ci                                     enum AVHWFrameTransferDirection dir,
205cabdff1aSopenharmony_ci                                     enum AVPixelFormat **formats)
206cabdff1aSopenharmony_ci{
207cabdff1aSopenharmony_ci    enum AVPixelFormat *fmts;
208cabdff1aSopenharmony_ci
209cabdff1aSopenharmony_ci    fmts = av_malloc_array(2, sizeof(*fmts));
210cabdff1aSopenharmony_ci    if (!fmts)
211cabdff1aSopenharmony_ci        return AVERROR(ENOMEM);
212cabdff1aSopenharmony_ci
213cabdff1aSopenharmony_ci    fmts[0] = ctx->sw_format;
214cabdff1aSopenharmony_ci    fmts[1] = AV_PIX_FMT_NONE;
215cabdff1aSopenharmony_ci
216cabdff1aSopenharmony_ci    *formats = fmts;
217cabdff1aSopenharmony_ci
218cabdff1aSopenharmony_ci    return 0;
219cabdff1aSopenharmony_ci}
220cabdff1aSopenharmony_ci
221cabdff1aSopenharmony_cistatic int cuda_transfer_data(AVHWFramesContext *ctx, AVFrame *dst,
222cabdff1aSopenharmony_ci                                 const AVFrame *src)
223cabdff1aSopenharmony_ci{
224cabdff1aSopenharmony_ci    CUDAFramesContext       *priv = ctx->internal->priv;
225cabdff1aSopenharmony_ci    AVHWDeviceContext *device_ctx = ctx->device_ctx;
226cabdff1aSopenharmony_ci    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
227cabdff1aSopenharmony_ci    CudaFunctions             *cu = hwctx->internal->cuda_dl;
228cabdff1aSopenharmony_ci
229cabdff1aSopenharmony_ci    CUcontext dummy;
230cabdff1aSopenharmony_ci    int i, ret;
231cabdff1aSopenharmony_ci
232cabdff1aSopenharmony_ci    if ((src->hw_frames_ctx && ((AVHWFramesContext*)src->hw_frames_ctx->data)->format != AV_PIX_FMT_CUDA) ||
233cabdff1aSopenharmony_ci        (dst->hw_frames_ctx && ((AVHWFramesContext*)dst->hw_frames_ctx->data)->format != AV_PIX_FMT_CUDA))
234cabdff1aSopenharmony_ci        return AVERROR(ENOSYS);
235cabdff1aSopenharmony_ci
236cabdff1aSopenharmony_ci    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
237cabdff1aSopenharmony_ci    if (ret < 0)
238cabdff1aSopenharmony_ci        return ret;
239cabdff1aSopenharmony_ci
240cabdff1aSopenharmony_ci    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
241cabdff1aSopenharmony_ci        CUDA_MEMCPY2D cpy = {
242cabdff1aSopenharmony_ci            .srcPitch      = src->linesize[i],
243cabdff1aSopenharmony_ci            .dstPitch      = dst->linesize[i],
244cabdff1aSopenharmony_ci            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
245cabdff1aSopenharmony_ci            .Height        = src->height >> ((i == 0 || i == 3) ? 0 : priv->shift_height),
246cabdff1aSopenharmony_ci        };
247cabdff1aSopenharmony_ci
248cabdff1aSopenharmony_ci        if (src->hw_frames_ctx) {
249cabdff1aSopenharmony_ci            cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
250cabdff1aSopenharmony_ci            cpy.srcDevice     = (CUdeviceptr)src->data[i];
251cabdff1aSopenharmony_ci        } else {
252cabdff1aSopenharmony_ci            cpy.srcMemoryType = CU_MEMORYTYPE_HOST;
253cabdff1aSopenharmony_ci            cpy.srcHost       = src->data[i];
254cabdff1aSopenharmony_ci        }
255cabdff1aSopenharmony_ci
256cabdff1aSopenharmony_ci        if (dst->hw_frames_ctx) {
257cabdff1aSopenharmony_ci            cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE;
258cabdff1aSopenharmony_ci            cpy.dstDevice     = (CUdeviceptr)dst->data[i];
259cabdff1aSopenharmony_ci        } else {
260cabdff1aSopenharmony_ci            cpy.dstMemoryType = CU_MEMORYTYPE_HOST;
261cabdff1aSopenharmony_ci            cpy.dstHost       = dst->data[i];
262cabdff1aSopenharmony_ci        }
263cabdff1aSopenharmony_ci
264cabdff1aSopenharmony_ci        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
265cabdff1aSopenharmony_ci        if (ret < 0)
266cabdff1aSopenharmony_ci            goto exit;
267cabdff1aSopenharmony_ci    }
268cabdff1aSopenharmony_ci
269cabdff1aSopenharmony_ci    if (!dst->hw_frames_ctx) {
270cabdff1aSopenharmony_ci        ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
271cabdff1aSopenharmony_ci        if (ret < 0)
272cabdff1aSopenharmony_ci            goto exit;
273cabdff1aSopenharmony_ci    }
274cabdff1aSopenharmony_ci
275cabdff1aSopenharmony_ciexit:
276cabdff1aSopenharmony_ci    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
277cabdff1aSopenharmony_ci
278cabdff1aSopenharmony_ci    return 0;
279cabdff1aSopenharmony_ci}
280cabdff1aSopenharmony_ci
281cabdff1aSopenharmony_cistatic void cuda_device_uninit(AVHWDeviceContext *device_ctx)
282cabdff1aSopenharmony_ci{
283cabdff1aSopenharmony_ci    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
284cabdff1aSopenharmony_ci
285cabdff1aSopenharmony_ci    if (hwctx->internal) {
286cabdff1aSopenharmony_ci        CudaFunctions *cu = hwctx->internal->cuda_dl;
287cabdff1aSopenharmony_ci
288cabdff1aSopenharmony_ci        if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
289cabdff1aSopenharmony_ci            if (hwctx->internal->flags & AV_CUDA_USE_PRIMARY_CONTEXT)
290cabdff1aSopenharmony_ci                CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->internal->cuda_device));
291cabdff1aSopenharmony_ci            else
292cabdff1aSopenharmony_ci                CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
293cabdff1aSopenharmony_ci
294cabdff1aSopenharmony_ci            hwctx->cuda_ctx = NULL;
295cabdff1aSopenharmony_ci        }
296cabdff1aSopenharmony_ci
297cabdff1aSopenharmony_ci        cuda_free_functions(&hwctx->internal->cuda_dl);
298cabdff1aSopenharmony_ci    }
299cabdff1aSopenharmony_ci
300cabdff1aSopenharmony_ci    av_freep(&hwctx->internal);
301cabdff1aSopenharmony_ci}
302cabdff1aSopenharmony_ci
303cabdff1aSopenharmony_cistatic int cuda_device_init(AVHWDeviceContext *ctx)
304cabdff1aSopenharmony_ci{
305cabdff1aSopenharmony_ci    AVCUDADeviceContext *hwctx = ctx->hwctx;
306cabdff1aSopenharmony_ci    int ret;
307cabdff1aSopenharmony_ci
308cabdff1aSopenharmony_ci    if (!hwctx->internal) {
309cabdff1aSopenharmony_ci        hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
310cabdff1aSopenharmony_ci        if (!hwctx->internal)
311cabdff1aSopenharmony_ci            return AVERROR(ENOMEM);
312cabdff1aSopenharmony_ci    }
313cabdff1aSopenharmony_ci
314cabdff1aSopenharmony_ci    if (!hwctx->internal->cuda_dl) {
315cabdff1aSopenharmony_ci        ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx);
316cabdff1aSopenharmony_ci        if (ret < 0) {
317cabdff1aSopenharmony_ci            av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
318cabdff1aSopenharmony_ci            goto error;
319cabdff1aSopenharmony_ci        }
320cabdff1aSopenharmony_ci    }
321cabdff1aSopenharmony_ci
322cabdff1aSopenharmony_ci    return 0;
323cabdff1aSopenharmony_ci
324cabdff1aSopenharmony_cierror:
325cabdff1aSopenharmony_ci    cuda_device_uninit(ctx);
326cabdff1aSopenharmony_ci    return ret;
327cabdff1aSopenharmony_ci}
328cabdff1aSopenharmony_ci
329cabdff1aSopenharmony_cistatic int cuda_context_init(AVHWDeviceContext *device_ctx, int flags) {
330cabdff1aSopenharmony_ci    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
331cabdff1aSopenharmony_ci    CudaFunctions *cu;
332cabdff1aSopenharmony_ci    CUcontext dummy;
333cabdff1aSopenharmony_ci    int ret, dev_active = 0;
334cabdff1aSopenharmony_ci    unsigned int dev_flags = 0;
335cabdff1aSopenharmony_ci
336cabdff1aSopenharmony_ci    const unsigned int desired_flags = CU_CTX_SCHED_BLOCKING_SYNC;
337cabdff1aSopenharmony_ci
338cabdff1aSopenharmony_ci    cu = hwctx->internal->cuda_dl;
339cabdff1aSopenharmony_ci
340cabdff1aSopenharmony_ci    hwctx->internal->flags = flags;
341cabdff1aSopenharmony_ci
342cabdff1aSopenharmony_ci    if (flags & AV_CUDA_USE_PRIMARY_CONTEXT) {
343cabdff1aSopenharmony_ci        ret = CHECK_CU(cu->cuDevicePrimaryCtxGetState(hwctx->internal->cuda_device,
344cabdff1aSopenharmony_ci                       &dev_flags, &dev_active));
345cabdff1aSopenharmony_ci        if (ret < 0)
346cabdff1aSopenharmony_ci            return ret;
347cabdff1aSopenharmony_ci
348cabdff1aSopenharmony_ci        if (dev_active && dev_flags != desired_flags) {
349cabdff1aSopenharmony_ci            av_log(device_ctx, AV_LOG_ERROR, "Primary context already active with incompatible flags.\n");
350cabdff1aSopenharmony_ci            return AVERROR(ENOTSUP);
351cabdff1aSopenharmony_ci        } else if (dev_flags != desired_flags) {
352cabdff1aSopenharmony_ci            ret = CHECK_CU(cu->cuDevicePrimaryCtxSetFlags(hwctx->internal->cuda_device,
353cabdff1aSopenharmony_ci                           desired_flags));
354cabdff1aSopenharmony_ci            if (ret < 0)
355cabdff1aSopenharmony_ci                return ret;
356cabdff1aSopenharmony_ci        }
357cabdff1aSopenharmony_ci
358cabdff1aSopenharmony_ci        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx,
359cabdff1aSopenharmony_ci                                                    hwctx->internal->cuda_device));
360cabdff1aSopenharmony_ci        if (ret < 0)
361cabdff1aSopenharmony_ci            return ret;
362cabdff1aSopenharmony_ci    } else {
363cabdff1aSopenharmony_ci        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, desired_flags,
364cabdff1aSopenharmony_ci                                       hwctx->internal->cuda_device));
365cabdff1aSopenharmony_ci        if (ret < 0)
366cabdff1aSopenharmony_ci            return ret;
367cabdff1aSopenharmony_ci
368cabdff1aSopenharmony_ci        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
369cabdff1aSopenharmony_ci    }
370cabdff1aSopenharmony_ci
371cabdff1aSopenharmony_ci    hwctx->internal->is_allocated = 1;
372cabdff1aSopenharmony_ci
373cabdff1aSopenharmony_ci    // Setting stream to NULL will make functions automatically use the default CUstream
374cabdff1aSopenharmony_ci    hwctx->stream = NULL;
375cabdff1aSopenharmony_ci
376cabdff1aSopenharmony_ci    return 0;
377cabdff1aSopenharmony_ci}
378cabdff1aSopenharmony_ci
379cabdff1aSopenharmony_cistatic int cuda_flags_from_opts(AVHWDeviceContext *device_ctx,
380cabdff1aSopenharmony_ci                                AVDictionary *opts, int *flags)
381cabdff1aSopenharmony_ci{
382cabdff1aSopenharmony_ci    AVDictionaryEntry *primary_ctx_opt = av_dict_get(opts, "primary_ctx", NULL, 0);
383cabdff1aSopenharmony_ci
384cabdff1aSopenharmony_ci    if (primary_ctx_opt && strtol(primary_ctx_opt->value, NULL, 10)) {
385cabdff1aSopenharmony_ci        av_log(device_ctx, AV_LOG_VERBOSE, "Using CUDA primary device context\n");
386cabdff1aSopenharmony_ci        *flags |= AV_CUDA_USE_PRIMARY_CONTEXT;
387cabdff1aSopenharmony_ci    } else if (primary_ctx_opt) {
388cabdff1aSopenharmony_ci        av_log(device_ctx, AV_LOG_VERBOSE, "Disabling use of CUDA primary device context\n");
389cabdff1aSopenharmony_ci        *flags &= ~AV_CUDA_USE_PRIMARY_CONTEXT;
390cabdff1aSopenharmony_ci    }
391cabdff1aSopenharmony_ci
392cabdff1aSopenharmony_ci    return 0;
393cabdff1aSopenharmony_ci}
394cabdff1aSopenharmony_ci
395cabdff1aSopenharmony_cistatic int cuda_device_create(AVHWDeviceContext *device_ctx,
396cabdff1aSopenharmony_ci                              const char *device,
397cabdff1aSopenharmony_ci                              AVDictionary *opts, int flags)
398cabdff1aSopenharmony_ci{
399cabdff1aSopenharmony_ci    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
400cabdff1aSopenharmony_ci    CudaFunctions *cu;
401cabdff1aSopenharmony_ci    int ret, device_idx = 0;
402cabdff1aSopenharmony_ci
403cabdff1aSopenharmony_ci    ret = cuda_flags_from_opts(device_ctx, opts, &flags);
404cabdff1aSopenharmony_ci    if (ret < 0)
405cabdff1aSopenharmony_ci        goto error;
406cabdff1aSopenharmony_ci
407cabdff1aSopenharmony_ci    if (device)
408cabdff1aSopenharmony_ci        device_idx = strtol(device, NULL, 0);
409cabdff1aSopenharmony_ci
410cabdff1aSopenharmony_ci    ret = cuda_device_init(device_ctx);
411cabdff1aSopenharmony_ci    if (ret < 0)
412cabdff1aSopenharmony_ci        goto error;
413cabdff1aSopenharmony_ci
414cabdff1aSopenharmony_ci    cu = hwctx->internal->cuda_dl;
415cabdff1aSopenharmony_ci
416cabdff1aSopenharmony_ci    ret = CHECK_CU(cu->cuInit(0));
417cabdff1aSopenharmony_ci    if (ret < 0)
418cabdff1aSopenharmony_ci        goto error;
419cabdff1aSopenharmony_ci
420cabdff1aSopenharmony_ci    ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx));
421cabdff1aSopenharmony_ci    if (ret < 0)
422cabdff1aSopenharmony_ci        goto error;
423cabdff1aSopenharmony_ci
424cabdff1aSopenharmony_ci    ret = cuda_context_init(device_ctx, flags);
425cabdff1aSopenharmony_ci    if (ret < 0)
426cabdff1aSopenharmony_ci        goto error;
427cabdff1aSopenharmony_ci
428cabdff1aSopenharmony_ci    return 0;
429cabdff1aSopenharmony_ci
430cabdff1aSopenharmony_cierror:
431cabdff1aSopenharmony_ci    cuda_device_uninit(device_ctx);
432cabdff1aSopenharmony_ci    return ret;
433cabdff1aSopenharmony_ci}
434cabdff1aSopenharmony_ci
435cabdff1aSopenharmony_cistatic int cuda_device_derive(AVHWDeviceContext *device_ctx,
436cabdff1aSopenharmony_ci                              AVHWDeviceContext *src_ctx, AVDictionary *opts,
437cabdff1aSopenharmony_ci                              int flags) {
438cabdff1aSopenharmony_ci    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
439cabdff1aSopenharmony_ci    CudaFunctions *cu;
440cabdff1aSopenharmony_ci    const char *src_uuid = NULL;
441cabdff1aSopenharmony_ci    int ret, i, device_count;
442cabdff1aSopenharmony_ci
443cabdff1aSopenharmony_ci    ret = cuda_flags_from_opts(device_ctx, opts, &flags);
444cabdff1aSopenharmony_ci    if (ret < 0)
445cabdff1aSopenharmony_ci        goto error;
446cabdff1aSopenharmony_ci
447cabdff1aSopenharmony_ci#if CONFIG_VULKAN
448cabdff1aSopenharmony_ci    VkPhysicalDeviceIDProperties vk_idp = {
449cabdff1aSopenharmony_ci        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES,
450cabdff1aSopenharmony_ci    };
451cabdff1aSopenharmony_ci#endif
452cabdff1aSopenharmony_ci
453cabdff1aSopenharmony_ci    switch (src_ctx->type) {
454cabdff1aSopenharmony_ci#if CONFIG_VULKAN
455cabdff1aSopenharmony_ci#define TYPE PFN_vkGetPhysicalDeviceProperties2
456cabdff1aSopenharmony_ci    case AV_HWDEVICE_TYPE_VULKAN: {
457cabdff1aSopenharmony_ci        AVVulkanDeviceContext *vkctx = src_ctx->hwctx;
458cabdff1aSopenharmony_ci        TYPE prop_fn = (TYPE)vkctx->get_proc_addr(vkctx->inst, "vkGetPhysicalDeviceProperties2");
459cabdff1aSopenharmony_ci        VkPhysicalDeviceProperties2 vk_dev_props = {
460cabdff1aSopenharmony_ci            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
461cabdff1aSopenharmony_ci            .pNext = &vk_idp,
462cabdff1aSopenharmony_ci        };
463cabdff1aSopenharmony_ci        prop_fn(vkctx->phys_dev, &vk_dev_props);
464cabdff1aSopenharmony_ci        src_uuid = vk_idp.deviceUUID;
465cabdff1aSopenharmony_ci        break;
466cabdff1aSopenharmony_ci    }
467cabdff1aSopenharmony_ci#undef TYPE
468cabdff1aSopenharmony_ci#endif
469cabdff1aSopenharmony_ci    default:
470cabdff1aSopenharmony_ci        ret = AVERROR(ENOSYS);
471cabdff1aSopenharmony_ci        goto error;
472cabdff1aSopenharmony_ci    }
473cabdff1aSopenharmony_ci
474cabdff1aSopenharmony_ci    if (!src_uuid) {
475cabdff1aSopenharmony_ci        av_log(device_ctx, AV_LOG_ERROR,
476cabdff1aSopenharmony_ci               "Failed to get UUID of source device.\n");
477cabdff1aSopenharmony_ci        ret = AVERROR(EINVAL);
478cabdff1aSopenharmony_ci        goto error;
479cabdff1aSopenharmony_ci    }
480cabdff1aSopenharmony_ci
481cabdff1aSopenharmony_ci    ret = cuda_device_init(device_ctx);
482cabdff1aSopenharmony_ci    if (ret < 0)
483cabdff1aSopenharmony_ci        goto error;
484cabdff1aSopenharmony_ci
485cabdff1aSopenharmony_ci    cu = hwctx->internal->cuda_dl;
486cabdff1aSopenharmony_ci
487cabdff1aSopenharmony_ci    ret = CHECK_CU(cu->cuInit(0));
488cabdff1aSopenharmony_ci    if (ret < 0)
489cabdff1aSopenharmony_ci        goto error;
490cabdff1aSopenharmony_ci
491cabdff1aSopenharmony_ci    ret = CHECK_CU(cu->cuDeviceGetCount(&device_count));
492cabdff1aSopenharmony_ci    if (ret < 0)
493cabdff1aSopenharmony_ci        goto error;
494cabdff1aSopenharmony_ci
495cabdff1aSopenharmony_ci    hwctx->internal->cuda_device = -1;
496cabdff1aSopenharmony_ci    for (i = 0; i < device_count; i++) {
497cabdff1aSopenharmony_ci        CUdevice dev;
498cabdff1aSopenharmony_ci        CUuuid uuid;
499cabdff1aSopenharmony_ci
500cabdff1aSopenharmony_ci        ret = CHECK_CU(cu->cuDeviceGet(&dev, i));
501cabdff1aSopenharmony_ci        if (ret < 0)
502cabdff1aSopenharmony_ci            goto error;
503cabdff1aSopenharmony_ci
504cabdff1aSopenharmony_ci        ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev));
505cabdff1aSopenharmony_ci        if (ret < 0)
506cabdff1aSopenharmony_ci            goto error;
507cabdff1aSopenharmony_ci
508cabdff1aSopenharmony_ci        if (memcmp(src_uuid, uuid.bytes, sizeof (uuid.bytes)) == 0) {
509cabdff1aSopenharmony_ci            hwctx->internal->cuda_device = dev;
510cabdff1aSopenharmony_ci            break;
511cabdff1aSopenharmony_ci        }
512cabdff1aSopenharmony_ci    }
513cabdff1aSopenharmony_ci
514cabdff1aSopenharmony_ci    if (hwctx->internal->cuda_device == -1) {
515cabdff1aSopenharmony_ci        av_log(device_ctx, AV_LOG_ERROR, "Could not derive CUDA device.\n");
516cabdff1aSopenharmony_ci        goto error;
517cabdff1aSopenharmony_ci    }
518cabdff1aSopenharmony_ci
519cabdff1aSopenharmony_ci    ret = cuda_context_init(device_ctx, flags);
520cabdff1aSopenharmony_ci    if (ret < 0)
521cabdff1aSopenharmony_ci        goto error;
522cabdff1aSopenharmony_ci
523cabdff1aSopenharmony_ci    return 0;
524cabdff1aSopenharmony_ci
525cabdff1aSopenharmony_cierror:
526cabdff1aSopenharmony_ci    cuda_device_uninit(device_ctx);
527cabdff1aSopenharmony_ci    return ret;
528cabdff1aSopenharmony_ci}
529cabdff1aSopenharmony_ci
530cabdff1aSopenharmony_ciconst HWContextType ff_hwcontext_type_cuda = {
531cabdff1aSopenharmony_ci    .type                 = AV_HWDEVICE_TYPE_CUDA,
532cabdff1aSopenharmony_ci    .name                 = "CUDA",
533cabdff1aSopenharmony_ci
534cabdff1aSopenharmony_ci    .device_hwctx_size    = sizeof(AVCUDADeviceContext),
535cabdff1aSopenharmony_ci    .frames_priv_size     = sizeof(CUDAFramesContext),
536cabdff1aSopenharmony_ci
537cabdff1aSopenharmony_ci    .device_create        = cuda_device_create,
538cabdff1aSopenharmony_ci    .device_derive        = cuda_device_derive,
539cabdff1aSopenharmony_ci    .device_init          = cuda_device_init,
540cabdff1aSopenharmony_ci    .device_uninit        = cuda_device_uninit,
541cabdff1aSopenharmony_ci    .frames_get_constraints = cuda_frames_get_constraints,
542cabdff1aSopenharmony_ci    .frames_init          = cuda_frames_init,
543cabdff1aSopenharmony_ci    .frames_get_buffer    = cuda_get_buffer,
544cabdff1aSopenharmony_ci    .transfer_get_formats = cuda_transfer_get_formats,
545cabdff1aSopenharmony_ci    .transfer_data_to     = cuda_transfer_data,
546cabdff1aSopenharmony_ci    .transfer_data_from   = cuda_transfer_data,
547cabdff1aSopenharmony_ci
548cabdff1aSopenharmony_ci    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
549cabdff1aSopenharmony_ci};
550