xref: /third_party/ffmpeg/libavcodec/nvenc.c (revision cabdff1a)
1/*
2 * H.264/HEVC hardware encoding using nvidia nvenc
3 * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#include "config.h"
23#include "config_components.h"
24
25#include "nvenc.h"
26#include "hevc_sei.h"
27
28#include "libavutil/hwcontext_cuda.h"
29#include "libavutil/hwcontext.h"
30#include "libavutil/cuda_check.h"
31#include "libavutil/imgutils.h"
32#include "libavutil/mem.h"
33#include "libavutil/pixdesc.h"
34#include "atsc_a53.h"
35#include "encode.h"
36#include "internal.h"
37#include "packet_internal.h"
38
/*
 * Wrap a CUDA driver call in FF_CUDA_CHECK_DL().
 * Expects `avctx` and `dl_fn` to be visible at the point of use.
 */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)

/*
 * Lower bound for a device's compute capability, encoded as
 * (major << 4) | minor.
 */
#define NVENC_CAP 0x30

/*
 * Non-zero when rc is one of the CBR rate-control modes.
 * The argument is parenthesized so compound expressions expand safely.
 */
#define IS_CBR(rc) ((rc) == NV_ENC_PARAMS_RC_CBR             || \
                    (rc) == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
                    (rc) == NV_ENC_PARAMS_RC_CBR_HQ)
45
46const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
47    AV_PIX_FMT_YUV420P,
48    AV_PIX_FMT_NV12,
49    AV_PIX_FMT_P010,
50    AV_PIX_FMT_YUV444P,
51    AV_PIX_FMT_P016,      // Truncated to 10bits
52    AV_PIX_FMT_YUV444P16, // Truncated to 10bits
53    AV_PIX_FMT_0RGB32,
54    AV_PIX_FMT_RGB32,
55    AV_PIX_FMT_0BGR32,
56    AV_PIX_FMT_BGR32,
57    AV_PIX_FMT_X2RGB10,
58    AV_PIX_FMT_X2BGR10,
59    AV_PIX_FMT_GBRP,
60    AV_PIX_FMT_GBRP16,    // Truncated to 10bits
61    AV_PIX_FMT_CUDA,
62#if CONFIG_D3D11VA
63    AV_PIX_FMT_D3D11,
64#endif
65    AV_PIX_FMT_NONE
66};
67
68const AVCodecHWConfigInternal *const ff_nvenc_hw_configs[] = {
69    HW_CONFIG_ENCODER_FRAMES(CUDA,  CUDA),
70    HW_CONFIG_ENCODER_DEVICE(NONE,  CUDA),
71#if CONFIG_D3D11VA
72    HW_CONFIG_ENCODER_FRAMES(D3D11, D3D11VA),
73    HW_CONFIG_ENCODER_DEVICE(NONE,  D3D11VA),
74#endif
75    NULL,
76};
77
/*
 * Pixel-format classification helpers.
 * Arguments are parenthesized so that compound expressions (e.g. a
 * conditional expression) are compared as a whole.
 */

/* Formats with more than 8 bits per component. */
#define IS_10BIT(pix_fmt)  ((pix_fmt) == AV_PIX_FMT_P010      || \
                            (pix_fmt) == AV_PIX_FMT_P016      || \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16 || \
                            (pix_fmt) == AV_PIX_FMT_GBRP16)

/* Formats without chroma subsampling (planar YUV 4:4:4 and planar GBR). */
#define IS_YUV444(pix_fmt) ((pix_fmt) == AV_PIX_FMT_YUV444P   || \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16 || \
                            (pix_fmt) == AV_PIX_FMT_GBRP      || \
                            (pix_fmt) == AV_PIX_FMT_GBRP16)

/* Planar GBR formats. */
#define IS_GBRP(pix_fmt) ((pix_fmt) == AV_PIX_FMT_GBRP || \
                          (pix_fmt) == AV_PIX_FMT_GBRP16)
90
91static const struct {
92    NVENCSTATUS nverr;
93    int         averr;
94    const char *desc;
95} nvenc_errors[] = {
96    { NV_ENC_SUCCESS,                      0,                "success"                  },
97    { NV_ENC_ERR_NO_ENCODE_DEVICE,         AVERROR(ENOENT),  "no encode device"         },
98    { NV_ENC_ERR_UNSUPPORTED_DEVICE,       AVERROR(ENOSYS),  "unsupported device"       },
99    { NV_ENC_ERR_INVALID_ENCODERDEVICE,    AVERROR(EINVAL),  "invalid encoder device"   },
100    { NV_ENC_ERR_INVALID_DEVICE,           AVERROR(EINVAL),  "invalid device"           },
101    { NV_ENC_ERR_DEVICE_NOT_EXIST,         AVERROR(EIO),     "device does not exist"    },
102    { NV_ENC_ERR_INVALID_PTR,              AVERROR(EFAULT),  "invalid ptr"              },
103    { NV_ENC_ERR_INVALID_EVENT,            AVERROR(EINVAL),  "invalid event"            },
104    { NV_ENC_ERR_INVALID_PARAM,            AVERROR(EINVAL),  "invalid param"            },
105    { NV_ENC_ERR_INVALID_CALL,             AVERROR(EINVAL),  "invalid call"             },
106    { NV_ENC_ERR_OUT_OF_MEMORY,            AVERROR(ENOMEM),  "out of memory"            },
107    { NV_ENC_ERR_ENCODER_NOT_INITIALIZED,  AVERROR(EINVAL),  "encoder not initialized"  },
108    { NV_ENC_ERR_UNSUPPORTED_PARAM,        AVERROR(ENOSYS),  "unsupported param"        },
109    { NV_ENC_ERR_LOCK_BUSY,                AVERROR(EAGAIN),  "lock busy"                },
110    { NV_ENC_ERR_NOT_ENOUGH_BUFFER,        AVERROR_BUFFER_TOO_SMALL, "not enough buffer"},
111    { NV_ENC_ERR_INVALID_VERSION,          AVERROR(EINVAL),  "invalid version"          },
112    { NV_ENC_ERR_MAP_FAILED,               AVERROR(EIO),     "map failed"               },
113    { NV_ENC_ERR_NEED_MORE_INPUT,          AVERROR(EAGAIN),  "need more input"          },
114    { NV_ENC_ERR_ENCODER_BUSY,             AVERROR(EAGAIN),  "encoder busy"             },
115    { NV_ENC_ERR_EVENT_NOT_REGISTERD,      AVERROR(EBADF),   "event not registered"     },
116    { NV_ENC_ERR_GENERIC,                  AVERROR_UNKNOWN,  "generic error"            },
117    { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY,  AVERROR(EINVAL),  "incompatible client key"  },
118    { NV_ENC_ERR_UNIMPLEMENTED,            AVERROR(ENOSYS),  "unimplemented"            },
119    { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO),     "resource register failed" },
120    { NV_ENC_ERR_RESOURCE_NOT_REGISTERED,  AVERROR(EBADF),   "resource not registered"  },
121    { NV_ENC_ERR_RESOURCE_NOT_MAPPED,      AVERROR(EBADF),   "resource not mapped"      },
122};
123
124static int nvenc_map_error(NVENCSTATUS err, const char **desc)
125{
126    int i;
127    for (i = 0; i < FF_ARRAY_ELEMS(nvenc_errors); i++) {
128        if (nvenc_errors[i].nverr == err) {
129            if (desc)
130                *desc = nvenc_errors[i].desc;
131            return nvenc_errors[i].averr;
132        }
133    }
134    if (desc)
135        *desc = "unknown error";
136    return AVERROR_UNKNOWN;
137}
138
139static int nvenc_print_error(AVCodecContext *avctx, NVENCSTATUS err,
140                             const char *error_string)
141{
142    const char *desc;
143    const char *details = "(no details)";
144    int ret = nvenc_map_error(err, &desc);
145
146#ifdef NVENC_HAVE_GETLASTERRORSTRING
147    NvencContext *ctx = avctx->priv_data;
148    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
149
150    if (p_nvenc && ctx->nvencoder)
151        details = p_nvenc->nvEncGetLastErrorString(ctx->nvencoder);
152#endif
153
154    av_log(avctx, AV_LOG_ERROR, "%s: %s (%d): %s\n", error_string, desc, err, details);
155
156    return ret;
157}
158
159typedef struct GUIDTuple {
160    const GUID guid;
161    int flags;
162} GUIDTuple;
163
164#define PRESET_ALIAS(alias, name, ...) \
165    [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ }
166
167#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__)
168
169static void nvenc_map_preset(NvencContext *ctx)
170{
171    GUIDTuple presets[] = {
172#ifdef NVENC_HAVE_NEW_PRESETS
173        PRESET(P1),
174        PRESET(P2),
175        PRESET(P3),
176        PRESET(P4),
177        PRESET(P5),
178        PRESET(P6),
179        PRESET(P7),
180        PRESET_ALIAS(SLOW,   P7, NVENC_TWO_PASSES),
181        PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS),
182        PRESET_ALIAS(FAST,   P1, NVENC_ONE_PASS),
183        // Compat aliases
184        PRESET_ALIAS(DEFAULT,             P4, NVENC_DEPRECATED_PRESET),
185        PRESET_ALIAS(HP,                  P1, NVENC_DEPRECATED_PRESET),
186        PRESET_ALIAS(HQ,                  P7, NVENC_DEPRECATED_PRESET),
187        PRESET_ALIAS(BD,                  P5, NVENC_DEPRECATED_PRESET),
188        PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
189        PRESET_ALIAS(LOW_LATENCY_HP,      P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
190        PRESET_ALIAS(LOW_LATENCY_HQ,      P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
191        PRESET_ALIAS(LOSSLESS_DEFAULT,    P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
192        PRESET_ALIAS(LOSSLESS_HP,         P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
193#else
194        PRESET(DEFAULT),
195        PRESET(HP),
196        PRESET(HQ),
197        PRESET(BD),
198        PRESET_ALIAS(SLOW,   HQ,    NVENC_TWO_PASSES),
199        PRESET_ALIAS(MEDIUM, HQ,    NVENC_ONE_PASS),
200        PRESET_ALIAS(FAST,   HP,    NVENC_ONE_PASS),
201        PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY),
202        PRESET(LOW_LATENCY_HP,      NVENC_LOWLATENCY),
203        PRESET(LOW_LATENCY_HQ,      NVENC_LOWLATENCY),
204        PRESET(LOSSLESS_DEFAULT,    NVENC_LOSSLESS),
205        PRESET(LOSSLESS_HP,         NVENC_LOSSLESS),
206#endif
207    };
208
209    GUIDTuple *t = &presets[ctx->preset];
210
211    ctx->init_encode_params.presetGUID = t->guid;
212    ctx->flags = t->flags;
213
214#ifdef NVENC_HAVE_NEW_PRESETS
215    if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS)
216        ctx->flags |= NVENC_LOSSLESS;
217#endif
218}
219
220#undef PRESET
221#undef PRESET_ALIAS
222
223static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
224{
225#if NVENCAPI_CHECK_VERSION(11, 2)
226    const char *minver = "(unknown)";
227#elif NVENCAPI_CHECK_VERSION(11, 1)
228# if defined(_WIN32) || defined(__CYGWIN__)
229    const char *minver = "471.41";
230# else
231    const char *minver = "470.57.02";
232# endif
233#elif NVENCAPI_CHECK_VERSION(11, 0)
234# if defined(_WIN32) || defined(__CYGWIN__)
235    const char *minver = "456.71";
236# else
237    const char *minver = "455.28";
238# endif
239#elif NVENCAPI_CHECK_VERSION(10, 0)
240# if defined(_WIN32) || defined(__CYGWIN__)
241    const char *minver = "450.51";
242# else
243    const char *minver = "445.87";
244# endif
245#elif NVENCAPI_CHECK_VERSION(9, 1)
246# if defined(_WIN32) || defined(__CYGWIN__)
247    const char *minver = "436.15";
248# else
249    const char *minver = "435.21";
250# endif
251#elif NVENCAPI_CHECK_VERSION(9, 0)
252# if defined(_WIN32) || defined(__CYGWIN__)
253    const char *minver = "418.81";
254# else
255    const char *minver = "418.30";
256# endif
257#elif NVENCAPI_CHECK_VERSION(8, 2)
258# if defined(_WIN32) || defined(__CYGWIN__)
259    const char *minver = "397.93";
260# else
261    const char *minver = "396.24";
262#endif
263#elif NVENCAPI_CHECK_VERSION(8, 1)
264# if defined(_WIN32) || defined(__CYGWIN__)
265    const char *minver = "390.77";
266# else
267    const char *minver = "390.25";
268# endif
269#else
270# if defined(_WIN32) || defined(__CYGWIN__)
271    const char *minver = "378.66";
272# else
273    const char *minver = "378.13";
274# endif
275#endif
276    av_log(avctx, level, "The minimum required Nvidia driver for nvenc is %s or newer\n", minver);
277}
278
279static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
280{
281    NvencContext *ctx            = avctx->priv_data;
282    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
283    NVENCSTATUS err;
284    uint32_t nvenc_max_ver;
285    int ret;
286
287    ret = cuda_load_functions(&dl_fn->cuda_dl, avctx);
288    if (ret < 0)
289        return ret;
290
291    ret = nvenc_load_functions(&dl_fn->nvenc_dl, avctx);
292    if (ret < 0) {
293        nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
294        return ret;
295    }
296
297    err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
298    if (err != NV_ENC_SUCCESS)
299        return nvenc_print_error(avctx, err, "Failed to query nvenc max version");
300
301    av_log(avctx, AV_LOG_VERBOSE, "Loaded Nvenc version %d.%d\n", nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
302
303    if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) > nvenc_max_ver) {
304        av_log(avctx, AV_LOG_ERROR, "Driver does not support the required nvenc API version. "
305               "Required: %d.%d Found: %d.%d\n",
306               NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
307               nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
308        nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
309        return AVERROR(ENOSYS);
310    }
311
312    dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
313
314    err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
315    if (err != NV_ENC_SUCCESS)
316        return nvenc_print_error(avctx, err, "Failed to create nvenc instance");
317
318    av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
319
320    return 0;
321}
322
323static int nvenc_push_context(AVCodecContext *avctx)
324{
325    NvencContext *ctx            = avctx->priv_data;
326    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
327
328    if (ctx->d3d11_device)
329        return 0;
330
331    return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
332}
333
334static int nvenc_pop_context(AVCodecContext *avctx)
335{
336    NvencContext *ctx            = avctx->priv_data;
337    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
338    CUcontext dummy;
339
340    if (ctx->d3d11_device)
341        return 0;
342
343    return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
344}
345
346static av_cold int nvenc_open_session(AVCodecContext *avctx)
347{
348    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
349    NvencContext *ctx = avctx->priv_data;
350    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
351    NVENCSTATUS ret;
352
353    params.version    = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
354    params.apiVersion = NVENCAPI_VERSION;
355    if (ctx->d3d11_device) {
356        params.device     = ctx->d3d11_device;
357        params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
358    } else {
359        params.device     = ctx->cu_context;
360        params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
361    }
362
363    ret = p_nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
364    if (ret != NV_ENC_SUCCESS) {
365        ctx->nvencoder = NULL;
366        return nvenc_print_error(avctx, ret, "OpenEncodeSessionEx failed");
367    }
368
369    return 0;
370}
371
372static int nvenc_check_codec_support(AVCodecContext *avctx)
373{
374    NvencContext *ctx                    = avctx->priv_data;
375    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
376    int i, ret, count = 0;
377    GUID *guids = NULL;
378
379    ret = p_nvenc->nvEncGetEncodeGUIDCount(ctx->nvencoder, &count);
380
381    if (ret != NV_ENC_SUCCESS || !count)
382        return AVERROR(ENOSYS);
383
384    guids = av_malloc(count * sizeof(GUID));
385    if (!guids)
386        return AVERROR(ENOMEM);
387
388    ret = p_nvenc->nvEncGetEncodeGUIDs(ctx->nvencoder, guids, count, &count);
389    if (ret != NV_ENC_SUCCESS) {
390        ret = AVERROR(ENOSYS);
391        goto fail;
392    }
393
394    ret = AVERROR(ENOSYS);
395    for (i = 0; i < count; i++) {
396        if (!memcmp(&guids[i], &ctx->init_encode_params.encodeGUID, sizeof(*guids))) {
397            ret = 0;
398            break;
399        }
400    }
401
402fail:
403    av_free(guids);
404
405    return ret;
406}
407
408static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
409{
410    NvencContext *ctx = avctx->priv_data;
411    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
412    NV_ENC_CAPS_PARAM params        = { 0 };
413    int ret, val = 0;
414
415    params.version     = NV_ENC_CAPS_PARAM_VER;
416    params.capsToQuery = cap;
417
418    ret = p_nvenc->nvEncGetEncodeCaps(ctx->nvencoder, ctx->init_encode_params.encodeGUID, &params, &val);
419
420    if (ret == NV_ENC_SUCCESS)
421        return val;
422    return 0;
423}
424
425static int nvenc_check_capabilities(AVCodecContext *avctx)
426{
427    NvencContext *ctx = avctx->priv_data;
428    int ret;
429
430    ret = nvenc_check_codec_support(avctx);
431    if (ret < 0) {
432        av_log(avctx, AV_LOG_WARNING, "Codec not supported\n");
433        return ret;
434    }
435
436    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
437    if (IS_YUV444(ctx->data_pix_fmt) && ret <= 0) {
438        av_log(avctx, AV_LOG_WARNING, "YUV444P not supported\n");
439        return AVERROR(ENOSYS);
440    }
441
442    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
443    if (ctx->flags & NVENC_LOSSLESS && ret <= 0) {
444        av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n");
445        return AVERROR(ENOSYS);
446    }
447
448    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
449    if (ret < avctx->width) {
450        av_log(avctx, AV_LOG_WARNING, "Width %d exceeds %d\n",
451               avctx->width, ret);
452        return AVERROR(ENOSYS);
453    }
454
455    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
456    if (ret < avctx->height) {
457        av_log(avctx, AV_LOG_WARNING, "Height %d exceeds %d\n",
458               avctx->height, ret);
459        return AVERROR(ENOSYS);
460    }
461
462    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
463    if (ret < avctx->max_b_frames) {
464        av_log(avctx, AV_LOG_WARNING, "Max B-frames %d exceed %d\n",
465               avctx->max_b_frames, ret);
466
467        return AVERROR(ENOSYS);
468    }
469
470    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_FIELD_ENCODING);
471    if (ret < 1 && avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
472        av_log(avctx, AV_LOG_WARNING,
473               "Interlaced encoding is not supported. Supported level: %d\n",
474               ret);
475        return AVERROR(ENOSYS);
476    }
477
478    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
479    if (IS_10BIT(ctx->data_pix_fmt) && ret <= 0) {
480        av_log(avctx, AV_LOG_WARNING, "10 bit encode not supported\n");
481        return AVERROR(ENOSYS);
482    }
483
484    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD);
485    if (ctx->rc_lookahead > 0 && ret <= 0) {
486        av_log(avctx, AV_LOG_WARNING, "RC lookahead not supported\n");
487        return AVERROR(ENOSYS);
488    }
489
490    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ);
491    if (ctx->temporal_aq > 0 && ret <= 0) {
492        av_log(avctx, AV_LOG_WARNING, "Temporal AQ not supported\n");
493        return AVERROR(ENOSYS);
494    }
495
496    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION);
497    if (ctx->weighted_pred > 0 && ret <= 0) {
498        av_log (avctx, AV_LOG_WARNING, "Weighted Prediction not supported\n");
499        return AVERROR(ENOSYS);
500    }
501
502    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CABAC);
503    if (ctx->coder == NV_ENC_H264_ENTROPY_CODING_MODE_CABAC && ret <= 0) {
504        av_log(avctx, AV_LOG_WARNING, "CABAC entropy coding not supported\n");
505        return AVERROR(ENOSYS);
506    }
507
508#ifdef NVENC_HAVE_BFRAME_REF_MODE
509    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE);
510    if (ctx->b_ref_mode == NV_ENC_BFRAME_REF_MODE_EACH && ret != 1 && ret != 3) {
511        av_log(avctx, AV_LOG_WARNING, "Each B frame as reference is not supported\n");
512        return AVERROR(ENOSYS);
513    } else if (ctx->b_ref_mode != NV_ENC_BFRAME_REF_MODE_DISABLED && ret == 0) {
514        av_log(avctx, AV_LOG_WARNING, "B frames as references are not supported\n");
515        return AVERROR(ENOSYS);
516    }
517#else
518    if (ctx->b_ref_mode != 0) {
519        av_log(avctx, AV_LOG_WARNING, "B frames as references need SDK 8.1 at build time\n");
520        return AVERROR(ENOSYS);
521    }
522#endif
523
524#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
525    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES);
526    if(avctx->refs != NV_ENC_NUM_REF_FRAMES_AUTOSELECT && ret <= 0) {
527        av_log(avctx, AV_LOG_WARNING, "Multiple reference frames are not supported by the device\n");
528        return AVERROR(ENOSYS);
529    }
530#else
531    if(avctx->refs != 0) {
532        av_log(avctx, AV_LOG_WARNING, "Multiple reference frames need SDK 9.1 at build time\n");
533        return AVERROR(ENOSYS);
534    }
535#endif
536
537#ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH
538    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SINGLE_SLICE_INTRA_REFRESH);
539    if(ctx->single_slice_intra_refresh && ret <= 0) {
540        av_log(avctx, AV_LOG_WARNING, "Single slice intra refresh not supported by the device\n");
541        return AVERROR(ENOSYS);
542    }
543#else
544    if(ctx->single_slice_intra_refresh) {
545        av_log(avctx, AV_LOG_WARNING, "Single slice intra refresh needs SDK 11.1 at build time\n");
546        return AVERROR(ENOSYS);
547    }
548#endif
549
550    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_INTRA_REFRESH);
551    if((ctx->intra_refresh || ctx->single_slice_intra_refresh) && ret <= 0) {
552        av_log(avctx, AV_LOG_WARNING, "Intra refresh not supported by the device\n");
553        return AVERROR(ENOSYS);
554    }
555
556#ifndef NVENC_HAVE_HEVC_CONSTRAINED_ENCODING
557    if (ctx->constrained_encoding && avctx->codec->id == AV_CODEC_ID_HEVC) {
558        av_log(avctx, AV_LOG_WARNING, "HEVC constrained encoding needs SDK 10.0 at build time\n");
559        return AVERROR(ENOSYS);
560    }
561#endif
562
563    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING);
564    if(ctx->constrained_encoding && ret <= 0) {
565        av_log(avctx, AV_LOG_WARNING, "Constrained encoding not supported by the device\n");
566        return AVERROR(ENOSYS);
567    }
568
569    ctx->support_dyn_bitrate = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
570
571    return 0;
572}
573
574static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
575{
576    NvencContext *ctx = avctx->priv_data;
577    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
578    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
579    char name[128] = { 0};
580    int major, minor, ret;
581    CUdevice cu_device;
582    int loglevel = AV_LOG_VERBOSE;
583
584    if (ctx->device == LIST_DEVICES)
585        loglevel = AV_LOG_INFO;
586
587    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
588    if (ret < 0)
589        return ret;
590
591    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
592    if (ret < 0)
593        return ret;
594
595    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
596    if (ret < 0)
597        return ret;
598
599    av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
600    if (((major << 4) | minor) < NVENC_CAP) {
601        av_log(avctx, loglevel, "does not support NVENC\n");
602        goto fail;
603    }
604
605    if (ctx->device != idx && ctx->device != ANY_DEVICE)
606        return -1;
607
608    ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
609    if (ret < 0)
610        goto fail;
611
612    ctx->cu_context = ctx->cu_context_internal;
613    ctx->cu_stream = NULL;
614
615    if ((ret = nvenc_pop_context(avctx)) < 0)
616        goto fail2;
617
618    if ((ret = nvenc_open_session(avctx)) < 0)
619        goto fail2;
620
621    if ((ret = nvenc_check_capabilities(avctx)) < 0)
622        goto fail3;
623
624    av_log(avctx, loglevel, "supports NVENC\n");
625
626    dl_fn->nvenc_device_count++;
627
628    if (ctx->device == idx || ctx->device == ANY_DEVICE)
629        return 0;
630
631fail3:
632    if ((ret = nvenc_push_context(avctx)) < 0)
633        return ret;
634
635    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
636    ctx->nvencoder = NULL;
637
638    if ((ret = nvenc_pop_context(avctx)) < 0)
639        return ret;
640
641fail2:
642    CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
643    ctx->cu_context_internal = NULL;
644
645fail:
646    return AVERROR(ENOSYS);
647}
648
649static av_cold int nvenc_setup_device(AVCodecContext *avctx)
650{
651    NvencContext *ctx            = avctx->priv_data;
652    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
653
654    switch (avctx->codec->id) {
655    case AV_CODEC_ID_H264:
656        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
657        break;
658    case AV_CODEC_ID_HEVC:
659        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
660        break;
661    default:
662        return AVERROR_BUG;
663    }
664
665    nvenc_map_preset(ctx);
666
667    if (ctx->flags & NVENC_DEPRECATED_PRESET)
668        av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. Use p1 to p7 + -tune or fast/medium/slow.\n");
669
670    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
671        AVHWFramesContext   *frames_ctx;
672        AVHWDeviceContext   *hwdev_ctx;
673        AVCUDADeviceContext *cuda_device_hwctx = NULL;
674#if CONFIG_D3D11VA
675        AVD3D11VADeviceContext *d3d11_device_hwctx = NULL;
676#endif
677        int ret;
678
679        if (avctx->hw_frames_ctx) {
680            frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
681            if (frames_ctx->format == AV_PIX_FMT_CUDA)
682                cuda_device_hwctx = frames_ctx->device_ctx->hwctx;
683#if CONFIG_D3D11VA
684            else if (frames_ctx->format == AV_PIX_FMT_D3D11)
685                d3d11_device_hwctx = frames_ctx->device_ctx->hwctx;
686#endif
687            else
688                return AVERROR(EINVAL);
689        } else if (avctx->hw_device_ctx) {
690            hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
691            if (hwdev_ctx->type == AV_HWDEVICE_TYPE_CUDA)
692                cuda_device_hwctx = hwdev_ctx->hwctx;
693#if CONFIG_D3D11VA
694            else if (hwdev_ctx->type == AV_HWDEVICE_TYPE_D3D11VA)
695                d3d11_device_hwctx = hwdev_ctx->hwctx;
696#endif
697            else
698                return AVERROR(EINVAL);
699        } else {
700            return AVERROR(EINVAL);
701        }
702
703        if (cuda_device_hwctx) {
704            ctx->cu_context = cuda_device_hwctx->cuda_ctx;
705            ctx->cu_stream = cuda_device_hwctx->stream;
706        }
707#if CONFIG_D3D11VA
708        else if (d3d11_device_hwctx) {
709            ctx->d3d11_device = d3d11_device_hwctx->device;
710            ID3D11Device_AddRef(ctx->d3d11_device);
711        }
712#endif
713
714        ret = nvenc_open_session(avctx);
715        if (ret < 0)
716            return ret;
717
718        ret = nvenc_check_capabilities(avctx);
719        if (ret < 0) {
720            av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
721            return ret;
722        }
723    } else {
724        int i, nb_devices = 0;
725
726        if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
727            return AVERROR_UNKNOWN;
728
729        if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
730            return AVERROR_UNKNOWN;
731
732        if (!nb_devices) {
733            av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
734                return AVERROR_EXTERNAL;
735        }
736
737        av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", nb_devices);
738
739        dl_fn->nvenc_device_count = 0;
740        for (i = 0; i < nb_devices; ++i) {
741            if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
742                return 0;
743        }
744
745        if (ctx->device == LIST_DEVICES)
746            return AVERROR_EXIT;
747
748        if (!dl_fn->nvenc_device_count) {
749            av_log(avctx, AV_LOG_FATAL, "No capable devices found\n");
750            return AVERROR_EXTERNAL;
751        }
752
753        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->device, nb_devices);
754        return AVERROR(EINVAL);
755    }
756
757    return 0;
758}
759
760static av_cold void set_constqp(AVCodecContext *avctx)
761{
762    NvencContext *ctx = avctx->priv_data;
763    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
764
765    rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
766
767    if (ctx->init_qp_p >= 0) {
768        rc->constQP.qpInterP = ctx->init_qp_p;
769        if (ctx->init_qp_i >= 0 && ctx->init_qp_b >= 0) {
770            rc->constQP.qpIntra = ctx->init_qp_i;
771            rc->constQP.qpInterB = ctx->init_qp_b;
772        } else if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
773            rc->constQP.qpIntra = av_clip(
774                rc->constQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
775            rc->constQP.qpInterB = av_clip(
776                rc->constQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
777        } else {
778            rc->constQP.qpIntra = rc->constQP.qpInterP;
779            rc->constQP.qpInterB = rc->constQP.qpInterP;
780        }
781    } else if (ctx->cqp >= 0) {
782        rc->constQP.qpInterP = rc->constQP.qpInterB = rc->constQP.qpIntra = ctx->cqp;
783        if (avctx->b_quant_factor != 0.0)
784            rc->constQP.qpInterB = av_clip(ctx->cqp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
785        if (avctx->i_quant_factor != 0.0)
786            rc->constQP.qpIntra = av_clip(ctx->cqp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
787    }
788
789    avctx->qmin = -1;
790    avctx->qmax = -1;
791}
792
793static av_cold void set_vbr(AVCodecContext *avctx)
794{
795    NvencContext *ctx = avctx->priv_data;
796    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
797    int qp_inter_p;
798
799    if (avctx->qmin >= 0 && avctx->qmax >= 0) {
800        rc->enableMinQP = 1;
801        rc->enableMaxQP = 1;
802
803        rc->minQP.qpInterB = avctx->qmin;
804        rc->minQP.qpInterP = avctx->qmin;
805        rc->minQP.qpIntra  = avctx->qmin;
806
807        rc->maxQP.qpInterB = avctx->qmax;
808        rc->maxQP.qpInterP = avctx->qmax;
809        rc->maxQP.qpIntra = avctx->qmax;
810
811        qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
812    } else if (avctx->qmin >= 0) {
813        rc->enableMinQP = 1;
814
815        rc->minQP.qpInterB = avctx->qmin;
816        rc->minQP.qpInterP = avctx->qmin;
817        rc->minQP.qpIntra = avctx->qmin;
818
819        qp_inter_p = avctx->qmin;
820    } else {
821        qp_inter_p = 26; // default to 26
822    }
823
824    rc->enableInitialRCQP = 1;
825
826    if (ctx->init_qp_p < 0) {
827        rc->initialRCQP.qpInterP  = qp_inter_p;
828    } else {
829        rc->initialRCQP.qpInterP = ctx->init_qp_p;
830    }
831
832    if (ctx->init_qp_i < 0) {
833        if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
834            rc->initialRCQP.qpIntra = av_clip(
835                rc->initialRCQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
836        } else {
837            rc->initialRCQP.qpIntra = rc->initialRCQP.qpInterP;
838        }
839    } else {
840        rc->initialRCQP.qpIntra = ctx->init_qp_i;
841    }
842
843    if (ctx->init_qp_b < 0) {
844        if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
845            rc->initialRCQP.qpInterB = av_clip(
846                rc->initialRCQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
847        } else {
848            rc->initialRCQP.qpInterB = rc->initialRCQP.qpInterP;
849        }
850    } else {
851        rc->initialRCQP.qpInterB = ctx->init_qp_b;
852    }
853}
854
855static av_cold void set_lossless(AVCodecContext *avctx)
856{
857    NvencContext *ctx = avctx->priv_data;
858    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
859
860    rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
861    rc->constQP.qpInterB = 0;
862    rc->constQP.qpInterP = 0;
863    rc->constQP.qpIntra  = 0;
864
865    avctx->qmin = -1;
866    avctx->qmax = -1;
867}
868
869static void nvenc_override_rate_control(AVCodecContext *avctx)
870{
871    NvencContext *ctx    = avctx->priv_data;
872    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
873
874    switch (ctx->rc) {
875    case NV_ENC_PARAMS_RC_CONSTQP:
876        set_constqp(avctx);
877        return;
878    case NV_ENC_PARAMS_RC_VBR_MINQP:
879        if (avctx->qmin < 0) {
880            av_log(avctx, AV_LOG_WARNING,
881                   "The variable bitrate rate-control requires "
882                   "the 'qmin' option set.\n");
883            set_vbr(avctx);
884            return;
885        }
886        /* fall through */
887    case NV_ENC_PARAMS_RC_VBR_HQ:
888    case NV_ENC_PARAMS_RC_VBR:
889        set_vbr(avctx);
890        break;
891    case NV_ENC_PARAMS_RC_CBR:
892    case NV_ENC_PARAMS_RC_CBR_HQ:
893    case NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ:
894        break;
895    }
896
897    rc->rateControlMode = ctx->rc;
898}
899
900static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
901{
902    NvencContext *ctx = avctx->priv_data;
903    // default minimum of 4 surfaces
904    // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
905    // another multiply by 2 to avoid blocking next PBB group
906    int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
907
908    // lookahead enabled
909    if (ctx->rc_lookahead > 0) {
910        // +1 is to account for lkd_bound calculation later
911        // +4 is to allow sufficient pipelining with lookahead
912        nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
913        if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
914        {
915            av_log(avctx, AV_LOG_WARNING,
916                   "Defined rc_lookahead requires more surfaces, "
917                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
918        }
919        ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
920    } else {
921        if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
922        {
923            av_log(avctx, AV_LOG_WARNING,
924                   "Defined b-frame requires more surfaces, "
925                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
926            ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
927        }
928        else if (ctx->nb_surfaces <= 0)
929            ctx->nb_surfaces = nb_surfaces;
930        // otherwise use user specified value
931    }
932
933    ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
934    ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);
935
936    return 0;
937}
938
939static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
940{
941    NvencContext *ctx = avctx->priv_data;
942
943    if (avctx->global_quality > 0)
944        av_log(avctx, AV_LOG_WARNING, "Using global_quality with nvenc is deprecated. Use qp instead.\n");
945
946    if (ctx->cqp < 0 && avctx->global_quality > 0)
947        ctx->cqp = avctx->global_quality;
948
949    if (avctx->bit_rate > 0) {
950        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
951    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
952        ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
953    }
954
955    if (avctx->rc_max_rate > 0)
956        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
957
958#ifdef NVENC_HAVE_MULTIPASS
959    ctx->encode_config.rcParams.multiPass = ctx->multipass;
960
961    if (ctx->flags & NVENC_ONE_PASS)
962        ctx->encode_config.rcParams.multiPass = NV_ENC_MULTI_PASS_DISABLED;
963    if (ctx->flags & NVENC_TWO_PASSES || ctx->twopass > 0)
964        ctx->encode_config.rcParams.multiPass = NV_ENC_TWO_PASS_FULL_RESOLUTION;
965
966    if (ctx->rc < 0) {
967        if (ctx->cbr) {
968            ctx->rc = NV_ENC_PARAMS_RC_CBR;
969        } else if (ctx->cqp >= 0) {
970            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
971        } else if (ctx->quality >= 0.0f) {
972            ctx->rc = NV_ENC_PARAMS_RC_VBR;
973        }
974    }
975#else
976    if (ctx->rc < 0) {
977        if (ctx->flags & NVENC_ONE_PASS)
978            ctx->twopass = 0;
979        if (ctx->flags & NVENC_TWO_PASSES)
980            ctx->twopass = 1;
981
982        if (ctx->twopass < 0)
983            ctx->twopass = (ctx->flags & NVENC_LOWLATENCY) != 0;
984
985        if (ctx->cbr) {
986            if (ctx->twopass) {
987                ctx->rc = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
988            } else {
989                ctx->rc = NV_ENC_PARAMS_RC_CBR;
990            }
991        } else if (ctx->cqp >= 0) {
992            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
993        } else if (ctx->twopass) {
994            ctx->rc = NV_ENC_PARAMS_RC_VBR_HQ;
995        } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
996            ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
997        }
998    }
999#endif
1000
1001    if (ctx->rc >= 0 && ctx->rc & RC_MODE_DEPRECATED) {
1002        av_log(avctx, AV_LOG_WARNING, "Specified rc mode is deprecated.\n");
1003        av_log(avctx, AV_LOG_WARNING, "Use -rc constqp/cbr/vbr, -tune and -multipass instead.\n");
1004
1005        ctx->rc &= ~RC_MODE_DEPRECATED;
1006    }
1007
1008#ifdef NVENC_HAVE_QP_CHROMA_OFFSETS
1009    ctx->encode_config.rcParams.cbQPIndexOffset = ctx->qp_cb_offset;
1010    ctx->encode_config.rcParams.crQPIndexOffset = ctx->qp_cr_offset;
1011#else
1012    if (ctx->qp_cb_offset || ctx->qp_cr_offset)
1013        av_log(avctx, AV_LOG_WARNING, "Failed setting QP CB/CR offsets, SDK 11.1 or greater required at compile time.\n");
1014#endif
1015
1016#ifdef NVENC_HAVE_LDKFS
1017    if (ctx->ldkfs)
1018         ctx->encode_config.rcParams.lowDelayKeyFrameScale = ctx->ldkfs;
1019#endif
1020
1021    if (ctx->flags & NVENC_LOSSLESS) {
1022        set_lossless(avctx);
1023    } else if (ctx->rc >= 0) {
1024        nvenc_override_rate_control(avctx);
1025    } else {
1026        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
1027        set_vbr(avctx);
1028    }
1029
1030    if (avctx->rc_buffer_size > 0) {
1031        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
1032    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
1033        avctx->rc_buffer_size = ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
1034    }
1035
1036    if (ctx->aq) {
1037        ctx->encode_config.rcParams.enableAQ   = 1;
1038        ctx->encode_config.rcParams.aqStrength = ctx->aq_strength;
1039        av_log(avctx, AV_LOG_VERBOSE, "AQ enabled.\n");
1040    }
1041
1042    if (ctx->temporal_aq) {
1043        ctx->encode_config.rcParams.enableTemporalAQ = 1;
1044        av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ enabled.\n");
1045    }
1046
1047    if (ctx->rc_lookahead > 0) {
1048        int lkd_bound = FFMIN(ctx->nb_surfaces, ctx->async_depth) -
1049                        ctx->encode_config.frameIntervalP - 4;
1050
1051        if (lkd_bound < 0) {
1052            av_log(avctx, AV_LOG_WARNING,
1053                   "Lookahead not enabled. Increase buffer delay (-delay).\n");
1054        } else {
1055            ctx->encode_config.rcParams.enableLookahead = 1;
1056            ctx->encode_config.rcParams.lookaheadDepth  = av_clip(ctx->rc_lookahead, 0, lkd_bound);
1057            ctx->encode_config.rcParams.disableIadapt   = ctx->no_scenecut;
1058            ctx->encode_config.rcParams.disableBadapt   = !ctx->b_adapt;
1059            av_log(avctx, AV_LOG_VERBOSE,
1060                   "Lookahead enabled: depth %d, scenecut %s, B-adapt %s.\n",
1061                   ctx->encode_config.rcParams.lookaheadDepth,
1062                   ctx->encode_config.rcParams.disableIadapt ? "disabled" : "enabled",
1063                   ctx->encode_config.rcParams.disableBadapt ? "disabled" : "enabled");
1064        }
1065    }
1066
1067    if (ctx->strict_gop) {
1068        ctx->encode_config.rcParams.strictGOPTarget = 1;
1069        av_log(avctx, AV_LOG_VERBOSE, "Strict GOP target enabled.\n");
1070    }
1071
1072    if (ctx->nonref_p)
1073        ctx->encode_config.rcParams.enableNonRefP = 1;
1074
1075    if (ctx->zerolatency)
1076        ctx->encode_config.rcParams.zeroReorderDelay = 1;
1077
1078    if (ctx->quality) {
1079        //convert from float to fixed point 8.8
1080        int tmp_quality = (int)(ctx->quality * 256.0f);
1081        ctx->encode_config.rcParams.targetQuality = (uint8_t)(tmp_quality >> 8);
1082        ctx->encode_config.rcParams.targetQualityLSB = (uint8_t)(tmp_quality & 0xff);
1083
1084        av_log(avctx, AV_LOG_VERBOSE, "CQ(%d) mode enabled.\n", tmp_quality);
1085
1086        // CQ mode shall discard avg bitrate/vbv buffer size and honor only max bitrate
1087        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate = 0;
1088        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size = 0;
1089        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
1090    }
1091}
1092
1093static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
1094{
1095    NvencContext *ctx                      = avctx->priv_data;
1096    NV_ENC_CONFIG *cc                      = &ctx->encode_config;
1097    NV_ENC_CONFIG_H264 *h264               = &cc->encodeCodecConfig.h264Config;
1098    NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
1099
1100    vui->colourMatrix = IS_GBRP(ctx->data_pix_fmt) ? AVCOL_SPC_RGB : avctx->colorspace;
1101    vui->colourPrimaries = avctx->color_primaries;
1102    vui->transferCharacteristics = avctx->color_trc;
1103    vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
1104        || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
1105
1106    vui->colourDescriptionPresentFlag =
1107        (vui->colourMatrix != 2 || vui->colourPrimaries != 2 || vui->transferCharacteristics != 2);
1108
1109    vui->videoSignalTypePresentFlag =
1110        (vui->colourDescriptionPresentFlag
1111        || vui->videoFormat != 5
1112        || vui->videoFullRangeFlag != 0);
1113
1114    h264->sliceMode = 3;
1115    h264->sliceModeData = avctx->slices > 0 ? avctx->slices : 1;
1116
1117    if (ctx->intra_refresh) {
1118        h264->enableIntraRefresh = 1;
1119        h264->intraRefreshPeriod = avctx->gop_size;
1120        h264->intraRefreshCnt = avctx->gop_size - 1;
1121#ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH
1122        h264->singleSliceIntraRefresh = ctx->single_slice_intra_refresh;
1123#endif
1124    }
1125
1126    if (ctx->constrained_encoding)
1127        h264->enableConstrainedEncoding = 1;
1128
1129    h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
1130    h264->repeatSPSPPS  = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
1131    h264->outputAUD     = ctx->aud;
1132
1133    if (ctx->dpb_size >= 0) {
1134        /* 0 means "let the hardware decide" */
1135        h264->maxNumRefFrames = ctx->dpb_size;
1136    }
1137
1138    if (ctx->intra_refresh) {
1139        h264->idrPeriod = NVENC_INFINITE_GOPLENGTH;
1140    } else if (avctx->gop_size >= 0) {
1141        h264->idrPeriod = avctx->gop_size;
1142    }
1143
1144    if (IS_CBR(cc->rcParams.rateControlMode)) {
1145        h264->outputBufferingPeriodSEI = 1;
1146    }
1147
1148    h264->outputPictureTimingSEI = 1;
1149
1150    if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ||
1151        cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_HQ ||
1152        cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_VBR_HQ) {
1153        h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
1154        h264->fmoMode = NV_ENC_H264_FMO_DISABLE;
1155    }
1156
1157    if (ctx->flags & NVENC_LOSSLESS) {
1158        h264->qpPrimeYZeroTransformBypassFlag = 1;
1159    } else {
1160        switch(ctx->profile) {
1161        case NV_ENC_H264_PROFILE_BASELINE:
1162            cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
1163            avctx->profile = FF_PROFILE_H264_BASELINE;
1164            break;
1165        case NV_ENC_H264_PROFILE_MAIN:
1166            cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
1167            avctx->profile = FF_PROFILE_H264_MAIN;
1168            break;
1169        case NV_ENC_H264_PROFILE_HIGH:
1170            cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
1171            avctx->profile = FF_PROFILE_H264_HIGH;
1172            break;
1173        case NV_ENC_H264_PROFILE_HIGH_444P:
1174            cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
1175            avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
1176            break;
1177        }
1178    }
1179
1180    // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
1181    if (IS_YUV444(ctx->data_pix_fmt)) {
1182        cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
1183        avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
1184    }
1185
1186    h264->chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;
1187
1188    h264->level = ctx->level;
1189
1190    if (ctx->coder >= 0)
1191        h264->entropyCodingMode = ctx->coder;
1192
1193#ifdef NVENC_HAVE_BFRAME_REF_MODE
1194    h264->useBFramesAsRef = ctx->b_ref_mode;
1195#endif
1196
1197#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
1198    h264->numRefL0 = avctx->refs;
1199    h264->numRefL1 = avctx->refs;
1200#endif
1201
1202    return 0;
1203}
1204
1205static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
1206{
1207    NvencContext *ctx                      = avctx->priv_data;
1208    NV_ENC_CONFIG *cc                      = &ctx->encode_config;
1209    NV_ENC_CONFIG_HEVC *hevc               = &cc->encodeCodecConfig.hevcConfig;
1210    NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;
1211
1212    vui->colourMatrix = IS_GBRP(ctx->data_pix_fmt) ? AVCOL_SPC_RGB : avctx->colorspace;
1213    vui->colourPrimaries = avctx->color_primaries;
1214    vui->transferCharacteristics = avctx->color_trc;
1215    vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
1216        || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
1217
1218    vui->colourDescriptionPresentFlag =
1219        (vui->colourMatrix != 2 || vui->colourPrimaries != 2 || vui->transferCharacteristics != 2);
1220
1221    vui->videoSignalTypePresentFlag =
1222        (vui->colourDescriptionPresentFlag
1223        || vui->videoFormat != 5
1224        || vui->videoFullRangeFlag != 0);
1225
1226    hevc->sliceMode = 3;
1227    hevc->sliceModeData = avctx->slices > 0 ? avctx->slices : 1;
1228
1229    if (ctx->intra_refresh) {
1230        hevc->enableIntraRefresh = 1;
1231        hevc->intraRefreshPeriod = avctx->gop_size;
1232        hevc->intraRefreshCnt = avctx->gop_size - 1;
1233#ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH
1234        hevc->singleSliceIntraRefresh = ctx->single_slice_intra_refresh;
1235#endif
1236    }
1237
1238#ifdef NVENC_HAVE_HEVC_CONSTRAINED_ENCODING
1239    if (ctx->constrained_encoding)
1240        hevc->enableConstrainedEncoding = 1;
1241#endif
1242
1243    hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
1244    hevc->repeatSPSPPS  = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
1245    hevc->outputAUD     = ctx->aud;
1246
1247    if (ctx->dpb_size >= 0) {
1248        /* 0 means "let the hardware decide" */
1249        hevc->maxNumRefFramesInDPB = ctx->dpb_size;
1250    }
1251
1252    if (ctx->intra_refresh) {
1253        hevc->idrPeriod = NVENC_INFINITE_GOPLENGTH;
1254    } else if (avctx->gop_size >= 0) {
1255        hevc->idrPeriod = avctx->gop_size;
1256    }
1257
1258    if (IS_CBR(cc->rcParams.rateControlMode)) {
1259        hevc->outputBufferingPeriodSEI = 1;
1260    }
1261
1262    hevc->outputPictureTimingSEI = 1;
1263
1264    switch (ctx->profile) {
1265    case NV_ENC_HEVC_PROFILE_MAIN:
1266        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
1267        avctx->profile  = FF_PROFILE_HEVC_MAIN;
1268        break;
1269    case NV_ENC_HEVC_PROFILE_MAIN_10:
1270        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
1271        avctx->profile  = FF_PROFILE_HEVC_MAIN_10;
1272        break;
1273    case NV_ENC_HEVC_PROFILE_REXT:
1274        cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
1275        avctx->profile  = FF_PROFILE_HEVC_REXT;
1276        break;
1277    }
1278
1279    // force setting profile as main10 if input is 10 bit
1280    if (IS_10BIT(ctx->data_pix_fmt)) {
1281        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
1282        avctx->profile = FF_PROFILE_HEVC_MAIN_10;
1283    }
1284
1285    // force setting profile as rext if input is yuv444
1286    if (IS_YUV444(ctx->data_pix_fmt)) {
1287        cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
1288        avctx->profile = FF_PROFILE_HEVC_REXT;
1289    }
1290
1291    hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
1292
1293    hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;
1294
1295    hevc->level = ctx->level;
1296
1297    hevc->tier = ctx->tier;
1298
1299#ifdef NVENC_HAVE_HEVC_BFRAME_REF_MODE
1300    hevc->useBFramesAsRef = ctx->b_ref_mode;
1301#endif
1302
1303#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
1304    hevc->numRefL0 = avctx->refs;
1305    hevc->numRefL1 = avctx->refs;
1306#endif
1307
1308    return 0;
1309}
1310
1311static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx)
1312{
1313    switch (avctx->codec->id) {
1314    case AV_CODEC_ID_H264:
1315        return nvenc_setup_h264_config(avctx);
1316    case AV_CODEC_ID_HEVC:
1317        return nvenc_setup_hevc_config(avctx);
1318    /* Earlier switch/case will return if unknown codec is passed. */
1319    }
1320
1321    return 0;
1322}
1323
1324static void compute_dar(AVCodecContext *avctx, int *dw, int *dh) {
1325    int sw, sh;
1326
1327    sw = avctx->width;
1328    sh = avctx->height;
1329
1330    if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
1331        sw *= avctx->sample_aspect_ratio.num;
1332        sh *= avctx->sample_aspect_ratio.den;
1333    }
1334
1335    av_reduce(dw, dh, sw, sh, 1024 * 1024);
1336}
1337
1338static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
1339{
1340    NvencContext *ctx = avctx->priv_data;
1341    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1342    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1343
1344    NV_ENC_PRESET_CONFIG preset_config = { 0 };
1345    NVENCSTATUS nv_status = NV_ENC_SUCCESS;
1346    AVCPBProperties *cpb_props;
1347    int res = 0;
1348    int dw, dh;
1349
1350    ctx->encode_config.version = NV_ENC_CONFIG_VER;
1351    ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
1352
1353    ctx->init_encode_params.encodeHeight = avctx->height;
1354    ctx->init_encode_params.encodeWidth = avctx->width;
1355
1356    ctx->init_encode_params.encodeConfig = &ctx->encode_config;
1357
1358    preset_config.version = NV_ENC_PRESET_CONFIG_VER;
1359    preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
1360
1361#ifdef NVENC_HAVE_NEW_PRESETS
1362    ctx->init_encode_params.tuningInfo = ctx->tuning_info;
1363
1364    if (ctx->flags & NVENC_LOSSLESS)
1365        ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS;
1366    else if (ctx->flags & NVENC_LOWLATENCY)
1367        ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY;
1368
1369    nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder,
1370        ctx->init_encode_params.encodeGUID,
1371        ctx->init_encode_params.presetGUID,
1372        ctx->init_encode_params.tuningInfo,
1373        &preset_config);
1374#else
1375    nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder,
1376        ctx->init_encode_params.encodeGUID,
1377        ctx->init_encode_params.presetGUID,
1378        &preset_config);
1379#endif
1380    if (nv_status != NV_ENC_SUCCESS)
1381        return nvenc_print_error(avctx, nv_status, "Cannot get the preset configuration");
1382
1383    memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
1384
1385    ctx->encode_config.version = NV_ENC_CONFIG_VER;
1386
1387    compute_dar(avctx, &dw, &dh);
1388    ctx->init_encode_params.darHeight = dh;
1389    ctx->init_encode_params.darWidth = dw;
1390
1391    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
1392        ctx->init_encode_params.frameRateNum = avctx->framerate.num;
1393        ctx->init_encode_params.frameRateDen = avctx->framerate.den;
1394    } else {
1395        ctx->init_encode_params.frameRateNum = avctx->time_base.den;
1396        ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
1397    }
1398
1399    ctx->init_encode_params.enableEncodeAsync = 0;
1400    ctx->init_encode_params.enablePTD = 1;
1401
1402#ifdef NVENC_HAVE_NEW_PRESETS
1403    /* If lookahead isn't set from CLI, use value from preset.
1404     * P6 & P7 presets may enable lookahead for better quality.
1405     * */
1406    if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead)
1407        ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth;
1408#endif
1409
1410    if (ctx->weighted_pred == 1)
1411        ctx->init_encode_params.enableWeightedPrediction = 1;
1412
1413    if (ctx->bluray_compat) {
1414        ctx->aud = 1;
1415        ctx->dpb_size = FFMIN(FFMAX(avctx->refs, 0), 6);
1416        avctx->max_b_frames = FFMIN(avctx->max_b_frames, 3);
1417        switch (avctx->codec->id) {
1418        case AV_CODEC_ID_H264:
1419            /* maximum level depends on used resolution */
1420            break;
1421        case AV_CODEC_ID_HEVC:
1422            ctx->level = NV_ENC_LEVEL_HEVC_51;
1423            ctx->tier = NV_ENC_TIER_HEVC_HIGH;
1424            break;
1425        }
1426    }
1427
1428    if (avctx->gop_size > 0) {
1429        if (avctx->max_b_frames >= 0) {
1430            /* 0 is intra-only, 1 is I/P only, 2 is one B-Frame, 3 two B-frames, and so on. */
1431            ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
1432        }
1433
1434        ctx->encode_config.gopLength = avctx->gop_size;
1435    } else if (avctx->gop_size == 0) {
1436        ctx->encode_config.frameIntervalP = 0;
1437        ctx->encode_config.gopLength = 1;
1438    }
1439
1440    /* force to enable intra refresh */
1441    if(ctx->single_slice_intra_refresh)
1442        ctx->intra_refresh = 1;
1443
1444    if (ctx->intra_refresh)
1445        ctx->encode_config.gopLength = NVENC_INFINITE_GOPLENGTH;
1446
1447    nvenc_recalc_surfaces(avctx);
1448
1449    nvenc_setup_rate_control(avctx);
1450
1451    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
1452        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
1453    } else {
1454        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
1455    }
1456
1457    res = nvenc_setup_codec_config(avctx);
1458    if (res)
1459        return res;
1460
1461    res = nvenc_push_context(avctx);
1462    if (res < 0)
1463        return res;
1464
1465    nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
1466    if (nv_status != NV_ENC_SUCCESS) {
1467        nvenc_pop_context(avctx);
1468        return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
1469    }
1470
1471#ifdef NVENC_HAVE_CUSTREAM_PTR
1472    if (ctx->cu_context) {
1473        nv_status = p_nvenc->nvEncSetIOCudaStreams(ctx->nvencoder, &ctx->cu_stream, &ctx->cu_stream);
1474        if (nv_status != NV_ENC_SUCCESS) {
1475            nvenc_pop_context(avctx);
1476            return nvenc_print_error(avctx, nv_status, "SetIOCudaStreams failed");
1477        }
1478    }
1479#endif
1480
1481    res = nvenc_pop_context(avctx);
1482    if (res < 0)
1483        return res;
1484
1485    if (ctx->encode_config.frameIntervalP > 1)
1486        avctx->has_b_frames = 2;
1487
1488    if (ctx->encode_config.rcParams.averageBitRate > 0)
1489        avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;
1490
1491    cpb_props = ff_add_cpb_side_data(avctx);
1492    if (!cpb_props)
1493        return AVERROR(ENOMEM);
1494    cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
1495    cpb_props->avg_bitrate = avctx->bit_rate;
1496    cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;
1497
1498    return 0;
1499}
1500
1501static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt)
1502{
1503    switch (pix_fmt) {
1504    case AV_PIX_FMT_YUV420P:
1505        return NV_ENC_BUFFER_FORMAT_YV12_PL;
1506    case AV_PIX_FMT_NV12:
1507        return NV_ENC_BUFFER_FORMAT_NV12_PL;
1508    case AV_PIX_FMT_P010:
1509    case AV_PIX_FMT_P016:
1510        return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
1511    case AV_PIX_FMT_GBRP:
1512    case AV_PIX_FMT_YUV444P:
1513        return NV_ENC_BUFFER_FORMAT_YUV444_PL;
1514    case AV_PIX_FMT_GBRP16:
1515    case AV_PIX_FMT_YUV444P16:
1516        return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
1517    case AV_PIX_FMT_0RGB32:
1518    case AV_PIX_FMT_RGB32:
1519        return NV_ENC_BUFFER_FORMAT_ARGB;
1520    case AV_PIX_FMT_0BGR32:
1521    case AV_PIX_FMT_BGR32:
1522        return NV_ENC_BUFFER_FORMAT_ABGR;
1523    case AV_PIX_FMT_X2RGB10:
1524        return NV_ENC_BUFFER_FORMAT_ARGB10;
1525    case AV_PIX_FMT_X2BGR10:
1526        return NV_ENC_BUFFER_FORMAT_ABGR10;
1527    default:
1528        return NV_ENC_BUFFER_FORMAT_UNDEFINED;
1529    }
1530}
1531
1532static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
1533{
1534    NvencContext *ctx = avctx->priv_data;
1535    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1536    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1537    NvencSurface* tmp_surface = &ctx->surfaces[idx];
1538
1539    NVENCSTATUS nv_status;
1540    NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
1541    allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
1542
1543    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1544        ctx->surfaces[idx].in_ref = av_frame_alloc();
1545        if (!ctx->surfaces[idx].in_ref)
1546            return AVERROR(ENOMEM);
1547    } else {
1548        NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
1549
1550        ctx->surfaces[idx].format = nvenc_map_buffer_format(ctx->data_pix_fmt);
1551        if (ctx->surfaces[idx].format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
1552            av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
1553                   av_get_pix_fmt_name(ctx->data_pix_fmt));
1554            return AVERROR(EINVAL);
1555        }
1556
1557        allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
1558        allocSurf.width = avctx->width;
1559        allocSurf.height = avctx->height;
1560        allocSurf.bufferFmt = ctx->surfaces[idx].format;
1561
1562        nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
1563        if (nv_status != NV_ENC_SUCCESS) {
1564            return nvenc_print_error(avctx, nv_status, "CreateInputBuffer failed");
1565        }
1566
1567        ctx->surfaces[idx].input_surface = allocSurf.inputBuffer;
1568        ctx->surfaces[idx].width = allocSurf.width;
1569        ctx->surfaces[idx].height = allocSurf.height;
1570    }
1571
1572    nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
1573    if (nv_status != NV_ENC_SUCCESS) {
1574        int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
1575        if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
1576            p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
1577        av_frame_free(&ctx->surfaces[idx].in_ref);
1578        return err;
1579    }
1580
1581    ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
1582
1583    av_fifo_write(ctx->unused_surface_queue, &tmp_surface, 1);
1584
1585    return 0;
1586}
1587
1588static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
1589{
1590    NvencContext *ctx = avctx->priv_data;
1591    int i, res = 0, res2;
1592
1593    ctx->surfaces = av_calloc(ctx->nb_surfaces, sizeof(*ctx->surfaces));
1594    if (!ctx->surfaces)
1595        return AVERROR(ENOMEM);
1596
1597    ctx->timestamp_list = av_fifo_alloc2(ctx->nb_surfaces, sizeof(int64_t), 0);
1598    if (!ctx->timestamp_list)
1599        return AVERROR(ENOMEM);
1600
1601    ctx->unused_surface_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0);
1602    if (!ctx->unused_surface_queue)
1603        return AVERROR(ENOMEM);
1604
1605    ctx->output_surface_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0);
1606    if (!ctx->output_surface_queue)
1607        return AVERROR(ENOMEM);
1608    ctx->output_surface_ready_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0);
1609    if (!ctx->output_surface_ready_queue)
1610        return AVERROR(ENOMEM);
1611
1612    res = nvenc_push_context(avctx);
1613    if (res < 0)
1614        return res;
1615
1616    for (i = 0; i < ctx->nb_surfaces; i++) {
1617        if ((res = nvenc_alloc_surface(avctx, i)) < 0)
1618            goto fail;
1619    }
1620
1621fail:
1622    res2 = nvenc_pop_context(avctx);
1623    if (res2 < 0)
1624        return res2;
1625
1626    return res;
1627}
1628
1629static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
1630{
1631    NvencContext *ctx = avctx->priv_data;
1632    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1633    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1634
1635    NVENCSTATUS nv_status;
1636    uint32_t outSize = 0;
1637    char tmpHeader[256];
1638    NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
1639    payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
1640
1641    payload.spsppsBuffer = tmpHeader;
1642    payload.inBufferSize = sizeof(tmpHeader);
1643    payload.outSPSPPSPayloadSize = &outSize;
1644
1645    nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
1646    if (nv_status != NV_ENC_SUCCESS) {
1647        return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed");
1648    }
1649
1650    avctx->extradata_size = outSize;
1651    avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
1652
1653    if (!avctx->extradata) {
1654        return AVERROR(ENOMEM);
1655    }
1656
1657    memcpy(avctx->extradata, tmpHeader, outSize);
1658
1659    return 0;
1660}
1661
1662av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
1663{
1664    NvencContext *ctx               = avctx->priv_data;
1665    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1666    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1667    int i, res;
1668
1669    /* the encoder has to be flushed before it can be closed */
1670    if (ctx->nvencoder) {
1671        NV_ENC_PIC_PARAMS params        = { .version        = NV_ENC_PIC_PARAMS_VER,
1672                                            .encodePicFlags = NV_ENC_PIC_FLAG_EOS };
1673
1674        res = nvenc_push_context(avctx);
1675        if (res < 0)
1676            return res;
1677
1678        p_nvenc->nvEncEncodePicture(ctx->nvencoder, &params);
1679    }
1680
1681    av_fifo_freep2(&ctx->timestamp_list);
1682    av_fifo_freep2(&ctx->output_surface_ready_queue);
1683    av_fifo_freep2(&ctx->output_surface_queue);
1684    av_fifo_freep2(&ctx->unused_surface_queue);
1685
1686    if (ctx->surfaces && (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11)) {
1687        for (i = 0; i < ctx->nb_registered_frames; i++) {
1688            if (ctx->registered_frames[i].mapped)
1689                p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[i].in_map.mappedResource);
1690            if (ctx->registered_frames[i].regptr)
1691                p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
1692        }
1693        ctx->nb_registered_frames = 0;
1694    }
1695
1696    if (ctx->surfaces) {
1697        for (i = 0; i < ctx->nb_surfaces; ++i) {
1698            if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
1699                p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
1700            av_frame_free(&ctx->surfaces[i].in_ref);
1701            p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
1702        }
1703    }
1704    av_freep(&ctx->surfaces);
1705    ctx->nb_surfaces = 0;
1706
1707    av_frame_free(&ctx->frame);
1708
1709    av_freep(&ctx->sei_data);
1710
1711    if (ctx->nvencoder) {
1712        p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
1713
1714        res = nvenc_pop_context(avctx);
1715        if (res < 0)
1716            return res;
1717    }
1718    ctx->nvencoder = NULL;
1719
1720    if (ctx->cu_context_internal)
1721        CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
1722    ctx->cu_context = ctx->cu_context_internal = NULL;
1723
1724#if CONFIG_D3D11VA
1725    if (ctx->d3d11_device) {
1726        ID3D11Device_Release(ctx->d3d11_device);
1727        ctx->d3d11_device = NULL;
1728    }
1729#endif
1730
1731    nvenc_free_functions(&dl_fn->nvenc_dl);
1732    cuda_free_functions(&dl_fn->cuda_dl);
1733
1734    dl_fn->nvenc_device_count = 0;
1735
1736    av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
1737
1738    return 0;
1739}
1740
1741av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
1742{
1743    NvencContext *ctx = avctx->priv_data;
1744    int ret;
1745
1746    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1747        AVHWFramesContext *frames_ctx;
1748        if (!avctx->hw_frames_ctx) {
1749            av_log(avctx, AV_LOG_ERROR,
1750                   "hw_frames_ctx must be set when using GPU frames as input\n");
1751            return AVERROR(EINVAL);
1752        }
1753        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
1754        if (frames_ctx->format != avctx->pix_fmt) {
1755            av_log(avctx, AV_LOG_ERROR,
1756                   "hw_frames_ctx must match the GPU frame type\n");
1757            return AVERROR(EINVAL);
1758        }
1759        ctx->data_pix_fmt = frames_ctx->sw_format;
1760    } else {
1761        ctx->data_pix_fmt = avctx->pix_fmt;
1762    }
1763
1764    ctx->frame = av_frame_alloc();
1765    if (!ctx->frame)
1766        return AVERROR(ENOMEM);
1767
1768    if ((ret = nvenc_load_libraries(avctx)) < 0)
1769        return ret;
1770
1771    if ((ret = nvenc_setup_device(avctx)) < 0)
1772        return ret;
1773
1774    if ((ret = nvenc_setup_encoder(avctx)) < 0)
1775        return ret;
1776
1777    if ((ret = nvenc_setup_surfaces(avctx)) < 0)
1778        return ret;
1779
1780    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1781        if ((ret = nvenc_setup_extradata(avctx)) < 0)
1782            return ret;
1783    }
1784
1785    return 0;
1786}
1787
1788static NvencSurface *get_free_frame(NvencContext *ctx)
1789{
1790    NvencSurface *tmp_surf;
1791
1792    if (av_fifo_read(ctx->unused_surface_queue, &tmp_surf, 1) < 0)
1793        // queue empty
1794        return NULL;
1795
1796    return tmp_surf;
1797}
1798
1799static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
1800            NV_ENC_LOCK_INPUT_BUFFER *lock_buffer_params, const AVFrame *frame)
1801{
1802    int dst_linesize[4] = {
1803        lock_buffer_params->pitch,
1804        lock_buffer_params->pitch,
1805        lock_buffer_params->pitch,
1806        lock_buffer_params->pitch
1807    };
1808    uint8_t *dst_data[4];
1809    int ret;
1810
1811    if (frame->format == AV_PIX_FMT_YUV420P)
1812        dst_linesize[1] = dst_linesize[2] >>= 1;
1813
1814    ret = av_image_fill_pointers(dst_data, frame->format, nv_surface->height,
1815                                 lock_buffer_params->bufferDataPtr, dst_linesize);
1816    if (ret < 0)
1817        return ret;
1818
1819    if (frame->format == AV_PIX_FMT_YUV420P)
1820        FFSWAP(uint8_t*, dst_data[1], dst_data[2]);
1821
1822    av_image_copy(dst_data, dst_linesize,
1823                  (const uint8_t**)frame->data, frame->linesize, frame->format,
1824                  avctx->width, avctx->height);
1825
1826    return 0;
1827}
1828
1829static int nvenc_find_free_reg_resource(AVCodecContext *avctx)
1830{
1831    NvencContext *ctx = avctx->priv_data;
1832    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1833    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1834    NVENCSTATUS nv_status;
1835
1836    int i, first_round;
1837
1838    if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) {
1839        for (first_round = 1; first_round >= 0; first_round--) {
1840            for (i = 0; i < ctx->nb_registered_frames; i++) {
1841                if (!ctx->registered_frames[i].mapped) {
1842                    if (ctx->registered_frames[i].regptr) {
1843                        if (first_round)
1844                            continue;
1845                        nv_status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
1846                        if (nv_status != NV_ENC_SUCCESS)
1847                            return nvenc_print_error(avctx, nv_status, "Failed unregistering unused input resource");
1848                        ctx->registered_frames[i].ptr = NULL;
1849                        ctx->registered_frames[i].regptr = NULL;
1850                    }
1851                    return i;
1852                }
1853            }
1854        }
1855    } else {
1856        return ctx->nb_registered_frames++;
1857    }
1858
1859    av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
1860    return AVERROR(ENOMEM);
1861}
1862
1863static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
1864{
1865    NvencContext *ctx = avctx->priv_data;
1866    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1867    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1868
1869    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
1870    NV_ENC_REGISTER_RESOURCE reg = { 0 };
1871    int i, idx, ret;
1872
1873    for (i = 0; i < ctx->nb_registered_frames; i++) {
1874        if (avctx->pix_fmt == AV_PIX_FMT_CUDA && ctx->registered_frames[i].ptr == frame->data[0])
1875            return i;
1876        else if (avctx->pix_fmt == AV_PIX_FMT_D3D11 && ctx->registered_frames[i].ptr == frame->data[0] && ctx->registered_frames[i].ptr_index == (intptr_t)frame->data[1])
1877            return i;
1878    }
1879
1880    idx = nvenc_find_free_reg_resource(avctx);
1881    if (idx < 0)
1882        return idx;
1883
1884    reg.version            = NV_ENC_REGISTER_RESOURCE_VER;
1885    reg.width              = frames_ctx->width;
1886    reg.height             = frames_ctx->height;
1887    reg.pitch              = frame->linesize[0];
1888    reg.resourceToRegister = frame->data[0];
1889
1890    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1891        reg.resourceType   = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
1892    }
1893    else if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1894        reg.resourceType     = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
1895        reg.subResourceIndex = (intptr_t)frame->data[1];
1896    }
1897
1898    reg.bufferFormat       = nvenc_map_buffer_format(frames_ctx->sw_format);
1899    if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
1900        av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
1901               av_get_pix_fmt_name(frames_ctx->sw_format));
1902        return AVERROR(EINVAL);
1903    }
1904
1905    ret = p_nvenc->nvEncRegisterResource(ctx->nvencoder, &reg);
1906    if (ret != NV_ENC_SUCCESS) {
1907        nvenc_print_error(avctx, ret, "Error registering an input resource");
1908        return AVERROR_UNKNOWN;
1909    }
1910
1911    ctx->registered_frames[idx].ptr       = frame->data[0];
1912    ctx->registered_frames[idx].ptr_index = reg.subResourceIndex;
1913    ctx->registered_frames[idx].regptr    = reg.registeredResource;
1914    return idx;
1915}
1916
1917static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
1918                                      NvencSurface *nvenc_frame)
1919{
1920    NvencContext *ctx = avctx->priv_data;
1921    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1922    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1923
1924    int res;
1925    NVENCSTATUS nv_status;
1926
1927    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1928        int reg_idx = nvenc_register_frame(avctx, frame);
1929        if (reg_idx < 0) {
1930            av_log(avctx, AV_LOG_ERROR, "Could not register an input HW frame\n");
1931            return reg_idx;
1932        }
1933
1934        res = av_frame_ref(nvenc_frame->in_ref, frame);
1935        if (res < 0)
1936            return res;
1937
1938        if (!ctx->registered_frames[reg_idx].mapped) {
1939            ctx->registered_frames[reg_idx].in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
1940            ctx->registered_frames[reg_idx].in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
1941            nv_status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &ctx->registered_frames[reg_idx].in_map);
1942            if (nv_status != NV_ENC_SUCCESS) {
1943                av_frame_unref(nvenc_frame->in_ref);
1944                return nvenc_print_error(avctx, nv_status, "Error mapping an input resource");
1945            }
1946        }
1947
1948        ctx->registered_frames[reg_idx].mapped += 1;
1949
1950        nvenc_frame->reg_idx                   = reg_idx;
1951        nvenc_frame->input_surface             = ctx->registered_frames[reg_idx].in_map.mappedResource;
1952        nvenc_frame->format                    = ctx->registered_frames[reg_idx].in_map.mappedBufferFmt;
1953        nvenc_frame->pitch                     = frame->linesize[0];
1954
1955        return 0;
1956    } else {
1957        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
1958
1959        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1960        lockBufferParams.inputBuffer = nvenc_frame->input_surface;
1961
1962        nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
1963        if (nv_status != NV_ENC_SUCCESS) {
1964            return nvenc_print_error(avctx, nv_status, "Failed locking nvenc input buffer");
1965        }
1966
1967        nvenc_frame->pitch = lockBufferParams.pitch;
1968        res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);
1969
1970        nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
1971        if (nv_status != NV_ENC_SUCCESS) {
1972            return nvenc_print_error(avctx, nv_status, "Failed unlocking input buffer!");
1973        }
1974
1975        return res;
1976    }
1977}
1978
1979static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
1980                                            NV_ENC_PIC_PARAMS *params,
1981                                            NV_ENC_SEI_PAYLOAD *sei_data,
1982                                            int sei_count)
1983{
1984    NvencContext *ctx = avctx->priv_data;
1985
1986    switch (avctx->codec->id) {
1987    case AV_CODEC_ID_H264:
1988        params->codecPicParams.h264PicParams.sliceMode =
1989            ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
1990        params->codecPicParams.h264PicParams.sliceModeData =
1991            ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
1992        if (sei_count > 0) {
1993            params->codecPicParams.h264PicParams.seiPayloadArray = sei_data;
1994            params->codecPicParams.h264PicParams.seiPayloadArrayCnt = sei_count;
1995        }
1996
1997      break;
1998    case AV_CODEC_ID_HEVC:
1999        params->codecPicParams.hevcPicParams.sliceMode =
2000            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
2001        params->codecPicParams.hevcPicParams.sliceModeData =
2002            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
2003        if (sei_count > 0) {
2004            params->codecPicParams.hevcPicParams.seiPayloadArray = sei_data;
2005            params->codecPicParams.hevcPicParams.seiPayloadArrayCnt = sei_count;
2006        }
2007
2008        break;
2009    }
2010}
2011
2012static inline void timestamp_queue_enqueue(AVFifo *queue, int64_t timestamp)
2013{
2014    av_fifo_write(queue, &timestamp, 1);
2015}
2016
2017static inline int64_t timestamp_queue_dequeue(AVFifo *queue)
2018{
2019    int64_t timestamp = AV_NOPTS_VALUE;
2020    // The following call might fail if the queue is empty.
2021    av_fifo_read(queue, &timestamp, 1);
2022
2023    return timestamp;
2024}
2025
2026static int nvenc_set_timestamp(AVCodecContext *avctx,
2027                               NV_ENC_LOCK_BITSTREAM *params,
2028                               AVPacket *pkt)
2029{
2030    NvencContext *ctx = avctx->priv_data;
2031
2032    pkt->pts = params->outputTimeStamp;
2033    pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);
2034
2035    pkt->dts -= FFMAX(ctx->encode_config.frameIntervalP - 1, 0) * FFMAX(avctx->ticks_per_frame, 1) * FFMAX(avctx->time_base.num, 1);
2036
2037    return 0;
2038}
2039
2040static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf)
2041{
2042    NvencContext *ctx = avctx->priv_data;
2043    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
2044    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
2045
2046    uint32_t slice_mode_data;
2047    uint32_t *slice_offsets = NULL;
2048    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
2049    NVENCSTATUS nv_status;
2050    int res = 0;
2051
2052    enum AVPictureType pict_type;
2053
2054    switch (avctx->codec->id) {
2055    case AV_CODEC_ID_H264:
2056      slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
2057      break;
2058    case AV_CODEC_ID_H265:
2059      slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
2060      break;
2061    default:
2062      av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
2063      res = AVERROR(EINVAL);
2064      goto error;
2065    }
2066    slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
2067
2068    if (!slice_offsets) {
2069        res = AVERROR(ENOMEM);
2070        goto error;
2071    }
2072
2073    lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
2074
2075    lock_params.doNotWait = 0;
2076    lock_params.outputBitstream = tmpoutsurf->output_surface;
2077    lock_params.sliceOffsets = slice_offsets;
2078
2079    nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
2080    if (nv_status != NV_ENC_SUCCESS) {
2081        res = nvenc_print_error(avctx, nv_status, "Failed locking bitstream buffer");
2082        goto error;
2083    }
2084
2085    res = ff_get_encode_buffer(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);
2086
2087    if (res < 0) {
2088        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
2089        goto error;
2090    }
2091
2092    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
2093
2094    nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
2095    if (nv_status != NV_ENC_SUCCESS) {
2096        res = nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");
2097        goto error;
2098    }
2099
2100
2101    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
2102        ctx->registered_frames[tmpoutsurf->reg_idx].mapped -= 1;
2103        if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped == 0) {
2104            nv_status = p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[tmpoutsurf->reg_idx].in_map.mappedResource);
2105            if (nv_status != NV_ENC_SUCCESS) {
2106                res = nvenc_print_error(avctx, nv_status, "Failed unmapping input resource");
2107                goto error;
2108            }
2109        } else if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped < 0) {
2110            res = AVERROR_BUG;
2111            goto error;
2112        }
2113
2114        av_frame_unref(tmpoutsurf->in_ref);
2115
2116        tmpoutsurf->input_surface = NULL;
2117    }
2118
2119    switch (lock_params.pictureType) {
2120    case NV_ENC_PIC_TYPE_IDR:
2121        pkt->flags |= AV_PKT_FLAG_KEY;
2122    case NV_ENC_PIC_TYPE_I:
2123        pict_type = AV_PICTURE_TYPE_I;
2124        break;
2125    case NV_ENC_PIC_TYPE_P:
2126        pict_type = AV_PICTURE_TYPE_P;
2127        break;
2128    case NV_ENC_PIC_TYPE_B:
2129        pict_type = AV_PICTURE_TYPE_B;
2130        break;
2131    case NV_ENC_PIC_TYPE_BI:
2132        pict_type = AV_PICTURE_TYPE_BI;
2133        break;
2134    default:
2135        av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
2136        av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
2137        res = AVERROR_EXTERNAL;
2138        goto error;
2139    }
2140
2141    ff_side_data_set_encoder_stats(pkt,
2142        (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);
2143
2144    res = nvenc_set_timestamp(avctx, &lock_params, pkt);
2145    if (res < 0)
2146        goto error2;
2147
2148    av_free(slice_offsets);
2149
2150    return 0;
2151
2152error:
2153    timestamp_queue_dequeue(ctx->timestamp_list);
2154
2155error2:
2156    av_free(slice_offsets);
2157
2158    return res;
2159}
2160
2161static int output_ready(AVCodecContext *avctx, int flush)
2162{
2163    NvencContext *ctx = avctx->priv_data;
2164    int nb_ready, nb_pending;
2165
2166    nb_ready   = av_fifo_can_read(ctx->output_surface_ready_queue);
2167    nb_pending = av_fifo_can_read(ctx->output_surface_queue);
2168    if (flush)
2169        return nb_ready > 0;
2170    return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);
2171}
2172
2173static int prepare_sei_data_array(AVCodecContext *avctx, const AVFrame *frame)
2174{
2175    NvencContext *ctx = avctx->priv_data;
2176    int sei_count = 0;
2177    int i, res;
2178
2179    if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
2180        void *a53_data = NULL;
2181        size_t a53_size = 0;
2182
2183        if (ff_alloc_a53_sei(frame, 0, &a53_data, &a53_size) < 0) {
2184            av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2185        }
2186
2187        if (a53_data) {
2188            void *tmp = av_fast_realloc(ctx->sei_data,
2189                                        &ctx->sei_data_size,
2190                                        (sei_count + 1) * sizeof(*ctx->sei_data));
2191            if (!tmp) {
2192                av_free(a53_data);
2193                res = AVERROR(ENOMEM);
2194                goto error;
2195            } else {
2196                ctx->sei_data = tmp;
2197                ctx->sei_data[sei_count].payloadSize = (uint32_t)a53_size;
2198                ctx->sei_data[sei_count].payloadType = 4;
2199                ctx->sei_data[sei_count].payload = (uint8_t*)a53_data;
2200                sei_count++;
2201            }
2202        }
2203    }
2204
2205    if (ctx->s12m_tc && av_frame_get_side_data(frame, AV_FRAME_DATA_S12M_TIMECODE)) {
2206        void *tc_data = NULL;
2207        size_t tc_size = 0;
2208
2209        if (ff_alloc_timecode_sei(frame, avctx->framerate, 0, &tc_data, &tc_size) < 0) {
2210            av_log(ctx, AV_LOG_ERROR, "Not enough memory for timecode sei, skipping\n");
2211        }
2212
2213        if (tc_data) {
2214            void *tmp = av_fast_realloc(ctx->sei_data,
2215                                        &ctx->sei_data_size,
2216                                        (sei_count + 1) * sizeof(*ctx->sei_data));
2217            if (!tmp) {
2218                av_free(tc_data);
2219                res = AVERROR(ENOMEM);
2220                goto error;
2221            } else {
2222                ctx->sei_data = tmp;
2223                ctx->sei_data[sei_count].payloadSize = (uint32_t)tc_size;
2224                ctx->sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
2225                ctx->sei_data[sei_count].payload = (uint8_t*)tc_data;
2226                sei_count++;
2227            }
2228        }
2229    }
2230
2231    if (!ctx->udu_sei)
2232        return sei_count;
2233
2234    for (i = 0; i < frame->nb_side_data; i++) {
2235        AVFrameSideData *side_data = frame->side_data[i];
2236        void *tmp;
2237
2238        if (side_data->type != AV_FRAME_DATA_SEI_UNREGISTERED)
2239            continue;
2240
2241        tmp = av_fast_realloc(ctx->sei_data,
2242                              &ctx->sei_data_size,
2243                              (sei_count + 1) * sizeof(*ctx->sei_data));
2244        if (!tmp) {
2245            res = AVERROR(ENOMEM);
2246            goto error;
2247        } else {
2248            ctx->sei_data = tmp;
2249            ctx->sei_data[sei_count].payloadSize = side_data->size;
2250            ctx->sei_data[sei_count].payloadType = SEI_TYPE_USER_DATA_UNREGISTERED;
2251            ctx->sei_data[sei_count].payload = av_memdup(side_data->data, side_data->size);
2252
2253            if (!ctx->sei_data[sei_count].payload) {
2254                res = AVERROR(ENOMEM);
2255                goto error;
2256            }
2257
2258            sei_count++;
2259        }
2260    }
2261
2262    return sei_count;
2263
2264error:
2265    for (i = 0; i < sei_count; i++)
2266        av_freep(&(ctx->sei_data[i].payload));
2267
2268    return res;
2269}
2270
2271static void reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
2272{
2273    NvencContext *ctx = avctx->priv_data;
2274    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
2275    NVENCSTATUS ret;
2276
2277    NV_ENC_RECONFIGURE_PARAMS params = { 0 };
2278    int needs_reconfig = 0;
2279    int needs_encode_config = 0;
2280    int reconfig_bitrate = 0, reconfig_dar = 0;
2281    int dw, dh;
2282
2283    params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
2284    params.reInitEncodeParams = ctx->init_encode_params;
2285
2286    compute_dar(avctx, &dw, &dh);
2287    if (dw != ctx->init_encode_params.darWidth || dh != ctx->init_encode_params.darHeight) {
2288        av_log(avctx, AV_LOG_VERBOSE,
2289               "aspect ratio change (DAR): %d:%d -> %d:%d\n",
2290               ctx->init_encode_params.darWidth,
2291               ctx->init_encode_params.darHeight, dw, dh);
2292
2293        params.reInitEncodeParams.darHeight = dh;
2294        params.reInitEncodeParams.darWidth = dw;
2295
2296        needs_reconfig = 1;
2297        reconfig_dar = 1;
2298    }
2299
2300    if (ctx->rc != NV_ENC_PARAMS_RC_CONSTQP && ctx->support_dyn_bitrate) {
2301        if (avctx->bit_rate > 0 && params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate != avctx->bit_rate) {
2302            av_log(avctx, AV_LOG_VERBOSE,
2303                   "avg bitrate change: %d -> %d\n",
2304                   params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate,
2305                   (uint32_t)avctx->bit_rate);
2306
2307            params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate = avctx->bit_rate;
2308            reconfig_bitrate = 1;
2309        }
2310
2311        if (avctx->rc_max_rate > 0 && ctx->encode_config.rcParams.maxBitRate != avctx->rc_max_rate) {
2312            av_log(avctx, AV_LOG_VERBOSE,
2313                   "max bitrate change: %d -> %d\n",
2314                   params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate,
2315                   (uint32_t)avctx->rc_max_rate);
2316
2317            params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate = avctx->rc_max_rate;
2318            reconfig_bitrate = 1;
2319        }
2320
2321        if (avctx->rc_buffer_size > 0 && ctx->encode_config.rcParams.vbvBufferSize != avctx->rc_buffer_size) {
2322            av_log(avctx, AV_LOG_VERBOSE,
2323                   "vbv buffer size change: %d -> %d\n",
2324                   params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize,
2325                   avctx->rc_buffer_size);
2326
2327            params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize = avctx->rc_buffer_size;
2328            reconfig_bitrate = 1;
2329        }
2330
2331        if (reconfig_bitrate) {
2332            params.resetEncoder = 1;
2333            params.forceIDR = 1;
2334
2335            needs_encode_config = 1;
2336            needs_reconfig = 1;
2337        }
2338    }
2339
2340    if (!needs_encode_config)
2341        params.reInitEncodeParams.encodeConfig = NULL;
2342
2343    if (needs_reconfig) {
2344        ret = p_nvenc->nvEncReconfigureEncoder(ctx->nvencoder, &params);
2345        if (ret != NV_ENC_SUCCESS) {
2346            nvenc_print_error(avctx, ret, "failed to reconfigure nvenc");
2347        } else {
2348            if (reconfig_dar) {
2349                ctx->init_encode_params.darHeight = dh;
2350                ctx->init_encode_params.darWidth = dw;
2351            }
2352
2353            if (reconfig_bitrate) {
2354                ctx->encode_config.rcParams.averageBitRate = params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate;
2355                ctx->encode_config.rcParams.maxBitRate = params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate;
2356                ctx->encode_config.rcParams.vbvBufferSize = params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize;
2357            }
2358
2359        }
2360    }
2361}
2362
2363static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
2364{
2365    NVENCSTATUS nv_status;
2366    NvencSurface *tmp_out_surf, *in_surf;
2367    int res, res2;
2368    int sei_count = 0;
2369    int i;
2370
2371    NvencContext *ctx = avctx->priv_data;
2372    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
2373    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
2374
2375    NV_ENC_PIC_PARAMS pic_params = { 0 };
2376    pic_params.version = NV_ENC_PIC_PARAMS_VER;
2377
2378    if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
2379        return AVERROR(EINVAL);
2380
2381    if (frame && frame->buf[0]) {
2382        in_surf = get_free_frame(ctx);
2383        if (!in_surf)
2384            return AVERROR(EAGAIN);
2385
2386        res = nvenc_push_context(avctx);
2387        if (res < 0)
2388            return res;
2389
2390        reconfig_encoder(avctx, frame);
2391
2392        res = nvenc_upload_frame(avctx, frame, in_surf);
2393
2394        res2 = nvenc_pop_context(avctx);
2395        if (res2 < 0)
2396            return res2;
2397
2398        if (res)
2399            return res;
2400
2401        pic_params.inputBuffer = in_surf->input_surface;
2402        pic_params.bufferFmt = in_surf->format;
2403        pic_params.inputWidth = in_surf->width;
2404        pic_params.inputHeight = in_surf->height;
2405        pic_params.inputPitch = in_surf->pitch;
2406        pic_params.outputBitstream = in_surf->output_surface;
2407
2408        if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2409            if (frame->top_field_first)
2410                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
2411            else
2412                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
2413        } else {
2414            pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
2415        }
2416
2417        if (ctx->forced_idr >= 0 && frame->pict_type == AV_PICTURE_TYPE_I) {
2418            pic_params.encodePicFlags =
2419                ctx->forced_idr ? NV_ENC_PIC_FLAG_FORCEIDR : NV_ENC_PIC_FLAG_FORCEINTRA;
2420        } else {
2421            pic_params.encodePicFlags = 0;
2422        }
2423
2424        pic_params.inputTimeStamp = frame->pts;
2425
2426        if (ctx->extra_sei) {
2427            res = prepare_sei_data_array(avctx, frame);
2428            if (res < 0)
2429                return res;
2430            sei_count = res;
2431        }
2432
2433        nvenc_codec_specific_pic_params(avctx, &pic_params, ctx->sei_data, sei_count);
2434    } else {
2435        pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
2436    }
2437
2438    res = nvenc_push_context(avctx);
2439    if (res < 0)
2440        return res;
2441
2442    nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
2443
2444    for (i = 0; i < sei_count; i++)
2445        av_freep(&(ctx->sei_data[i].payload));
2446
2447    res = nvenc_pop_context(avctx);
2448    if (res < 0)
2449        return res;
2450
2451    if (nv_status != NV_ENC_SUCCESS &&
2452        nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
2453        return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");
2454
2455    if (frame && frame->buf[0]) {
2456        av_fifo_write(ctx->output_surface_queue, &in_surf, 1);
2457        timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
2458    }
2459
2460    /* all the pending buffers are now ready for output */
2461    if (nv_status == NV_ENC_SUCCESS) {
2462        while (av_fifo_read(ctx->output_surface_queue, &tmp_out_surf, 1) >= 0)
2463            av_fifo_write(ctx->output_surface_ready_queue, &tmp_out_surf, 1);
2464    }
2465
2466    return 0;
2467}
2468
2469int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
2470{
2471    NvencSurface *tmp_out_surf;
2472    int res, res2;
2473
2474    NvencContext *ctx = avctx->priv_data;
2475
2476    AVFrame *frame = ctx->frame;
2477
2478    if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
2479        return AVERROR(EINVAL);
2480
2481    if (!frame->buf[0]) {
2482        res = ff_encode_get_frame(avctx, frame);
2483        if (res < 0 && res != AVERROR_EOF)
2484            return res;
2485    }
2486
2487    res = nvenc_send_frame(avctx, frame);
2488    if (res < 0) {
2489        if (res != AVERROR(EAGAIN))
2490            return res;
2491    } else
2492        av_frame_unref(frame);
2493
2494    if (output_ready(avctx, avctx->internal->draining)) {
2495        av_fifo_read(ctx->output_surface_ready_queue, &tmp_out_surf, 1);
2496
2497        res = nvenc_push_context(avctx);
2498        if (res < 0)
2499            return res;
2500
2501        res = process_output_surface(avctx, pkt, tmp_out_surf);
2502
2503        res2 = nvenc_pop_context(avctx);
2504        if (res2 < 0)
2505            return res2;
2506
2507        if (res)
2508            return res;
2509
2510        av_fifo_write(ctx->unused_surface_queue, &tmp_out_surf, 1);
2511    } else if (avctx->internal->draining) {
2512        return AVERROR_EOF;
2513    } else {
2514        return AVERROR(EAGAIN);
2515    }
2516
2517    return 0;
2518}
2519
2520av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx)
2521{
2522    NvencContext *ctx = avctx->priv_data;
2523
2524    nvenc_send_frame(avctx, NULL);
2525    av_fifo_reset2(ctx->timestamp_list);
2526}
2527