1 /*
2 * H.264/HEVC hardware encoding using nvidia nvenc
3 * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "config.h"
23 #include "config_components.h"
24
25 #include "nvenc.h"
26 #include "hevc_sei.h"
27
28 #include "libavutil/hwcontext_cuda.h"
29 #include "libavutil/hwcontext.h"
30 #include "libavutil/cuda_check.h"
31 #include "libavutil/imgutils.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/pixdesc.h"
34 #include "atsc_a53.h"
35 #include "encode.h"
36 #include "internal.h"
37 #include "packet_internal.h"
38
39 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)
40
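/* Minimum GPU compute capability required for NVENC, packed as
 * (major << 4) | minor; 0x30 corresponds to SM 3.0 (Kepler) and is
 * checked in nvenc_check_device() below. */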
41 #define NVENC_CAP 0x30
42 #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
43 rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
44 rc == NV_ENC_PARAMS_RC_CBR_HQ)
45
46 const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
47 AV_PIX_FMT_YUV420P,
48 AV_PIX_FMT_NV12,
49 AV_PIX_FMT_P010,
50 AV_PIX_FMT_YUV444P,
51 AV_PIX_FMT_P016, // Truncated to 10bits
52 AV_PIX_FMT_YUV444P16, // Truncated to 10bits
53 AV_PIX_FMT_0RGB32,
54 AV_PIX_FMT_RGB32,
55 AV_PIX_FMT_0BGR32,
56 AV_PIX_FMT_BGR32,
57 AV_PIX_FMT_X2RGB10,
58 AV_PIX_FMT_X2BGR10,
59 AV_PIX_FMT_GBRP,
60 AV_PIX_FMT_GBRP16, // Truncated to 10bits
61 AV_PIX_FMT_CUDA,
62 #if CONFIG_D3D11VA
63 AV_PIX_FMT_D3D11,
64 #endif
65 AV_PIX_FMT_NONE
66 };
67
68 const AVCodecHWConfigInternal *const ff_nvenc_hw_configs[] = {
69 HW_CONFIG_ENCODER_FRAMES(CUDA, CUDA),
70 HW_CONFIG_ENCODER_DEVICE(NONE, CUDA),
71 #if CONFIG_D3D11VA
72 HW_CONFIG_ENCODER_FRAMES(D3D11, D3D11VA),
73 HW_CONFIG_ENCODER_DEVICE(NONE, D3D11VA),
74 #endif
75 NULL,
76 };
77
78 #define IS_10BIT(pix_fmt) (pix_fmt == AV_PIX_FMT_P010 || \
79 pix_fmt == AV_PIX_FMT_P016 || \
80 pix_fmt == AV_PIX_FMT_YUV444P16 || \
81 pix_fmt == AV_PIX_FMT_GBRP16)
82
83 #define IS_YUV444(pix_fmt) (pix_fmt == AV_PIX_FMT_YUV444P || \
84 pix_fmt == AV_PIX_FMT_YUV444P16 || \
85 pix_fmt == AV_PIX_FMT_GBRP || \
86 pix_fmt == AV_PIX_FMT_GBRP16)
87
88 #define IS_GBRP(pix_fmt) (pix_fmt == AV_PIX_FMT_GBRP || \
89 pix_fmt == AV_PIX_FMT_GBRP16)
90
91 static const struct {
92 NVENCSTATUS nverr;
93 int averr;
94 const char *desc;
95 } nvenc_errors[] = {
96 { NV_ENC_SUCCESS, 0, "success" },
97 { NV_ENC_ERR_NO_ENCODE_DEVICE, AVERROR(ENOENT), "no encode device" },
98 { NV_ENC_ERR_UNSUPPORTED_DEVICE, AVERROR(ENOSYS), "unsupported device" },
99 { NV_ENC_ERR_INVALID_ENCODERDEVICE, AVERROR(EINVAL), "invalid encoder device" },
100 { NV_ENC_ERR_INVALID_DEVICE, AVERROR(EINVAL), "invalid device" },
101 { NV_ENC_ERR_DEVICE_NOT_EXIST, AVERROR(EIO), "device does not exist" },
102 { NV_ENC_ERR_INVALID_PTR, AVERROR(EFAULT), "invalid ptr" },
103 { NV_ENC_ERR_INVALID_EVENT, AVERROR(EINVAL), "invalid event" },
104 { NV_ENC_ERR_INVALID_PARAM, AVERROR(EINVAL), "invalid param" },
105 { NV_ENC_ERR_INVALID_CALL, AVERROR(EINVAL), "invalid call" },
106 { NV_ENC_ERR_OUT_OF_MEMORY, AVERROR(ENOMEM), "out of memory" },
107 { NV_ENC_ERR_ENCODER_NOT_INITIALIZED, AVERROR(EINVAL), "encoder not initialized" },
108 { NV_ENC_ERR_UNSUPPORTED_PARAM, AVERROR(ENOSYS), "unsupported param" },
109 { NV_ENC_ERR_LOCK_BUSY, AVERROR(EAGAIN), "lock busy" },
110 { NV_ENC_ERR_NOT_ENOUGH_BUFFER, AVERROR_BUFFER_TOO_SMALL, "not enough buffer"},
111 { NV_ENC_ERR_INVALID_VERSION, AVERROR(EINVAL), "invalid version" },
112 { NV_ENC_ERR_MAP_FAILED, AVERROR(EIO), "map failed" },
113 { NV_ENC_ERR_NEED_MORE_INPUT, AVERROR(EAGAIN), "need more input" },
114 { NV_ENC_ERR_ENCODER_BUSY, AVERROR(EAGAIN), "encoder busy" },
115 { NV_ENC_ERR_EVENT_NOT_REGISTERD, AVERROR(EBADF), "event not registered" },
116 { NV_ENC_ERR_GENERIC, AVERROR_UNKNOWN, "generic error" },
117 { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY, AVERROR(EINVAL), "incompatible client key" },
118 { NV_ENC_ERR_UNIMPLEMENTED, AVERROR(ENOSYS), "unimplemented" },
119 { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO), "resource register failed" },
120 { NV_ENC_ERR_RESOURCE_NOT_REGISTERED, AVERROR(EBADF), "resource not registered" },
121 { NV_ENC_ERR_RESOURCE_NOT_MAPPED, AVERROR(EBADF), "resource not mapped" },
122 };
123
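/* Map an NVENCSTATUS to the matching FFmpeg error code and, if requested,
 * a human-readable description from the table above. */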
124 static int nvenc_map_error(NVENCSTATUS err, const char **desc)
125 {
126 int i;
127 for (i = 0; i < FF_ARRAY_ELEMS(nvenc_errors); i++) {
128 if (nvenc_errors[i].nverr == err) {
129 if (desc)
130 *desc = nvenc_errors[i].desc;
131 return nvenc_errors[i].averr;
132 }
133 }
134 if (desc)
135 *desc = "unknown error";
136 return AVERROR_UNKNOWN;
137 }
138
139 static int nvenc_print_error(AVCodecContext *avctx, NVENCSTATUS err,
140 const char *error_string)
141 {
142 const char *desc;
143 const char *details = "(no details)";
144 int ret = nvenc_map_error(err, &desc);
145
146 #ifdef NVENC_HAVE_GETLASTERRORSTRING
147 NvencContext *ctx = avctx->priv_data;
148 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
149
150 if (p_nvenc && ctx->nvencoder)
151 details = p_nvenc->nvEncGetLastErrorString(ctx->nvencoder);
152 #endif
153
154 av_log(avctx, AV_LOG_ERROR, "%s: %s (%d): %s\n", error_string, desc, err, details);
155
156 return ret;
157 }
158
159 typedef struct GUIDTuple {
160 const GUID guid;
161 int flags;
162 } GUIDTuple;
163
164 #define PRESET_ALIAS(alias, name, ...) \
165 [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ }
166
167 #define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__)
168
169 static void nvenc_map_preset(NvencContext *ctx)
170 {
171 GUIDTuple presets[] = {
172 #ifdef NVENC_HAVE_NEW_PRESETS
173 PRESET(P1),
174 PRESET(P2),
175 PRESET(P3),
176 PRESET(P4),
177 PRESET(P5),
178 PRESET(P6),
179 PRESET(P7),
180 PRESET_ALIAS(SLOW, P7, NVENC_TWO_PASSES),
181 PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS),
182 PRESET_ALIAS(FAST, P1, NVENC_ONE_PASS),
183 // Compat aliases
184 PRESET_ALIAS(DEFAULT, P4, NVENC_DEPRECATED_PRESET),
185 PRESET_ALIAS(HP, P1, NVENC_DEPRECATED_PRESET),
186 PRESET_ALIAS(HQ, P7, NVENC_DEPRECATED_PRESET),
187 PRESET_ALIAS(BD, P5, NVENC_DEPRECATED_PRESET),
188 PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
189 PRESET_ALIAS(LOW_LATENCY_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
190 PRESET_ALIAS(LOW_LATENCY_HQ, P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
191 PRESET_ALIAS(LOSSLESS_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
192 PRESET_ALIAS(LOSSLESS_HP, P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
193 #else
194 PRESET(DEFAULT),
195 PRESET(HP),
196 PRESET(HQ),
197 PRESET(BD),
198 PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES),
199 PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS),
200 PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS),
201 PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY),
202 PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY),
203 PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY),
204 PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS),
205 PRESET(LOSSLESS_HP, NVENC_LOSSLESS),
206 #endif
207 };
208
209 GUIDTuple *t = &presets[ctx->preset];
210
211 ctx->init_encode_params.presetGUID = t->guid;
212 ctx->flags = t->flags;
213
214 #ifdef NVENC_HAVE_NEW_PRESETS
215 if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS)
216 ctx->flags |= NVENC_LOSSLESS;
217 #endif
218 }
219
220 #undef PRESET
221 #undef PRESET_ALIAS
222
223 static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
224 {
225 #if NVENCAPI_CHECK_VERSION(11, 2)
226 const char *minver = "(unknown)";
227 #elif NVENCAPI_CHECK_VERSION(11, 1)
228 # if defined(_WIN32) || defined(__CYGWIN__)
229 const char *minver = "471.41";
230 # else
231 const char *minver = "470.57.02";
232 # endif
233 #elif NVENCAPI_CHECK_VERSION(11, 0)
234 # if defined(_WIN32) || defined(__CYGWIN__)
235 const char *minver = "456.71";
236 # else
237 const char *minver = "455.28";
238 # endif
239 #elif NVENCAPI_CHECK_VERSION(10, 0)
240 # if defined(_WIN32) || defined(__CYGWIN__)
241 const char *minver = "450.51";
242 # else
243 const char *minver = "445.87";
244 # endif
245 #elif NVENCAPI_CHECK_VERSION(9, 1)
246 # if defined(_WIN32) || defined(__CYGWIN__)
247 const char *minver = "436.15";
248 # else
249 const char *minver = "435.21";
250 # endif
251 #elif NVENCAPI_CHECK_VERSION(9, 0)
252 # if defined(_WIN32) || defined(__CYGWIN__)
253 const char *minver = "418.81";
254 # else
255 const char *minver = "418.30";
256 # endif
257 #elif NVENCAPI_CHECK_VERSION(8, 2)
258 # if defined(_WIN32) || defined(__CYGWIN__)
259 const char *minver = "397.93";
260 # else
261 const char *minver = "396.24";
262 # endif
263 #elif NVENCAPI_CHECK_VERSION(8, 1)
264 # if defined(_WIN32) || defined(__CYGWIN__)
265 const char *minver = "390.77";
266 # else
267 const char *minver = "390.25";
268 # endif
269 #else
270 # if defined(_WIN32) || defined(__CYGWIN__)
271 const char *minver = "378.66";
272 # else
273 const char *minver = "378.13";
274 # endif
275 #endif
276 av_log(avctx, level, "The minimum required Nvidia driver for nvenc is %s or newer\n", minver);
277 }
278
279 static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
280 {
281 NvencContext *ctx = avctx->priv_data;
282 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
283 NVENCSTATUS err;
284 uint32_t nvenc_max_ver;
285 int ret;
286
287 ret = cuda_load_functions(&dl_fn->cuda_dl, avctx);
288 if (ret < 0)
289 return ret;
290
291 ret = nvenc_load_functions(&dl_fn->nvenc_dl, avctx);
292 if (ret < 0) {
293 nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
294 return ret;
295 }
296
297 err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
298 if (err != NV_ENC_SUCCESS)
299 return nvenc_print_error(avctx, err, "Failed to query nvenc max version");
300
301 av_log(avctx, AV_LOG_VERBOSE, "Loaded Nvenc version %d.%d\n", nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
302
303 if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) > nvenc_max_ver) {
304 av_log(avctx, AV_LOG_ERROR, "Driver does not support the required nvenc API version. "
305 "Required: %d.%d Found: %d.%d\n",
306 NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
307 nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
308 nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
309 return AVERROR(ENOSYS);
310 }
311
312 dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
313
314 err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
315 if (err != NV_ENC_SUCCESS)
316 return nvenc_print_error(avctx, err, "Failed to create nvenc instance");
317
318 av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
319
320 return 0;
321 }
322
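/* Make the encoder's CUDA context current on this thread; a no-op when
 * encoding through a D3D11 device. */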
323 static int nvenc_push_context(AVCodecContext *avctx)
324 {
325 NvencContext *ctx = avctx->priv_data;
326 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
327
328 if (ctx->d3d11_device)
329 return 0;
330
331 return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
332 }
333
334 static int nvenc_pop_context(AVCodecContext *avctx)
335 {
336 NvencContext *ctx = avctx->priv_data;
337 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
338 CUcontext dummy;
339
340 if (ctx->d3d11_device)
341 return 0;
342
343 return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
344 }
345
346 static av_cold int nvenc_open_session(AVCodecContext *avctx)
347 {
348 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
349 NvencContext *ctx = avctx->priv_data;
350 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
351 NVENCSTATUS ret;
352
353 params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
354 params.apiVersion = NVENCAPI_VERSION;
355 if (ctx->d3d11_device) {
356 params.device = ctx->d3d11_device;
357 params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
358 } else {
359 params.device = ctx->cu_context;
360 params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
361 }
362
363 ret = p_nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
364 if (ret != NV_ENC_SUCCESS) {
365 ctx->nvencoder = NULL;
366 return nvenc_print_error(avctx, ret, "OpenEncodeSessionEx failed");
367 }
368
369 return 0;
370 }
371
372 static int nvenc_check_codec_support(AVCodecContext *avctx)
373 {
374 NvencContext *ctx = avctx->priv_data;
375 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
376 int i, ret, count = 0;
377 GUID *guids = NULL;
378
379 ret = p_nvenc->nvEncGetEncodeGUIDCount(ctx->nvencoder, &count);
380
381 if (ret != NV_ENC_SUCCESS || !count)
382 return AVERROR(ENOSYS);
383
384 guids = av_malloc(count * sizeof(GUID));
385 if (!guids)
386 return AVERROR(ENOMEM);
387
388 ret = p_nvenc->nvEncGetEncodeGUIDs(ctx->nvencoder, guids, count, &count);
389 if (ret != NV_ENC_SUCCESS) {
390 ret = AVERROR(ENOSYS);
391 goto fail;
392 }
393
394 ret = AVERROR(ENOSYS);
395 for (i = 0; i < count; i++) {
396 if (!memcmp(&guids[i], &ctx->init_encode_params.encodeGUID, sizeof(*guids))) {
397 ret = 0;
398 break;
399 }
400 }
401
402 fail:
403 av_free(guids);
404
405 return ret;
406 }
407
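/* Query a single NVENC capability for the selected codec GUID; returns the
 * reported value, or 0 if the query fails. */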
408 static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
409 {
410 NvencContext *ctx = avctx->priv_data;
411 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
412 NV_ENC_CAPS_PARAM params = { 0 };
413 int ret, val = 0;
414
415 params.version = NV_ENC_CAPS_PARAM_VER;
416 params.capsToQuery = cap;
417
418 ret = p_nvenc->nvEncGetEncodeCaps(ctx->nvencoder, ctx->init_encode_params.encodeGUID, &params, &val);
419
420 if (ret == NV_ENC_SUCCESS)
421 return val;
422 return 0;
423 }
424
425 static int nvenc_check_capabilities(AVCodecContext *avctx)
426 {
427 NvencContext *ctx = avctx->priv_data;
428 int ret;
429
430 ret = nvenc_check_codec_support(avctx);
431 if (ret < 0) {
432 av_log(avctx, AV_LOG_WARNING, "Codec not supported\n");
433 return ret;
434 }
435
436 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
437 if (IS_YUV444(ctx->data_pix_fmt) && ret <= 0) {
438 av_log(avctx, AV_LOG_WARNING, "YUV444P not supported\n");
439 return AVERROR(ENOSYS);
440 }
441
442 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
443 if (ctx->flags & NVENC_LOSSLESS && ret <= 0) {
444 av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n");
445 return AVERROR(ENOSYS);
446 }
447
448 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
449 if (ret < avctx->width) {
450 av_log(avctx, AV_LOG_WARNING, "Width %d exceeds %d\n",
451 avctx->width, ret);
452 return AVERROR(ENOSYS);
453 }
454
455 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
456 if (ret < avctx->height) {
457 av_log(avctx, AV_LOG_WARNING, "Height %d exceeds %d\n",
458 avctx->height, ret);
459 return AVERROR(ENOSYS);
460 }
461
462 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
463 if (ret < avctx->max_b_frames) {
464 av_log(avctx, AV_LOG_WARNING, "Max B-frames %d exceed %d\n",
465 avctx->max_b_frames, ret);
466
467 return AVERROR(ENOSYS);
468 }
469
470 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_FIELD_ENCODING);
471 if (ret < 1 && avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
472 av_log(avctx, AV_LOG_WARNING,
473 "Interlaced encoding is not supported. Supported level: %d\n",
474 ret);
475 return AVERROR(ENOSYS);
476 }
477
478 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
479 if (IS_10BIT(ctx->data_pix_fmt) && ret <= 0) {
480 av_log(avctx, AV_LOG_WARNING, "10 bit encode not supported\n");
481 return AVERROR(ENOSYS);
482 }
483
484 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD);
485 if (ctx->rc_lookahead > 0 && ret <= 0) {
486 av_log(avctx, AV_LOG_WARNING, "RC lookahead not supported\n");
487 return AVERROR(ENOSYS);
488 }
489
490 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ);
491 if (ctx->temporal_aq > 0 && ret <= 0) {
492 av_log(avctx, AV_LOG_WARNING, "Temporal AQ not supported\n");
493 return AVERROR(ENOSYS);
494 }
495
496 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION);
497 if (ctx->weighted_pred > 0 && ret <= 0) {
498 av_log (avctx, AV_LOG_WARNING, "Weighted Prediction not supported\n");
499 return AVERROR(ENOSYS);
500 }
501
502 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CABAC);
503 if (ctx->coder == NV_ENC_H264_ENTROPY_CODING_MODE_CABAC && ret <= 0) {
504 av_log(avctx, AV_LOG_WARNING, "CABAC entropy coding not supported\n");
505 return AVERROR(ENOSYS);
506 }
507
508 #ifdef NVENC_HAVE_BFRAME_REF_MODE
509 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE);
510 if (ctx->b_ref_mode == NV_ENC_BFRAME_REF_MODE_EACH && ret != 1 && ret != 3) {
511 av_log(avctx, AV_LOG_WARNING, "Each B frame as reference is not supported\n");
512 return AVERROR(ENOSYS);
513 } else if (ctx->b_ref_mode != NV_ENC_BFRAME_REF_MODE_DISABLED && ret == 0) {
514 av_log(avctx, AV_LOG_WARNING, "B frames as references are not supported\n");
515 return AVERROR(ENOSYS);
516 }
517 #else
518 if (ctx->b_ref_mode != 0) {
519 av_log(avctx, AV_LOG_WARNING, "B frames as references need SDK 8.1 at build time\n");
520 return AVERROR(ENOSYS);
521 }
522 #endif
523
524 #ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
525 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES);
526 if(avctx->refs != NV_ENC_NUM_REF_FRAMES_AUTOSELECT && ret <= 0) {
527 av_log(avctx, AV_LOG_WARNING, "Multiple reference frames are not supported by the device\n");
528 return AVERROR(ENOSYS);
529 }
530 #else
531 if(avctx->refs != 0) {
532 av_log(avctx, AV_LOG_WARNING, "Multiple reference frames need SDK 9.1 at build time\n");
533 return AVERROR(ENOSYS);
534 }
535 #endif
536
537 #ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH
538 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SINGLE_SLICE_INTRA_REFRESH);
539 if(ctx->single_slice_intra_refresh && ret <= 0) {
540 av_log(avctx, AV_LOG_WARNING, "Single slice intra refresh not supported by the device\n");
541 return AVERROR(ENOSYS);
542 }
543 #else
544 if(ctx->single_slice_intra_refresh) {
545 av_log(avctx, AV_LOG_WARNING, "Single slice intra refresh needs SDK 11.1 at build time\n");
546 return AVERROR(ENOSYS);
547 }
548 #endif
549
550 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_INTRA_REFRESH);
551 if((ctx->intra_refresh || ctx->single_slice_intra_refresh) && ret <= 0) {
552 av_log(avctx, AV_LOG_WARNING, "Intra refresh not supported by the device\n");
553 return AVERROR(ENOSYS);
554 }
555
556 #ifndef NVENC_HAVE_HEVC_CONSTRAINED_ENCODING
557 if (ctx->constrained_encoding && avctx->codec->id == AV_CODEC_ID_HEVC) {
558 av_log(avctx, AV_LOG_WARNING, "HEVC constrained encoding needs SDK 10.0 at build time\n");
559 return AVERROR(ENOSYS);
560 }
561 #endif
562
563 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING);
564 if(ctx->constrained_encoding && ret <= 0) {
565 av_log(avctx, AV_LOG_WARNING, "Constrained encoding not supported by the device\n");
566 return AVERROR(ENOSYS);
567 }
568
569 ctx->support_dyn_bitrate = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
570
571 return 0;
572 }
573
574 static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
575 {
576 NvencContext *ctx = avctx->priv_data;
577 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
578 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
579 char name[128] = { 0};
580 int major, minor, ret;
581 CUdevice cu_device;
582 int loglevel = AV_LOG_VERBOSE;
583
584 if (ctx->device == LIST_DEVICES)
585 loglevel = AV_LOG_INFO;
586
587 ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
588 if (ret < 0)
589 return ret;
590
591 ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
592 if (ret < 0)
593 return ret;
594
595 ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
596 if (ret < 0)
597 return ret;
598
599 av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
600 if (((major << 4) | minor) < NVENC_CAP) {
601 av_log(avctx, loglevel, "does not support NVENC\n");
602 goto fail;
603 }
604
605 if (ctx->device != idx && ctx->device != ANY_DEVICE)
606 return -1;
607
608 ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
609 if (ret < 0)
610 goto fail;
611
612 ctx->cu_context = ctx->cu_context_internal;
613 ctx->cu_stream = NULL;
614
615 if ((ret = nvenc_pop_context(avctx)) < 0)
616 goto fail2;
617
618 if ((ret = nvenc_open_session(avctx)) < 0)
619 goto fail2;
620
621 if ((ret = nvenc_check_capabilities(avctx)) < 0)
622 goto fail3;
623
624 av_log(avctx, loglevel, "supports NVENC\n");
625
626 dl_fn->nvenc_device_count++;
627
628 if (ctx->device == idx || ctx->device == ANY_DEVICE)
629 return 0;
630
631 fail3:
632 if ((ret = nvenc_push_context(avctx)) < 0)
633 return ret;
634
635 p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
636 ctx->nvencoder = NULL;
637
638 if ((ret = nvenc_pop_context(avctx)) < 0)
639 return ret;
640
641 fail2:
642 CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
643 ctx->cu_context_internal = NULL;
644
645 fail:
646 return AVERROR(ENOSYS);
647 }
648
649 static av_cold int nvenc_setup_device(AVCodecContext *avctx)
650 {
651 NvencContext *ctx = avctx->priv_data;
652 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
653
654 switch (avctx->codec->id) {
655 case AV_CODEC_ID_H264:
656 ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
657 break;
658 case AV_CODEC_ID_HEVC:
659 ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
660 break;
661 default:
662 return AVERROR_BUG;
663 }
664
665 nvenc_map_preset(ctx);
666
667 if (ctx->flags & NVENC_DEPRECATED_PRESET)
668 av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. Use p1 to p7 + -tune or fast/medium/slow.\n");
669
670 if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
671 AVHWFramesContext *frames_ctx;
672 AVHWDeviceContext *hwdev_ctx;
673 AVCUDADeviceContext *cuda_device_hwctx = NULL;
674 #if CONFIG_D3D11VA
675 AVD3D11VADeviceContext *d3d11_device_hwctx = NULL;
676 #endif
677 int ret;
678
679 if (avctx->hw_frames_ctx) {
680 frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
681 if (frames_ctx->format == AV_PIX_FMT_CUDA)
682 cuda_device_hwctx = frames_ctx->device_ctx->hwctx;
683 #if CONFIG_D3D11VA
684 else if (frames_ctx->format == AV_PIX_FMT_D3D11)
685 d3d11_device_hwctx = frames_ctx->device_ctx->hwctx;
686 #endif
687 else
688 return AVERROR(EINVAL);
689 } else if (avctx->hw_device_ctx) {
690 hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
691 if (hwdev_ctx->type == AV_HWDEVICE_TYPE_CUDA)
692 cuda_device_hwctx = hwdev_ctx->hwctx;
693 #if CONFIG_D3D11VA
694 else if (hwdev_ctx->type == AV_HWDEVICE_TYPE_D3D11VA)
695 d3d11_device_hwctx = hwdev_ctx->hwctx;
696 #endif
697 else
698 return AVERROR(EINVAL);
699 } else {
700 return AVERROR(EINVAL);
701 }
702
703 if (cuda_device_hwctx) {
704 ctx->cu_context = cuda_device_hwctx->cuda_ctx;
705 ctx->cu_stream = cuda_device_hwctx->stream;
706 }
707 #if CONFIG_D3D11VA
708 else if (d3d11_device_hwctx) {
709 ctx->d3d11_device = d3d11_device_hwctx->device;
710 ID3D11Device_AddRef(ctx->d3d11_device);
711 }
712 #endif
713
714 ret = nvenc_open_session(avctx);
715 if (ret < 0)
716 return ret;
717
718 ret = nvenc_check_capabilities(avctx);
719 if (ret < 0) {
720 av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
721 return ret;
722 }
723 } else {
724 int i, nb_devices = 0;
725
726 if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
727 return AVERROR_UNKNOWN;
728
729 if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
730 return AVERROR_UNKNOWN;
731
732 if (!nb_devices) {
733 av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
734 return AVERROR_EXTERNAL;
735 }
736
737 av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", nb_devices);
738
739 dl_fn->nvenc_device_count = 0;
740 for (i = 0; i < nb_devices; ++i) {
741 if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
742 return 0;
743 }
744
745 if (ctx->device == LIST_DEVICES)
746 return AVERROR_EXIT;
747
748 if (!dl_fn->nvenc_device_count) {
749 av_log(avctx, AV_LOG_FATAL, "No capable devices found\n");
750 return AVERROR_EXTERNAL;
751 }
752
753 av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->device, nb_devices);
754 return AVERROR(EINVAL);
755 }
756
757 return 0;
758 }
759
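/* Configure constant-QP rate control. I- and B-frame QPs are derived from
 * the P-frame QP via i_quant_factor/b_quant_factor unless set explicitly. */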
760 static av_cold void set_constqp(AVCodecContext *avctx)
761 {
762 NvencContext *ctx = avctx->priv_data;
763 NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
764
765 rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
766
767 if (ctx->init_qp_p >= 0) {
768 rc->constQP.qpInterP = ctx->init_qp_p;
769 if (ctx->init_qp_i >= 0 && ctx->init_qp_b >= 0) {
770 rc->constQP.qpIntra = ctx->init_qp_i;
771 rc->constQP.qpInterB = ctx->init_qp_b;
772 } else if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
773 rc->constQP.qpIntra = av_clip(
774 rc->constQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
775 rc->constQP.qpInterB = av_clip(
776 rc->constQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
777 } else {
778 rc->constQP.qpIntra = rc->constQP.qpInterP;
779 rc->constQP.qpInterB = rc->constQP.qpInterP;
780 }
781 } else if (ctx->cqp >= 0) {
782 rc->constQP.qpInterP = rc->constQP.qpInterB = rc->constQP.qpIntra = ctx->cqp;
783 if (avctx->b_quant_factor != 0.0)
784 rc->constQP.qpInterB = av_clip(ctx->cqp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
785 if (avctx->i_quant_factor != 0.0)
786 rc->constQP.qpIntra = av_clip(ctx->cqp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
787 }
788
789 avctx->qmin = -1;
790 avctx->qmax = -1;
791 }
792
793 static av_cold void set_vbr(AVCodecContext *avctx)
794 {
795 NvencContext *ctx = avctx->priv_data;
796 NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
797 int qp_inter_p;
798
799 if (avctx->qmin >= 0 && avctx->qmax >= 0) {
800 rc->enableMinQP = 1;
801 rc->enableMaxQP = 1;
802
803 rc->minQP.qpInterB = avctx->qmin;
804 rc->minQP.qpInterP = avctx->qmin;
805 rc->minQP.qpIntra = avctx->qmin;
806
807 rc->maxQP.qpInterB = avctx->qmax;
808 rc->maxQP.qpInterP = avctx->qmax;
809 rc->maxQP.qpIntra = avctx->qmax;
810
811 qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
812 } else if (avctx->qmin >= 0) {
813 rc->enableMinQP = 1;
814
815 rc->minQP.qpInterB = avctx->qmin;
816 rc->minQP.qpInterP = avctx->qmin;
817 rc->minQP.qpIntra = avctx->qmin;
818
819 qp_inter_p = avctx->qmin;
820 } else {
821 qp_inter_p = 26; // default to 26
822 }
823
824 rc->enableInitialRCQP = 1;
825
826 if (ctx->init_qp_p < 0) {
827 rc->initialRCQP.qpInterP = qp_inter_p;
828 } else {
829 rc->initialRCQP.qpInterP = ctx->init_qp_p;
830 }
831
832 if (ctx->init_qp_i < 0) {
833 if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
834 rc->initialRCQP.qpIntra = av_clip(
835 rc->initialRCQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
836 } else {
837 rc->initialRCQP.qpIntra = rc->initialRCQP.qpInterP;
838 }
839 } else {
840 rc->initialRCQP.qpIntra = ctx->init_qp_i;
841 }
842
843 if (ctx->init_qp_b < 0) {
844 if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
845 rc->initialRCQP.qpInterB = av_clip(
846 rc->initialRCQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
847 } else {
848 rc->initialRCQP.qpInterB = rc->initialRCQP.qpInterP;
849 }
850 } else {
851 rc->initialRCQP.qpInterB = ctx->init_qp_b;
852 }
853 }
854
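/* Lossless encoding is implemented as constant QP 0 for all frame types. */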
855 static av_cold void set_lossless(AVCodecContext *avctx)
856 {
857 NvencContext *ctx = avctx->priv_data;
858 NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
859
860 rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
861 rc->constQP.qpInterB = 0;
862 rc->constQP.qpInterP = 0;
863 rc->constQP.qpIntra = 0;
864
865 avctx->qmin = -1;
866 avctx->qmax = -1;
867 }
868
869 static void nvenc_override_rate_control(AVCodecContext *avctx)
870 {
871 NvencContext *ctx = avctx->priv_data;
872 NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
873
874 switch (ctx->rc) {
875 case NV_ENC_PARAMS_RC_CONSTQP:
876 set_constqp(avctx);
877 return;
878 case NV_ENC_PARAMS_RC_VBR_MINQP:
879 if (avctx->qmin < 0) {
880 av_log(avctx, AV_LOG_WARNING,
881 "The variable bitrate rate-control requires "
882 "the 'qmin' option set.\n");
883 set_vbr(avctx);
884 return;
885 }
886 /* fall through */
887 case NV_ENC_PARAMS_RC_VBR_HQ:
888 case NV_ENC_PARAMS_RC_VBR:
889 set_vbr(avctx);
890 break;
891 case NV_ENC_PARAMS_RC_CBR:
892 case NV_ENC_PARAMS_RC_CBR_HQ:
893 case NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ:
894 break;
895 }
896
897 rc->rateControlMode = ctx->rc;
898 }
899
900 static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
901 {
902 NvencContext *ctx = avctx->priv_data;
903 // default minimum of 4 surfaces
904 // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
905 // another multiply by 2 to avoid blocking next PBB group
906 int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
907
908 // lookahead enabled
909 if (ctx->rc_lookahead > 0) {
910 // +1 is to account for lkd_bound calculation later
911 // +4 is to allow sufficient pipelining with lookahead
912 nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
913 if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
914 {
915 av_log(avctx, AV_LOG_WARNING,
916 "Defined rc_lookahead requires more surfaces, "
917 "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
918 }
919 ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
920 } else {
921 if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
922 {
923 av_log(avctx, AV_LOG_WARNING,
924 "Defined b-frame requires more surfaces, "
925 "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
926 ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
927 }
928 else if (ctx->nb_surfaces <= 0)
929 ctx->nb_surfaces = nb_surfaces;
930 // otherwise use user specified value
931 }
932
933 ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
934 ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);
935
936 return 0;
937 }
938
939 static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
940 {
941 NvencContext *ctx = avctx->priv_data;
942
943 if (avctx->global_quality > 0)
944 av_log(avctx, AV_LOG_WARNING, "Using global_quality with nvenc is deprecated. Use qp instead.\n");
945
946 if (ctx->cqp < 0 && avctx->global_quality > 0)
947 ctx->cqp = avctx->global_quality;
948
949 if (avctx->bit_rate > 0) {
950 ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
951 } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
952 ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
953 }
954
955 if (avctx->rc_max_rate > 0)
956 ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
957
958 #ifdef NVENC_HAVE_MULTIPASS
959 ctx->encode_config.rcParams.multiPass = ctx->multipass;
960
961 if (ctx->flags & NVENC_ONE_PASS)
962 ctx->encode_config.rcParams.multiPass = NV_ENC_MULTI_PASS_DISABLED;
963 if (ctx->flags & NVENC_TWO_PASSES || ctx->twopass > 0)
964 ctx->encode_config.rcParams.multiPass = NV_ENC_TWO_PASS_FULL_RESOLUTION;
965
966 if (ctx->rc < 0) {
967 if (ctx->cbr) {
968 ctx->rc = NV_ENC_PARAMS_RC_CBR;
969 } else if (ctx->cqp >= 0) {
970 ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
971 } else if (ctx->quality >= 0.0f) {
972 ctx->rc = NV_ENC_PARAMS_RC_VBR;
973 }
974 }
975 #else
976 if (ctx->rc < 0) {
977 if (ctx->flags & NVENC_ONE_PASS)
978 ctx->twopass = 0;
979 if (ctx->flags & NVENC_TWO_PASSES)
980 ctx->twopass = 1;
981
982 if (ctx->twopass < 0)
983 ctx->twopass = (ctx->flags & NVENC_LOWLATENCY) != 0;
984
985 if (ctx->cbr) {
986 if (ctx->twopass) {
987 ctx->rc = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
988 } else {
989 ctx->rc = NV_ENC_PARAMS_RC_CBR;
990 }
991 } else if (ctx->cqp >= 0) {
992 ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
993 } else if (ctx->twopass) {
994 ctx->rc = NV_ENC_PARAMS_RC_VBR_HQ;
995 } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
996 ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
997 }
998 }
999 #endif
1000
1001 if (ctx->rc >= 0 && ctx->rc & RC_MODE_DEPRECATED) {
1002 av_log(avctx, AV_LOG_WARNING, "Specified rc mode is deprecated.\n");
1003 av_log(avctx, AV_LOG_WARNING, "Use -rc constqp/cbr/vbr, -tune and -multipass instead.\n");
1004
1005 ctx->rc &= ~RC_MODE_DEPRECATED;
1006 }
1007
1008 #ifdef NVENC_HAVE_QP_CHROMA_OFFSETS
1009 ctx->encode_config.rcParams.cbQPIndexOffset = ctx->qp_cb_offset;
1010 ctx->encode_config.rcParams.crQPIndexOffset = ctx->qp_cr_offset;
1011 #else
1012 if (ctx->qp_cb_offset || ctx->qp_cr_offset)
1013 av_log(avctx, AV_LOG_WARNING, "Failed setting QP CB/CR offsets, SDK 11.1 or greater required at compile time.\n");
1014 #endif
1015
1016 #ifdef NVENC_HAVE_LDKFS
1017 if (ctx->ldkfs)
1018 ctx->encode_config.rcParams.lowDelayKeyFrameScale = ctx->ldkfs;
1019 #endif
1020
1021 if (ctx->flags & NVENC_LOSSLESS) {
1022 set_lossless(avctx);
1023 } else if (ctx->rc >= 0) {
1024 nvenc_override_rate_control(avctx);
1025 } else {
1026 ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
1027 set_vbr(avctx);
1028 }
1029
1030 if (avctx->rc_buffer_size > 0) {
1031 ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
1032 } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
1033 avctx->rc_buffer_size = ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
1034 }
1035
1036 if (ctx->aq) {
1037 ctx->encode_config.rcParams.enableAQ = 1;
1038 ctx->encode_config.rcParams.aqStrength = ctx->aq_strength;
1039 av_log(avctx, AV_LOG_VERBOSE, "AQ enabled.\n");
1040 }
1041
1042 if (ctx->temporal_aq) {
1043 ctx->encode_config.rcParams.enableTemporalAQ = 1;
1044 av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ enabled.\n");
1045 }
1046
1047 if (ctx->rc_lookahead > 0) {
1048 int lkd_bound = FFMIN(ctx->nb_surfaces, ctx->async_depth) -
1049 ctx->encode_config.frameIntervalP - 4;
1050
1051 if (lkd_bound < 0) {
1052 av_log(avctx, AV_LOG_WARNING,
1053 "Lookahead not enabled. Increase buffer delay (-delay).\n");
1054 } else {
1055 ctx->encode_config.rcParams.enableLookahead = 1;
1056 ctx->encode_config.rcParams.lookaheadDepth = av_clip(ctx->rc_lookahead, 0, lkd_bound);
1057 ctx->encode_config.rcParams.disableIadapt = ctx->no_scenecut;
1058 ctx->encode_config.rcParams.disableBadapt = !ctx->b_adapt;
1059 av_log(avctx, AV_LOG_VERBOSE,
1060 "Lookahead enabled: depth %d, scenecut %s, B-adapt %s.\n",
1061 ctx->encode_config.rcParams.lookaheadDepth,
1062 ctx->encode_config.rcParams.disableIadapt ? "disabled" : "enabled",
1063 ctx->encode_config.rcParams.disableBadapt ? "disabled" : "enabled");
1064 }
1065 }
1066
1067 if (ctx->strict_gop) {
1068 ctx->encode_config.rcParams.strictGOPTarget = 1;
1069 av_log(avctx, AV_LOG_VERBOSE, "Strict GOP target enabled.\n");
1070 }
1071
1072 if (ctx->nonref_p)
1073 ctx->encode_config.rcParams.enableNonRefP = 1;
1074
1075 if (ctx->zerolatency)
1076 ctx->encode_config.rcParams.zeroReorderDelay = 1;
1077
1078 if (ctx->quality) {
1079 //convert from float to fixed point 8.8
1080 int tmp_quality = (int)(ctx->quality * 256.0f);
1081 ctx->encode_config.rcParams.targetQuality = (uint8_t)(tmp_quality >> 8);
1082 ctx->encode_config.rcParams.targetQualityLSB = (uint8_t)(tmp_quality & 0xff);
1083
1084 av_log(avctx, AV_LOG_VERBOSE, "CQ(%d) mode enabled.\n", tmp_quality);
1085
1086 // CQ mode shall discard avg bitrate/vbv buffer size and honor only max bitrate
1087 ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate = 0;
1088 ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size = 0;
1089 ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
1090 }
1091 }
1092
1093 static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
1094 {
1095 NvencContext *ctx = avctx->priv_data;
1096 NV_ENC_CONFIG *cc = &ctx->encode_config;
1097 NV_ENC_CONFIG_H264 *h264 = &cc->encodeCodecConfig.h264Config;
1098 NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
1099
1100 vui->colourMatrix = IS_GBRP(ctx->data_pix_fmt) ? AVCOL_SPC_RGB : avctx->colorspace;
1101 vui->colourPrimaries = avctx->color_primaries;
1102 vui->transferCharacteristics = avctx->color_trc;
1103 vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
1104 || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
1105
1106 vui->colourDescriptionPresentFlag =
1107 (vui->colourMatrix != 2 || vui->colourPrimaries != 2 || vui->transferCharacteristics != 2);
1108
1109 vui->videoSignalTypePresentFlag =
1110 (vui->colourDescriptionPresentFlag
1111 || vui->videoFormat != 5
1112 || vui->videoFullRangeFlag != 0);
1113
1114 h264->sliceMode = 3;
1115 h264->sliceModeData = avctx->slices > 0 ? avctx->slices : 1;
1116
1117 if (ctx->intra_refresh) {
1118 h264->enableIntraRefresh = 1;
1119 h264->intraRefreshPeriod = avctx->gop_size;
1120 h264->intraRefreshCnt = avctx->gop_size - 1;
1121 #ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH
1122 h264->singleSliceIntraRefresh = ctx->single_slice_intra_refresh;
1123 #endif
1124 }
1125
1126 if (ctx->constrained_encoding)
1127 h264->enableConstrainedEncoding = 1;
1128
1129 h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
1130 h264->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
1131 h264->outputAUD = ctx->aud;
1132
1133 if (ctx->dpb_size >= 0) {
1134 /* 0 means "let the hardware decide" */
1135 h264->maxNumRefFrames = ctx->dpb_size;
1136 }
1137
1138 if (ctx->intra_refresh) {
1139 h264->idrPeriod = NVENC_INFINITE_GOPLENGTH;
1140 } else if (avctx->gop_size >= 0) {
1141 h264->idrPeriod = avctx->gop_size;
1142 }
1143
1144 if (IS_CBR(cc->rcParams.rateControlMode)) {
1145 h264->outputBufferingPeriodSEI = 1;
1146 }
1147
1148 h264->outputPictureTimingSEI = 1;
1149
1150 if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ||
1151 cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_HQ ||
1152 cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_VBR_HQ) {
1153 h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
1154 h264->fmoMode = NV_ENC_H264_FMO_DISABLE;
1155 }
1156
1157 if (ctx->flags & NVENC_LOSSLESS) {
1158 h264->qpPrimeYZeroTransformBypassFlag = 1;
1159 } else {
1160 switch(ctx->profile) {
1161 case NV_ENC_H264_PROFILE_BASELINE:
1162 cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
1163 avctx->profile = FF_PROFILE_H264_BASELINE;
1164 break;
1165 case NV_ENC_H264_PROFILE_MAIN:
1166 cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
1167 avctx->profile = FF_PROFILE_H264_MAIN;
1168 break;
1169 case NV_ENC_H264_PROFILE_HIGH:
1170 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
1171 avctx->profile = FF_PROFILE_H264_HIGH;
1172 break;
1173 case NV_ENC_H264_PROFILE_HIGH_444P:
1174 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
1175 avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
1176 break;
1177 }
1178 }
1179
1180 // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
1181 if (IS_YUV444(ctx->data_pix_fmt)) {
1182 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
1183 avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
1184 }
1185
1186 h264->chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;
1187
1188 h264->level = ctx->level;
1189
1190 if (ctx->coder >= 0)
1191 h264->entropyCodingMode = ctx->coder;
1192
1193 #ifdef NVENC_HAVE_BFRAME_REF_MODE
1194 h264->useBFramesAsRef = ctx->b_ref_mode;
1195 #endif
1196
1197 #ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
1198 h264->numRefL0 = avctx->refs;
1199 h264->numRefL1 = avctx->refs;
1200 #endif
1201
1202 return 0;
1203 }
1204
1205 static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
1206 {
1207 NvencContext *ctx = avctx->priv_data;
1208 NV_ENC_CONFIG *cc = &ctx->encode_config;
1209 NV_ENC_CONFIG_HEVC *hevc = &cc->encodeCodecConfig.hevcConfig;
1210 NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;
1211
1212 vui->colourMatrix = IS_GBRP(ctx->data_pix_fmt) ? AVCOL_SPC_RGB : avctx->colorspace;
1213 vui->colourPrimaries = avctx->color_primaries;
1214 vui->transferCharacteristics = avctx->color_trc;
1215 vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
1216 || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);
1217
1218 vui->colourDescriptionPresentFlag =
1219 (vui->colourMatrix != 2 || vui->colourPrimaries != 2 || vui->transferCharacteristics != 2);
1220
1221 vui->videoSignalTypePresentFlag =
1222 (vui->colourDescriptionPresentFlag
1223 || vui->videoFormat != 5
1224 || vui->videoFullRangeFlag != 0);
1225
1226 hevc->sliceMode = 3;
1227 hevc->sliceModeData = avctx->slices > 0 ? avctx->slices : 1;
1228
1229 if (ctx->intra_refresh) {
1230 hevc->enableIntraRefresh = 1;
1231 hevc->intraRefreshPeriod = avctx->gop_size;
1232 hevc->intraRefreshCnt = avctx->gop_size - 1;
1233 #ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH
1234 hevc->singleSliceIntraRefresh = ctx->single_slice_intra_refresh;
1235 #endif
1236 }
1237
1238 #ifdef NVENC_HAVE_HEVC_CONSTRAINED_ENCODING
1239 if (ctx->constrained_encoding)
1240 hevc->enableConstrainedEncoding = 1;
1241 #endif
1242
1243 hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
1244 hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
1245 hevc->outputAUD = ctx->aud;
1246
1247 if (ctx->dpb_size >= 0) {
1248 /* 0 means "let the hardware decide" */
1249 hevc->maxNumRefFramesInDPB = ctx->dpb_size;
1250 }
1251
1252 if (ctx->intra_refresh) {
1253 hevc->idrPeriod = NVENC_INFINITE_GOPLENGTH;
1254 } else if (avctx->gop_size >= 0) {
1255 hevc->idrPeriod = avctx->gop_size;
1256 }
1257
1258 if (IS_CBR(cc->rcParams.rateControlMode)) {
1259 hevc->outputBufferingPeriodSEI = 1;
1260 }
1261
1262 hevc->outputPictureTimingSEI = 1;
1263
1264 switch (ctx->profile) {
1265 case NV_ENC_HEVC_PROFILE_MAIN:
1266 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
1267 avctx->profile = FF_PROFILE_HEVC_MAIN;
1268 break;
1269 case NV_ENC_HEVC_PROFILE_MAIN_10:
1270 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
1271 avctx->profile = FF_PROFILE_HEVC_MAIN_10;
1272 break;
1273 case NV_ENC_HEVC_PROFILE_REXT:
1274 cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
1275 avctx->profile = FF_PROFILE_HEVC_REXT;
1276 break;
1277 }
1278
1279 // force setting profile as main10 if input is 10 bit
1280 if (IS_10BIT(ctx->data_pix_fmt)) {
1281 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
1282 avctx->profile = FF_PROFILE_HEVC_MAIN_10;
1283 }
1284
1285 // force setting profile as rext if input is yuv444
1286 if (IS_YUV444(ctx->data_pix_fmt)) {
1287 cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
1288 avctx->profile = FF_PROFILE_HEVC_REXT;
1289 }
1290
1291 hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
1292
1293 hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;
1294
1295 hevc->level = ctx->level;
1296
1297 hevc->tier = ctx->tier;
1298
1299 #ifdef NVENC_HAVE_HEVC_BFRAME_REF_MODE
1300 hevc->useBFramesAsRef = ctx->b_ref_mode;
1301 #endif
1302
1303 #ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
1304 hevc->numRefL0 = avctx->refs;
1305 hevc->numRefL1 = avctx->refs;
1306 #endif
1307
1308 return 0;
1309 }
1310
1311 static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx)
1312 {
1313 switch (avctx->codec->id) {
1314 case AV_CODEC_ID_H264:
1315 return nvenc_setup_h264_config(avctx);
1316 case AV_CODEC_ID_HEVC:
1317 return nvenc_setup_hevc_config(avctx);
1318 /* Earlier switch/case will return if unknown codec is passed. */
1319 }
1320
1321 return 0;
1322 }
1323
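/* Derive the display aspect ratio from the coded dimensions, scaled by the
 * sample aspect ratio when one is set. */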
1324 static void compute_dar(AVCodecContext *avctx, int *dw, int *dh) {
1325 int sw, sh;
1326
1327 sw = avctx->width;
1328 sh = avctx->height;
1329
1330 if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
1331 sw *= avctx->sample_aspect_ratio.num;
1332 sh *= avctx->sample_aspect_ratio.den;
1333 }
1334
1335 av_reduce(dw, dh, sw, sh, 1024 * 1024);
1336 }
1337
1338 static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
1339 {
1340 NvencContext *ctx = avctx->priv_data;
1341 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1342 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1343
1344 NV_ENC_PRESET_CONFIG preset_config = { 0 };
1345 NVENCSTATUS nv_status = NV_ENC_SUCCESS;
1346 AVCPBProperties *cpb_props;
1347 int res = 0;
1348 int dw, dh;
1349
1350 ctx->encode_config.version = NV_ENC_CONFIG_VER;
1351 ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
1352
1353 ctx->init_encode_params.encodeHeight = avctx->height;
1354 ctx->init_encode_params.encodeWidth = avctx->width;
1355
1356 ctx->init_encode_params.encodeConfig = &ctx->encode_config;
1357
1358 preset_config.version = NV_ENC_PRESET_CONFIG_VER;
1359 preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
1360
1361 #ifdef NVENC_HAVE_NEW_PRESETS
1362 ctx->init_encode_params.tuningInfo = ctx->tuning_info;
1363
1364 if (ctx->flags & NVENC_LOSSLESS)
1365 ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS;
1366 else if (ctx->flags & NVENC_LOWLATENCY)
1367 ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY;
1368
1369 nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder,
1370 ctx->init_encode_params.encodeGUID,
1371 ctx->init_encode_params.presetGUID,
1372 ctx->init_encode_params.tuningInfo,
1373 &preset_config);
1374 #else
1375 nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder,
1376 ctx->init_encode_params.encodeGUID,
1377 ctx->init_encode_params.presetGUID,
1378 &preset_config);
1379 #endif
1380 if (nv_status != NV_ENC_SUCCESS)
1381 return nvenc_print_error(avctx, nv_status, "Cannot get the preset configuration");
1382
1383 memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
1384
1385 ctx->encode_config.version = NV_ENC_CONFIG_VER;
1386
1387 compute_dar(avctx, &dw, &dh);
1388 ctx->init_encode_params.darHeight = dh;
1389 ctx->init_encode_params.darWidth = dw;
1390
1391 if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
1392 ctx->init_encode_params.frameRateNum = avctx->framerate.num;
1393 ctx->init_encode_params.frameRateDen = avctx->framerate.den;
1394 } else {
1395 ctx->init_encode_params.frameRateNum = avctx->time_base.den;
1396 ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
1397 }
1398
1399 ctx->init_encode_params.enableEncodeAsync = 0;
1400 ctx->init_encode_params.enablePTD = 1;
1401
1402 #ifdef NVENC_HAVE_NEW_PRESETS
1403 /* If lookahead isn't set from CLI, use value from preset.
1404 * P6 & P7 presets may enable lookahead for better quality.
1405 */
1406 if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead)
1407 ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth;
1408 #endif
1409
1410 if (ctx->weighted_pred == 1)
1411 ctx->init_encode_params.enableWeightedPrediction = 1;
1412
1413 if (ctx->bluray_compat) {
1414 ctx->aud = 1;
1415 ctx->dpb_size = FFMIN(FFMAX(avctx->refs, 0), 6);
1416 avctx->max_b_frames = FFMIN(avctx->max_b_frames, 3);
1417 switch (avctx->codec->id) {
1418 case AV_CODEC_ID_H264:
1419 /* maximum level depends on used resolution */
1420 break;
1421 case AV_CODEC_ID_HEVC:
1422 ctx->level = NV_ENC_LEVEL_HEVC_51;
1423 ctx->tier = NV_ENC_TIER_HEVC_HIGH;
1424 break;
1425 }
1426 }
1427
1428 if (avctx->gop_size > 0) {
1429 if (avctx->max_b_frames >= 0) {
1430 /* 0 is intra-only, 1 is I/P only, 2 is one B-Frame, 3 two B-frames, and so on. */
1431 ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
1432 }
1433
1434 ctx->encode_config.gopLength = avctx->gop_size;
1435 } else if (avctx->gop_size == 0) {
1436 ctx->encode_config.frameIntervalP = 0;
1437 ctx->encode_config.gopLength = 1;
1438 }
1439
1440 /* force to enable intra refresh */
1441 if(ctx->single_slice_intra_refresh)
1442 ctx->intra_refresh = 1;
1443
1444 if (ctx->intra_refresh)
1445 ctx->encode_config.gopLength = NVENC_INFINITE_GOPLENGTH;
1446
1447 nvenc_recalc_surfaces(avctx);
1448
1449 nvenc_setup_rate_control(avctx);
1450
1451 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
1452 ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
1453 } else {
1454 ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
1455 }
1456
1457 res = nvenc_setup_codec_config(avctx);
1458 if (res)
1459 return res;
1460
1461 res = nvenc_push_context(avctx);
1462 if (res < 0)
1463 return res;
1464
1465 nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
1466 if (nv_status != NV_ENC_SUCCESS) {
1467 nvenc_pop_context(avctx);
1468 return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
1469 }
1470
1471 #ifdef NVENC_HAVE_CUSTREAM_PTR
1472 if (ctx->cu_context) {
1473 nv_status = p_nvenc->nvEncSetIOCudaStreams(ctx->nvencoder, &ctx->cu_stream, &ctx->cu_stream);
1474 if (nv_status != NV_ENC_SUCCESS) {
1475 nvenc_pop_context(avctx);
1476 return nvenc_print_error(avctx, nv_status, "SetIOCudaStreams failed");
1477 }
1478 }
1479 #endif
1480
1481 res = nvenc_pop_context(avctx);
1482 if (res < 0)
1483 return res;
1484
1485 if (ctx->encode_config.frameIntervalP > 1)
1486 avctx->has_b_frames = 2;
1487
1488 if (ctx->encode_config.rcParams.averageBitRate > 0)
1489 avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;
1490
1491 cpb_props = ff_add_cpb_side_data(avctx);
1492 if (!cpb_props)
1493 return AVERROR(ENOMEM);
1494 cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
1495 cpb_props->avg_bitrate = avctx->bit_rate;
1496 cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;
1497
1498 return 0;
1499 }
1500
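/* Translate an FFmpeg pixel format into the corresponding NVENC buffer
 * format; returns NV_ENC_BUFFER_FORMAT_UNDEFINED for unsupported formats. */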
1501 static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt)
1502 {
1503 switch (pix_fmt) {
1504 case AV_PIX_FMT_YUV420P:
1505 return NV_ENC_BUFFER_FORMAT_YV12_PL;
1506 case AV_PIX_FMT_NV12:
1507 return NV_ENC_BUFFER_FORMAT_NV12_PL;
1508 case AV_PIX_FMT_P010:
1509 case AV_PIX_FMT_P016:
1510 return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
1511 case AV_PIX_FMT_GBRP:
1512 case AV_PIX_FMT_YUV444P:
1513 return NV_ENC_BUFFER_FORMAT_YUV444_PL;
1514 case AV_PIX_FMT_GBRP16:
1515 case AV_PIX_FMT_YUV444P16:
1516 return NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
1517 case AV_PIX_FMT_0RGB32:
1518 case AV_PIX_FMT_RGB32:
1519 return NV_ENC_BUFFER_FORMAT_ARGB;
1520 case AV_PIX_FMT_0BGR32:
1521 case AV_PIX_FMT_BGR32:
1522 return NV_ENC_BUFFER_FORMAT_ABGR;
1523 case AV_PIX_FMT_X2RGB10:
1524 return NV_ENC_BUFFER_FORMAT_ARGB10;
1525 case AV_PIX_FMT_X2BGR10:
1526 return NV_ENC_BUFFER_FORMAT_ABGR10;
1527 default:
1528 return NV_ENC_BUFFER_FORMAT_UNDEFINED;
1529 }
1530 }
1531
1532 static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
1533 {
1534 NvencContext *ctx = avctx->priv_data;
1535 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1536 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1537 NvencSurface* tmp_surface = &ctx->surfaces[idx];
1538
1539 NVENCSTATUS nv_status;
1540 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
1541 allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
1542
1543 if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1544 ctx->surfaces[idx].in_ref = av_frame_alloc();
1545 if (!ctx->surfaces[idx].in_ref)
1546 return AVERROR(ENOMEM);
1547 } else {
1548 NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
1549
1550 ctx->surfaces[idx].format = nvenc_map_buffer_format(ctx->data_pix_fmt);
1551 if (ctx->surfaces[idx].format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
1552 av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
1553 av_get_pix_fmt_name(ctx->data_pix_fmt));
1554 return AVERROR(EINVAL);
1555 }
1556
1557 allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
1558 allocSurf.width = avctx->width;
1559 allocSurf.height = avctx->height;
1560 allocSurf.bufferFmt = ctx->surfaces[idx].format;
1561
1562 nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
1563 if (nv_status != NV_ENC_SUCCESS) {
1564 return nvenc_print_error(avctx, nv_status, "CreateInputBuffer failed");
1565 }
1566
1567 ctx->surfaces[idx].input_surface = allocSurf.inputBuffer;
1568 ctx->surfaces[idx].width = allocSurf.width;
1569 ctx->surfaces[idx].height = allocSurf.height;
1570 }
1571
1572 nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
1573 if (nv_status != NV_ENC_SUCCESS) {
1574 int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
1575 if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
1576 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
1577 av_frame_free(&ctx->surfaces[idx].in_ref);
1578 return err;
1579 }
1580
1581 ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
1582
1583 av_fifo_write(ctx->unused_surface_queue, &tmp_surface, 1);
1584
1585 return 0;
1586 }
1587
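/* Allocate the pool of input/output surfaces and the FIFOs that track them
 * through the encoding pipeline. */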
1588 static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
1589 {
1590 NvencContext *ctx = avctx->priv_data;
1591 int i, res = 0, res2;
1592
1593 ctx->surfaces = av_calloc(ctx->nb_surfaces, sizeof(*ctx->surfaces));
1594 if (!ctx->surfaces)
1595 return AVERROR(ENOMEM);
1596
1597 ctx->timestamp_list = av_fifo_alloc2(ctx->nb_surfaces, sizeof(int64_t), 0);
1598 if (!ctx->timestamp_list)
1599 return AVERROR(ENOMEM);
1600
1601 ctx->unused_surface_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0);
1602 if (!ctx->unused_surface_queue)
1603 return AVERROR(ENOMEM);
1604
1605 ctx->output_surface_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0);
1606 if (!ctx->output_surface_queue)
1607 return AVERROR(ENOMEM);
1608 ctx->output_surface_ready_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0);
1609 if (!ctx->output_surface_ready_queue)
1610 return AVERROR(ENOMEM);
1611
1612 res = nvenc_push_context(avctx);
1613 if (res < 0)
1614 return res;
1615
1616 for (i = 0; i < ctx->nb_surfaces; i++) {
1617 if ((res = nvenc_alloc_surface(avctx, i)) < 0)
1618 goto fail;
1619 }
1620
1621 fail:
1622 res2 = nvenc_pop_context(avctx);
1623 if (res2 < 0)
1624 return res2;
1625
1626 return res;
1627 }
1628
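/* Retrieve the encoder's parameter set headers (SPS/PPS) and store them as
 * codec extradata. */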
1629 static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
1630 {
1631 NvencContext *ctx = avctx->priv_data;
1632 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1633 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1634
1635 NVENCSTATUS nv_status;
1636 uint32_t outSize = 0;
1637 char tmpHeader[256];
1638 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
1639 payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
1640
1641 payload.spsppsBuffer = tmpHeader;
1642 payload.inBufferSize = sizeof(tmpHeader);
1643 payload.outSPSPPSPayloadSize = &outSize;
1644
1645 nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
1646 if (nv_status != NV_ENC_SUCCESS) {
1647 return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed");
1648 }
1649
1650 avctx->extradata_size = outSize;
1651 avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
1652
1653 if (!avctx->extradata) {
1654 return AVERROR(ENOMEM);
1655 }
1656
1657 memcpy(avctx->extradata, tmpHeader, outSize);
1658
1659 return 0;
1660 }
1661
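/* Tear down the encoder: flush it with an EOS picture, free the FIFOs,
 * unmap/unregister any remaining hardware frames, destroy the per-surface
 * buffers, the encoder session and the CUDA/D3D11 device, and unload the
 * dynamically loaded libraries. */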
1662 av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
1663 {
1664 NvencContext *ctx = avctx->priv_data;
1665 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1666 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1667 int i, res;
1668
1669 /* the encoder has to be flushed before it can be closed */
1670 if (ctx->nvencoder) {
1671 NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
1672 .encodePicFlags = NV_ENC_PIC_FLAG_EOS };
1673
1674 res = nvenc_push_context(avctx);
1675 if (res < 0)
1676 return res;
1677
1678 p_nvenc->nvEncEncodePicture(ctx->nvencoder, &params);
1679 }
1680
1681 av_fifo_freep2(&ctx->timestamp_list);
1682 av_fifo_freep2(&ctx->output_surface_ready_queue);
1683 av_fifo_freep2(&ctx->output_surface_queue);
1684 av_fifo_freep2(&ctx->unused_surface_queue);
1685
1686 if (ctx->surfaces && (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11)) {
1687 for (i = 0; i < ctx->nb_registered_frames; i++) {
1688 if (ctx->registered_frames[i].mapped)
1689 p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[i].in_map.mappedResource);
1690 if (ctx->registered_frames[i].regptr)
1691 p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
1692 }
1693 ctx->nb_registered_frames = 0;
1694 }
1695
1696 if (ctx->surfaces) {
1697 for (i = 0; i < ctx->nb_surfaces; ++i) {
1698 if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
1699 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
1700 av_frame_free(&ctx->surfaces[i].in_ref);
1701 p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
1702 }
1703 }
1704 av_freep(&ctx->surfaces);
1705 ctx->nb_surfaces = 0;
1706
1707 av_frame_free(&ctx->frame);
1708
1709 av_freep(&ctx->sei_data);
1710
1711 if (ctx->nvencoder) {
1712 p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
1713
1714 res = nvenc_pop_context(avctx);
1715 if (res < 0)
1716 return res;
1717 }
1718 ctx->nvencoder = NULL;
1719
1720 if (ctx->cu_context_internal)
1721 CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
1722 ctx->cu_context = ctx->cu_context_internal = NULL;
1723
1724 #if CONFIG_D3D11VA
1725 if (ctx->d3d11_device) {
1726 ID3D11Device_Release(ctx->d3d11_device);
1727 ctx->d3d11_device = NULL;
1728 }
1729 #endif
1730
1731 nvenc_free_functions(&dl_fn->nvenc_dl);
1732 cuda_free_functions(&dl_fn->cuda_dl);
1733
1734 dl_fn->nvenc_device_count = 0;
1735
1736 av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
1737
1738 return 0;
1739 }
1740
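/* Encoder init: derive the input pixel format (from hw_frames_ctx for GPU
 * frames), load the CUDA and NVENC libraries, set up the device, the encoder
 * session and the surfaces, and optionally extract global headers. */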
1741 av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
1742 {
1743 NvencContext *ctx = avctx->priv_data;
1744 int ret;
1745
1746 if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1747 AVHWFramesContext *frames_ctx;
1748 if (!avctx->hw_frames_ctx) {
1749 av_log(avctx, AV_LOG_ERROR,
1750 "hw_frames_ctx must be set when using GPU frames as input\n");
1751 return AVERROR(EINVAL);
1752 }
1753 frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
1754 if (frames_ctx->format != avctx->pix_fmt) {
1755 av_log(avctx, AV_LOG_ERROR,
1756 "hw_frames_ctx must match the GPU frame type\n");
1757 return AVERROR(EINVAL);
1758 }
1759 ctx->data_pix_fmt = frames_ctx->sw_format;
1760 } else {
1761 ctx->data_pix_fmt = avctx->pix_fmt;
1762 }
1763
1764 ctx->frame = av_frame_alloc();
1765 if (!ctx->frame)
1766 return AVERROR(ENOMEM);
1767
1768 if ((ret = nvenc_load_libraries(avctx)) < 0)
1769 return ret;
1770
1771 if ((ret = nvenc_setup_device(avctx)) < 0)
1772 return ret;
1773
1774 if ((ret = nvenc_setup_encoder(avctx)) < 0)
1775 return ret;
1776
1777 if ((ret = nvenc_setup_surfaces(avctx)) < 0)
1778 return ret;
1779
1780 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1781 if ((ret = nvenc_setup_extradata(avctx)) < 0)
1782 return ret;
1783 }
1784
1785 return 0;
1786 }
1787
1788 static NvencSurface *get_free_frame(NvencContext *ctx)
1789 {
1790 NvencSurface *tmp_surf;
1791
1792 if (av_fifo_read(ctx->unused_surface_queue, &tmp_surf, 1) < 0)
1793 // queue empty
1794 return NULL;
1795
1796 return tmp_surf;
1797 }
1798
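/* Copy a system-memory frame into a locked NVENC input buffer. For YUV420P the
 * chroma pitch is halved and the U/V destination pointers are swapped, since
 * the destination buffer stores V before U. */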
1799 static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
1800 NV_ENC_LOCK_INPUT_BUFFER *lock_buffer_params, const AVFrame *frame)
1801 {
1802 int dst_linesize[4] = {
1803 lock_buffer_params->pitch,
1804 lock_buffer_params->pitch,
1805 lock_buffer_params->pitch,
1806 lock_buffer_params->pitch
1807 };
1808 uint8_t *dst_data[4];
1809 int ret;
1810
1811 if (frame->format == AV_PIX_FMT_YUV420P)
1812 dst_linesize[1] = dst_linesize[2] >>= 1;
1813
1814 ret = av_image_fill_pointers(dst_data, frame->format, nv_surface->height,
1815 lock_buffer_params->bufferDataPtr, dst_linesize);
1816 if (ret < 0)
1817 return ret;
1818
1819 if (frame->format == AV_PIX_FMT_YUV420P)
1820 FFSWAP(uint8_t*, dst_data[1], dst_data[2]);
1821
1822 av_image_copy(dst_data, dst_linesize,
1823 (const uint8_t**)frame->data, frame->linesize, frame->format,
1824 avctx->width, avctx->height);
1825
1826 return 0;
1827 }
1828
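/* Find a slot in registered_frames for a new registration. When the table is
 * full, the first pass only reuses an unmapped slot that holds no registration;
 * the second pass unregisters an unmapped-but-registered resource to make room. */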
1829 static int nvenc_find_free_reg_resource(AVCodecContext *avctx)
1830 {
1831 NvencContext *ctx = avctx->priv_data;
1832 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1833 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1834 NVENCSTATUS nv_status;
1835
1836 int i, first_round;
1837
1838 if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) {
1839 for (first_round = 1; first_round >= 0; first_round--) {
1840 for (i = 0; i < ctx->nb_registered_frames; i++) {
1841 if (!ctx->registered_frames[i].mapped) {
1842 if (ctx->registered_frames[i].regptr) {
1843 if (first_round)
1844 continue;
1845 nv_status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
1846 if (nv_status != NV_ENC_SUCCESS)
1847 return nvenc_print_error(avctx, nv_status, "Failed unregistering unused input resource");
1848 ctx->registered_frames[i].ptr = NULL;
1849 ctx->registered_frames[i].regptr = NULL;
1850 }
1851 return i;
1852 }
1853 }
1854 }
1855 } else {
1856 return ctx->nb_registered_frames++;
1857 }
1858
1859 av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
1860 return AVERROR(ENOMEM);
1861 }
1862
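/* Register a hardware frame (CUDA device pointer or D3D11 texture) with the
 * encoder, reusing an existing registration when the same resource was seen
 * before. Returns the registered_frames index or a negative error. */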
1863 static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
1864 {
1865 NvencContext *ctx = avctx->priv_data;
1866 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1867 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1868
1869 AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
1870 NV_ENC_REGISTER_RESOURCE reg = { 0 };
1871 int i, idx, ret;
1872
1873 for (i = 0; i < ctx->nb_registered_frames; i++) {
1874 if (avctx->pix_fmt == AV_PIX_FMT_CUDA && ctx->registered_frames[i].ptr == frame->data[0])
1875 return i;
1876 else if (avctx->pix_fmt == AV_PIX_FMT_D3D11 && ctx->registered_frames[i].ptr == frame->data[0] && ctx->registered_frames[i].ptr_index == (intptr_t)frame->data[1])
1877 return i;
1878 }
1879
1880 idx = nvenc_find_free_reg_resource(avctx);
1881 if (idx < 0)
1882 return idx;
1883
1884 reg.version = NV_ENC_REGISTER_RESOURCE_VER;
1885 reg.width = frames_ctx->width;
1886 reg.height = frames_ctx->height;
1887 reg.pitch = frame->linesize[0];
1888 reg.resourceToRegister = frame->data[0];
1889
1890 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1891 reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
1892 }
1893 else if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1894 reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
1895 reg.subResourceIndex = (intptr_t)frame->data[1];
1896 }
1897
1898 reg.bufferFormat = nvenc_map_buffer_format(frames_ctx->sw_format);
1899 if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
1900 av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
1901 av_get_pix_fmt_name(frames_ctx->sw_format));
1902 return AVERROR(EINVAL);
1903 }
1904
1905 ret = p_nvenc->nvEncRegisterResource(ctx->nvencoder, &reg);
1906 if (ret != NV_ENC_SUCCESS) {
1907 nvenc_print_error(avctx, ret, "Error registering an input resource");
1908 return AVERROR_UNKNOWN;
1909 }
1910
1911 ctx->registered_frames[idx].ptr = frame->data[0];
1912 ctx->registered_frames[idx].ptr_index = reg.subResourceIndex;
1913 ctx->registered_frames[idx].regptr = reg.registeredResource;
1914 return idx;
1915 }
1916
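/* Hand a frame to NVENC: hardware frames are registered and mapped as input
 * resources, while system-memory frames are copied into a locked input buffer. */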
1917 static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
1918 NvencSurface *nvenc_frame)
1919 {
1920 NvencContext *ctx = avctx->priv_data;
1921 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
1922 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1923
1924 int res;
1925 NVENCSTATUS nv_status;
1926
1927 if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
1928 int reg_idx = nvenc_register_frame(avctx, frame);
1929 if (reg_idx < 0) {
1930 av_log(avctx, AV_LOG_ERROR, "Could not register an input HW frame\n");
1931 return reg_idx;
1932 }
1933
1934 res = av_frame_ref(nvenc_frame->in_ref, frame);
1935 if (res < 0)
1936 return res;
1937
1938 if (!ctx->registered_frames[reg_idx].mapped) {
1939 ctx->registered_frames[reg_idx].in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
1940 ctx->registered_frames[reg_idx].in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
1941 nv_status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &ctx->registered_frames[reg_idx].in_map);
1942 if (nv_status != NV_ENC_SUCCESS) {
1943 av_frame_unref(nvenc_frame->in_ref);
1944 return nvenc_print_error(avctx, nv_status, "Error mapping an input resource");
1945 }
1946 }
1947
1948 ctx->registered_frames[reg_idx].mapped += 1;
1949
1950 nvenc_frame->reg_idx = reg_idx;
1951 nvenc_frame->input_surface = ctx->registered_frames[reg_idx].in_map.mappedResource;
1952 nvenc_frame->format = ctx->registered_frames[reg_idx].in_map.mappedBufferFmt;
1953 nvenc_frame->pitch = frame->linesize[0];
1954
1955 return 0;
1956 } else {
1957 NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
1958
1959 lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1960 lockBufferParams.inputBuffer = nvenc_frame->input_surface;
1961
1962 nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
1963 if (nv_status != NV_ENC_SUCCESS) {
1964 return nvenc_print_error(avctx, nv_status, "Failed locking nvenc input buffer");
1965 }
1966
1967 nvenc_frame->pitch = lockBufferParams.pitch;
1968 res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);
1969
1970 nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
1971 if (nv_status != NV_ENC_SUCCESS) {
1972 return nvenc_print_error(avctx, nv_status, "Failed unlocking input buffer!");
1973 }
1974
1975 return res;
1976 }
1977 }
1978
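/* Fill the codec-specific part of the picture parameters (slice mode and any
 * SEI payloads) for H.264 or HEVC. */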
1979 static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
1980 NV_ENC_PIC_PARAMS *params,
1981 NV_ENC_SEI_PAYLOAD *sei_data,
1982 int sei_count)
1983 {
1984 NvencContext *ctx = avctx->priv_data;
1985
1986 switch (avctx->codec->id) {
1987 case AV_CODEC_ID_H264:
1988 params->codecPicParams.h264PicParams.sliceMode =
1989 ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
1990 params->codecPicParams.h264PicParams.sliceModeData =
1991 ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
1992 if (sei_count > 0) {
1993 params->codecPicParams.h264PicParams.seiPayloadArray = sei_data;
1994 params->codecPicParams.h264PicParams.seiPayloadArrayCnt = sei_count;
1995 }
1996
1997 break;
1998 case AV_CODEC_ID_HEVC:
1999 params->codecPicParams.hevcPicParams.sliceMode =
2000 ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
2001 params->codecPicParams.hevcPicParams.sliceModeData =
2002 ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
2003 if (sei_count > 0) {
2004 params->codecPicParams.hevcPicParams.seiPayloadArray = sei_data;
2005 params->codecPicParams.hevcPicParams.seiPayloadArrayCnt = sei_count;
2006 }
2007
2008 break;
2009 }
2010 }
2011
2012 static inline void timestamp_queue_enqueue(AVFifo *queue, int64_t timestamp)
2013 {
2014 av_fifo_write(queue, &timestamp, 1);
2015 }
2016
2017 static inline int64_t timestamp_queue_dequeue(AVFifo *queue)
2018 {
2019 int64_t timestamp = AV_NOPTS_VALUE;
2020 // The following call might fail if the queue is empty.
2021 av_fifo_read(queue, &timestamp, 1);
2022
2023 return timestamp;
2024 }
2025
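/* Derive the packet timestamps: pts comes from NVENC's outputTimeStamp, dts
 * from the queued input timestamps shifted back to account for the B-frame
 * reordering delay. */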
2026 static int nvenc_set_timestamp(AVCodecContext *avctx,
2027 NV_ENC_LOCK_BITSTREAM *params,
2028 AVPacket *pkt)
2029 {
2030 NvencContext *ctx = avctx->priv_data;
2031
2032 pkt->pts = params->outputTimeStamp;
2033 pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);
2034
2035 pkt->dts -= FFMAX(ctx->encode_config.frameIntervalP - 1, 0) * FFMAX(avctx->ticks_per_frame, 1) * FFMAX(avctx->time_base.num, 1);
2036
2037 return 0;
2038 }
2039
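/* Lock the finished bitstream buffer, copy it into the packet, release any
 * mapped input resource and attach picture type, QP stats and timestamps. */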
2040 static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf)
2041 {
2042 NvencContext *ctx = avctx->priv_data;
2043 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
2044 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
2045
2046 uint32_t slice_mode_data;
2047 uint32_t *slice_offsets = NULL;
2048 NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
2049 NVENCSTATUS nv_status;
2050 int res = 0;
2051
2052 enum AVPictureType pict_type;
2053
2054 switch (avctx->codec->id) {
2055 case AV_CODEC_ID_H264:
2056 slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
2057 break;
2058 case AV_CODEC_ID_H265:
2059 slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
2060 break;
2061 default:
2062 av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
2063 res = AVERROR(EINVAL);
2064 goto error;
2065 }
2066 slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
2067
2068 if (!slice_offsets) {
2069 res = AVERROR(ENOMEM);
2070 goto error;
2071 }
2072
2073 lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
2074
2075 lock_params.doNotWait = 0;
2076 lock_params.outputBitstream = tmpoutsurf->output_surface;
2077 lock_params.sliceOffsets = slice_offsets;
2078
2079 nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
2080 if (nv_status != NV_ENC_SUCCESS) {
2081 res = nvenc_print_error(avctx, nv_status, "Failed locking bitstream buffer");
2082 goto error;
2083 }
2084
2085 res = ff_get_encode_buffer(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);
2086
2087 if (res < 0) {
2088 p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
2089 goto error;
2090 }
2091
2092 memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
2093
2094 nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
2095 if (nv_status != NV_ENC_SUCCESS) {
2096 res = nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");
2097 goto error;
2098 }
2099
2100
2101 if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
2102 ctx->registered_frames[tmpoutsurf->reg_idx].mapped -= 1;
2103 if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped == 0) {
2104 nv_status = p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[tmpoutsurf->reg_idx].in_map.mappedResource);
2105 if (nv_status != NV_ENC_SUCCESS) {
2106 res = nvenc_print_error(avctx, nv_status, "Failed unmapping input resource");
2107 goto error;
2108 }
2109 } else if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped < 0) {
2110 res = AVERROR_BUG;
2111 goto error;
2112 }
2113
2114 av_frame_unref(tmpoutsurf->in_ref);
2115
2116 tmpoutsurf->input_surface = NULL;
2117 }
2118
2119 switch (lock_params.pictureType) {
2120 case NV_ENC_PIC_TYPE_IDR:
2121 pkt->flags |= AV_PKT_FLAG_KEY; /* fall through: IDR pictures are also intra pictures */
2122 case NV_ENC_PIC_TYPE_I:
2123 pict_type = AV_PICTURE_TYPE_I;
2124 break;
2125 case NV_ENC_PIC_TYPE_P:
2126 pict_type = AV_PICTURE_TYPE_P;
2127 break;
2128 case NV_ENC_PIC_TYPE_B:
2129 pict_type = AV_PICTURE_TYPE_B;
2130 break;
2131 case NV_ENC_PIC_TYPE_BI:
2132 pict_type = AV_PICTURE_TYPE_BI;
2133 break;
2134 default:
2135 av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
2136 av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
2137 res = AVERROR_EXTERNAL;
2138 goto error;
2139 }
2140
2141 ff_side_data_set_encoder_stats(pkt,
2142 (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);
2143
2144 res = nvenc_set_timestamp(avctx, &lock_params, pkt);
2145 if (res < 0)
2146 goto error2;
2147
2148 av_free(slice_offsets);
2149
2150 return 0;
2151
2152 error:
2153 timestamp_queue_dequeue(ctx->timestamp_list);
2154
2155 error2:
2156 av_free(slice_offsets);
2157
2158 return res;
2159 }
2160
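/* A packet may be returned once enough surfaces are in flight to hide the
 * encoder latency (async_depth), or as soon as anything is ready when flushing. */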
2161 static int output_ready(AVCodecContext *avctx, int flush)
2162 {
2163 NvencContext *ctx = avctx->priv_data;
2164 int nb_ready, nb_pending;
2165
2166 nb_ready = av_fifo_can_read(ctx->output_surface_ready_queue);
2167 nb_pending = av_fifo_can_read(ctx->output_surface_queue);
2168 if (flush)
2169 return nb_ready > 0;
2170 return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);
2171 }
2172
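/* Collect the SEI payloads for the current frame (A/53 closed captions, S12M
 * timecodes and unregistered user data) into ctx->sei_data; returns the payload
 * count or a negative error. */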
2173 static int prepare_sei_data_array(AVCodecContext *avctx, const AVFrame *frame)
2174 {
2175 NvencContext *ctx = avctx->priv_data;
2176 int sei_count = 0;
2177 int i, res;
2178
2179 if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
2180 void *a53_data = NULL;
2181 size_t a53_size = 0;
2182
2183 if (ff_alloc_a53_sei(frame, 0, &a53_data, &a53_size) < 0) {
2184 av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2185 }
2186
2187 if (a53_data) {
2188 void *tmp = av_fast_realloc(ctx->sei_data,
2189 &ctx->sei_data_size,
2190 (sei_count + 1) * sizeof(*ctx->sei_data));
2191 if (!tmp) {
2192 av_free(a53_data);
2193 res = AVERROR(ENOMEM);
2194 goto error;
2195 } else {
2196 ctx->sei_data = tmp;
2197 ctx->sei_data[sei_count].payloadSize = (uint32_t)a53_size;
2198 ctx->sei_data[sei_count].payloadType = 4; /* user_data_registered_itu_t_t35 (A/53 captions) */
2199 ctx->sei_data[sei_count].payload = (uint8_t*)a53_data;
2200 sei_count++;
2201 }
2202 }
2203 }
2204
2205 if (ctx->s12m_tc && av_frame_get_side_data(frame, AV_FRAME_DATA_S12M_TIMECODE)) {
2206 void *tc_data = NULL;
2207 size_t tc_size = 0;
2208
2209 if (ff_alloc_timecode_sei(frame, avctx->framerate, 0, &tc_data, &tc_size) < 0) {
2210 av_log(ctx, AV_LOG_ERROR, "Not enough memory for timecode sei, skipping\n");
2211 }
2212
2213 if (tc_data) {
2214 void *tmp = av_fast_realloc(ctx->sei_data,
2215 &ctx->sei_data_size,
2216 (sei_count + 1) * sizeof(*ctx->sei_data));
2217 if (!tmp) {
2218 av_free(tc_data);
2219 res = AVERROR(ENOMEM);
2220 goto error;
2221 } else {
2222 ctx->sei_data = tmp;
2223 ctx->sei_data[sei_count].payloadSize = (uint32_t)tc_size;
2224 ctx->sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
2225 ctx->sei_data[sei_count].payload = (uint8_t*)tc_data;
2226 sei_count++;
2227 }
2228 }
2229 }
2230
2231 if (!ctx->udu_sei)
2232 return sei_count;
2233
2234 for (i = 0; i < frame->nb_side_data; i++) {
2235 AVFrameSideData *side_data = frame->side_data[i];
2236 void *tmp;
2237
2238 if (side_data->type != AV_FRAME_DATA_SEI_UNREGISTERED)
2239 continue;
2240
2241 tmp = av_fast_realloc(ctx->sei_data,
2242 &ctx->sei_data_size,
2243 (sei_count + 1) * sizeof(*ctx->sei_data));
2244 if (!tmp) {
2245 res = AVERROR(ENOMEM);
2246 goto error;
2247 } else {
2248 ctx->sei_data = tmp;
2249 ctx->sei_data[sei_count].payloadSize = side_data->size;
2250 ctx->sei_data[sei_count].payloadType = SEI_TYPE_USER_DATA_UNREGISTERED;
2251 ctx->sei_data[sei_count].payload = av_memdup(side_data->data, side_data->size);
2252
2253 if (!ctx->sei_data[sei_count].payload) {
2254 res = AVERROR(ENOMEM);
2255 goto error;
2256 }
2257
2258 sei_count++;
2259 }
2260 }
2261
2262 return sei_count;
2263
2264 error:
2265 for (i = 0; i < sei_count; i++)
2266 av_freep(&(ctx->sei_data[i].payload));
2267
2268 return res;
2269 }
2270
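/* Apply runtime changes (display aspect ratio, average/max bitrate, VBV buffer
 * size) to the running encoder via nvEncReconfigureEncoder when supported. */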
2271 static void reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
2272 {
2273 NvencContext *ctx = avctx->priv_data;
2274 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
2275 NVENCSTATUS ret;
2276
2277 NV_ENC_RECONFIGURE_PARAMS params = { 0 };
2278 int needs_reconfig = 0;
2279 int needs_encode_config = 0;
2280 int reconfig_bitrate = 0, reconfig_dar = 0;
2281 int dw, dh;
2282
2283 params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
2284 params.reInitEncodeParams = ctx->init_encode_params;
2285
2286 compute_dar(avctx, &dw, &dh);
2287 if (dw != ctx->init_encode_params.darWidth || dh != ctx->init_encode_params.darHeight) {
2288 av_log(avctx, AV_LOG_VERBOSE,
2289 "aspect ratio change (DAR): %d:%d -> %d:%d\n",
2290 ctx->init_encode_params.darWidth,
2291 ctx->init_encode_params.darHeight, dw, dh);
2292
2293 params.reInitEncodeParams.darHeight = dh;
2294 params.reInitEncodeParams.darWidth = dw;
2295
2296 needs_reconfig = 1;
2297 reconfig_dar = 1;
2298 }
2299
2300 if (ctx->rc != NV_ENC_PARAMS_RC_CONSTQP && ctx->support_dyn_bitrate) {
2301 if (avctx->bit_rate > 0 && params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate != avctx->bit_rate) {
2302 av_log(avctx, AV_LOG_VERBOSE,
2303 "avg bitrate change: %d -> %d\n",
2304 params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate,
2305 (uint32_t)avctx->bit_rate);
2306
2307 params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate = avctx->bit_rate;
2308 reconfig_bitrate = 1;
2309 }
2310
2311 if (avctx->rc_max_rate > 0 && ctx->encode_config.rcParams.maxBitRate != avctx->rc_max_rate) {
2312 av_log(avctx, AV_LOG_VERBOSE,
2313 "max bitrate change: %d -> %d\n",
2314 params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate,
2315 (uint32_t)avctx->rc_max_rate);
2316
2317 params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate = avctx->rc_max_rate;
2318 reconfig_bitrate = 1;
2319 }
2320
2321 if (avctx->rc_buffer_size > 0 && ctx->encode_config.rcParams.vbvBufferSize != avctx->rc_buffer_size) {
2322 av_log(avctx, AV_LOG_VERBOSE,
2323 "vbv buffer size change: %d -> %d\n",
2324 params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize,
2325 avctx->rc_buffer_size);
2326
2327 params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize = avctx->rc_buffer_size;
2328 reconfig_bitrate = 1;
2329 }
2330
2331 if (reconfig_bitrate) {
2332 params.resetEncoder = 1;
2333 params.forceIDR = 1;
2334
2335 needs_encode_config = 1;
2336 needs_reconfig = 1;
2337 }
2338 }
2339
2340 if (!needs_encode_config)
2341 params.reInitEncodeParams.encodeConfig = NULL;
2342
2343 if (needs_reconfig) {
2344 ret = p_nvenc->nvEncReconfigureEncoder(ctx->nvencoder, &params);
2345 if (ret != NV_ENC_SUCCESS) {
2346 nvenc_print_error(avctx, ret, "failed to reconfigure nvenc");
2347 } else {
2348 if (reconfig_dar) {
2349 ctx->init_encode_params.darHeight = dh;
2350 ctx->init_encode_params.darWidth = dw;
2351 }
2352
2353 if (reconfig_bitrate) {
2354 ctx->encode_config.rcParams.averageBitRate = params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate;
2355 ctx->encode_config.rcParams.maxBitRate = params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate;
2356 ctx->encode_config.rcParams.vbvBufferSize = params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize;
2357 }
2358
2359 }
2360 }
2361 }
2362
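/* Submit one input frame to the encoder, or an EOS flush when the frame is
 * NULL/empty. Uploaded surfaces and their timestamps are queued; once NVENC no
 * longer reports NEED_MORE_INPUT, all pending surfaces move to the ready queue. */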
2363 static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
2364 {
2365 NVENCSTATUS nv_status;
2366 NvencSurface *tmp_out_surf, *in_surf;
2367 int res, res2;
2368 int sei_count = 0;
2369 int i;
2370
2371 NvencContext *ctx = avctx->priv_data;
2372 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
2373 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
2374
2375 NV_ENC_PIC_PARAMS pic_params = { 0 };
2376 pic_params.version = NV_ENC_PIC_PARAMS_VER;
2377
2378 if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
2379 return AVERROR(EINVAL);
2380
2381 if (frame && frame->buf[0]) {
2382 in_surf = get_free_frame(ctx);
2383 if (!in_surf)
2384 return AVERROR(EAGAIN);
2385
2386 res = nvenc_push_context(avctx);
2387 if (res < 0)
2388 return res;
2389
2390 reconfig_encoder(avctx, frame);
2391
2392 res = nvenc_upload_frame(avctx, frame, in_surf);
2393
2394 res2 = nvenc_pop_context(avctx);
2395 if (res2 < 0)
2396 return res2;
2397
2398 if (res)
2399 return res;
2400
2401 pic_params.inputBuffer = in_surf->input_surface;
2402 pic_params.bufferFmt = in_surf->format;
2403 pic_params.inputWidth = in_surf->width;
2404 pic_params.inputHeight = in_surf->height;
2405 pic_params.inputPitch = in_surf->pitch;
2406 pic_params.outputBitstream = in_surf->output_surface;
2407
2408 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2409 if (frame->top_field_first)
2410 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
2411 else
2412 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
2413 } else {
2414 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
2415 }
2416
2417 if (ctx->forced_idr >= 0 && frame->pict_type == AV_PICTURE_TYPE_I) {
2418 pic_params.encodePicFlags =
2419 ctx->forced_idr ? NV_ENC_PIC_FLAG_FORCEIDR : NV_ENC_PIC_FLAG_FORCEINTRA;
2420 } else {
2421 pic_params.encodePicFlags = 0;
2422 }
2423
2424 pic_params.inputTimeStamp = frame->pts;
2425
2426 if (ctx->extra_sei) {
2427 res = prepare_sei_data_array(avctx, frame);
2428 if (res < 0)
2429 return res;
2430 sei_count = res;
2431 }
2432
2433 nvenc_codec_specific_pic_params(avctx, &pic_params, ctx->sei_data, sei_count);
2434 } else {
2435 pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
2436 }
2437
2438 res = nvenc_push_context(avctx);
2439 if (res < 0)
2440 return res;
2441
2442 nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
2443
2444 for (i = 0; i < sei_count; i++)
2445 av_freep(&(ctx->sei_data[i].payload));
2446
2447 res = nvenc_pop_context(avctx);
2448 if (res < 0)
2449 return res;
2450
2451 if (nv_status != NV_ENC_SUCCESS &&
2452 nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
2453 return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");
2454
2455 if (frame && frame->buf[0]) {
2456 av_fifo_write(ctx->output_surface_queue, &in_surf, 1);
2457 timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
2458 }
2459
2460 /* all the pending buffers are now ready for output */
2461 if (nv_status == NV_ENC_SUCCESS) {
2462 while (av_fifo_read(ctx->output_surface_queue, &tmp_out_surf, 1) >= 0)
2463 av_fifo_write(ctx->output_surface_ready_queue, &tmp_out_surf, 1);
2464 }
2465
2466 return 0;
2467 }
2468
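/* receive_packet entry point: fetch the next user frame (if any), submit it via
 * nvenc_send_frame(), and return a finished packet once output_ready() allows
 * it; otherwise signal EAGAIN, or EOF when draining is complete. */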
2469 int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
2470 {
2471 NvencSurface *tmp_out_surf;
2472 int res, res2;
2473
2474 NvencContext *ctx = avctx->priv_data;
2475
2476 AVFrame *frame = ctx->frame;
2477
2478 if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
2479 return AVERROR(EINVAL);
2480
2481 if (!frame->buf[0]) {
2482 res = ff_encode_get_frame(avctx, frame);
2483 if (res < 0 && res != AVERROR_EOF)
2484 return res;
2485 }
2486
2487 res = nvenc_send_frame(avctx, frame);
2488 if (res < 0) {
2489 if (res != AVERROR(EAGAIN))
2490 return res;
2491 } else
2492 av_frame_unref(frame);
2493
2494 if (output_ready(avctx, avctx->internal->draining)) {
2495 av_fifo_read(ctx->output_surface_ready_queue, &tmp_out_surf, 1);
2496
2497 res = nvenc_push_context(avctx);
2498 if (res < 0)
2499 return res;
2500
2501 res = process_output_surface(avctx, pkt, tmp_out_surf);
2502
2503 res2 = nvenc_pop_context(avctx);
2504 if (res2 < 0)
2505 return res2;
2506
2507 if (res)
2508 return res;
2509
2510 av_fifo_write(ctx->unused_surface_queue, &tmp_out_surf, 1);
2511 } else if (avctx->internal->draining) {
2512 return AVERROR_EOF;
2513 } else {
2514 return AVERROR(EAGAIN);
2515 }
2516
2517 return 0;
2518 }
2519
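/* Flush: send EOS to the encoder and clear the pending timestamp list. */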
2520 av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx)
2521 {
2522 NvencContext *ctx = avctx->priv_data;
2523
2524 nvenc_send_frame(avctx, NULL);
2525 av_fifo_reset2(ctx->timestamp_list);
2526 }
2527