/*
 * H.264/HEVC hardware encoding using nvidia nvenc
 * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "config_components.h"

#include "nvenc.h"
#include "hevc_sei.h"

#include "libavutil/hwcontext_cuda.h"
#include "libavutil/hwcontext.h"
#include "libavutil/cuda_check.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
#include "libavutil/pixdesc.h"
#include "atsc_a53.h"
#include "encode.h"
#include "internal.h"
#include "packet_internal.h"

/* Wrap a dynamically loaded CUDA driver call with FFmpeg's error check;
 * expects `avctx` and `dl_fn` to be in scope at the call site. */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)

/* Minimum GPU compute capability for NVENC, packed as (major << 4) | minor,
 * i.e. SM 3.0 — compared against the same packing in nvenc_check_device(). */
#define NVENC_CAP 0x30

/* True for any of the constant-bitrate rate-control modes. */
#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR ||             \
                    rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
                    rc == NV_ENC_PARAMS_RC_CBR_HQ)

/* Pixel formats accepted by the nvenc encoders, terminated by AV_PIX_FMT_NONE. */
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_P016,      // Truncated to 10bits
    AV_PIX_FMT_YUV444P16, // Truncated to 10bits
    AV_PIX_FMT_0RGB32,
    AV_PIX_FMT_RGB32,
    AV_PIX_FMT_0BGR32,
    AV_PIX_FMT_BGR32,
    AV_PIX_FMT_X2RGB10,
    AV_PIX_FMT_X2BGR10,
    AV_PIX_FMT_GBRP,
    AV_PIX_FMT_GBRP16,    // Truncated to 10bits
    AV_PIX_FMT_CUDA,
#if CONFIG_D3D11VA
    AV_PIX_FMT_D3D11,
#endif
    AV_PIX_FMT_NONE
};

/* Hardware configurations advertised to the generic encode layer:
 * CUDA frames/device always, D3D11 only when built with D3D11VA. */
const AVCodecHWConfigInternal *const ff_nvenc_hw_configs[] = {
    HW_CONFIG_ENCODER_FRAMES(CUDA,  CUDA),
    HW_CONFIG_ENCODER_DEVICE(NONE,  CUDA),
#if CONFIG_D3D11VA
    HW_CONFIG_ENCODER_FRAMES(D3D11, D3D11VA),
    HW_CONFIG_ENCODER_DEVICE(NONE,  D3D11VA),
#endif
    NULL,
};

/* Formats that carry (at least) 10 bits per component. */
#define IS_10BIT(pix_fmt) (pix_fmt == AV_PIX_FMT_P010      || \
                           pix_fmt == AV_PIX_FMT_P016      || \
                           pix_fmt == AV_PIX_FMT_YUV444P16 || \
                           pix_fmt == AV_PIX_FMT_GBRP16)

/* Formats encoded via the 4:4:4 chroma path. */
#define IS_YUV444(pix_fmt) (pix_fmt == AV_PIX_FMT_YUV444P   || \
                            pix_fmt == AV_PIX_FMT_YUV444P16 || \
                            pix_fmt == AV_PIX_FMT_GBRP      || \
                            pix_fmt == AV_PIX_FMT_GBRP16)

/* Planar RGB formats (G/B/R plane order). */
#define IS_GBRP(pix_fmt) (pix_fmt == AV_PIX_FMT_GBRP || \
                          pix_fmt == AV_PIX_FMT_GBRP16)

/* Mapping from NVENCSTATUS codes to AVERROR codes and human-readable text. */
static const struct {
    NVENCSTATUS nverr;
    int         averr;
    const char *desc;
} nvenc_errors[] = {
    { NV_ENC_SUCCESS,                      0,                "success"                  },
    { NV_ENC_ERR_NO_ENCODE_DEVICE,         AVERROR(ENOENT),  "no encode device"         },
    { NV_ENC_ERR_UNSUPPORTED_DEVICE,       AVERROR(ENOSYS),  "unsupported device"       },
    { NV_ENC_ERR_INVALID_ENCODERDEVICE,    AVERROR(EINVAL),  "invalid encoder device"   },
    { NV_ENC_ERR_INVALID_DEVICE,           AVERROR(EINVAL),  "invalid device"           },
    { NV_ENC_ERR_DEVICE_NOT_EXIST,         AVERROR(EIO),     "device does not exist"    },
    { NV_ENC_ERR_INVALID_PTR,              AVERROR(EFAULT),  "invalid ptr"              },
    { NV_ENC_ERR_INVALID_EVENT,            AVERROR(EINVAL),  "invalid event"            },
    { NV_ENC_ERR_INVALID_PARAM,            AVERROR(EINVAL),  "invalid param"            },
    { NV_ENC_ERR_INVALID_CALL,             AVERROR(EINVAL),  "invalid call"             },
    { NV_ENC_ERR_OUT_OF_MEMORY,            AVERROR(ENOMEM),  "out of memory"            },
    { NV_ENC_ERR_ENCODER_NOT_INITIALIZED,  AVERROR(EINVAL),  "encoder not initialized"  },
    { NV_ENC_ERR_UNSUPPORTED_PARAM,        AVERROR(ENOSYS),  "unsupported param"        },
    { NV_ENC_ERR_LOCK_BUSY,                AVERROR(EAGAIN),  "lock busy"                },
    { NV_ENC_ERR_NOT_ENOUGH_BUFFER,        AVERROR_BUFFER_TOO_SMALL, "not enough buffer"},
    { NV_ENC_ERR_INVALID_VERSION,          AVERROR(EINVAL),  "invalid version"          },
    { NV_ENC_ERR_MAP_FAILED,               AVERROR(EIO),     "map failed"               },
    { NV_ENC_ERR_NEED_MORE_INPUT,          AVERROR(EAGAIN),  "need more input"          },
    { NV_ENC_ERR_ENCODER_BUSY,             AVERROR(EAGAIN),  "encoder busy"             },
    { NV_ENC_ERR_EVENT_NOT_REGISTERD,      AVERROR(EBADF),   "event not registered"     },
    { NV_ENC_ERR_GENERIC,                  AVERROR_UNKNOWN,  "generic error"            },
    { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY,  AVERROR(EINVAL),  "incompatible client key"  },
    { NV_ENC_ERR_UNIMPLEMENTED,            AVERROR(ENOSYS),  "unimplemented"            },
    { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO),     "resource register failed" },
    { NV_ENC_ERR_RESOURCE_NOT_REGISTERED,  AVERROR(EBADF),   "resource not registered"  },
    { NV_ENC_ERR_RESOURCE_NOT_MAPPED,      AVERROR(EBADF),   "resource not mapped"      },
};

/**
 * Translate an NVENCSTATUS into an AVERROR code via the table above.
 *
 * @param err  NVENC status to translate
 * @param desc if non-NULL, receives a static description string
 * @return the matching AVERROR code, or AVERROR_UNKNOWN for unknown statuses
 */
static int nvenc_map_error(NVENCSTATUS err, const char **desc)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(nvenc_errors); i++) {
        if (nvenc_errors[i].nverr == err) {
            if (desc)
                *desc = nvenc_errors[i].desc;
            return nvenc_errors[i].averr;
        }
    }
    if (desc)
        *desc = "unknown error";
    return AVERROR_UNKNOWN;
}

/**
 * Log an NVENC error with its description and, when the SDK provides it,
 * the encoder's last error string.
 *
 * @return the AVERROR code mapped from err (suitable for direct return)
 */
static int nvenc_print_error(AVCodecContext *avctx, NVENCSTATUS err,
                             const char *error_string)
{
    const char *desc;
    const char *details = "(no details)";
    int ret = nvenc_map_error(err, &desc);

#ifdef NVENC_HAVE_GETLASTERRORSTRING
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;

    // Only valid once a session exists; otherwise keep the placeholder.
    if (p_nvenc && ctx->nvencoder)
        details = p_nvenc->nvEncGetLastErrorString(ctx->nvencoder);
#endif

    av_log(avctx, AV_LOG_ERROR, "%s: %s (%d): %s\n", error_string, desc, err, details);

    return ret;
}

/* Preset GUID plus the NVENC_* behavior flags it implies. */
typedef struct GUIDTuple {
    const GUID guid;
    int flags;
} GUIDTuple;

/* Designated-initializer helpers: map a PRESET_* enum slot to an SDK
 * preset GUID (optionally under a different name) with optional flags. */
#define PRESET_ALIAS(alias, name, ...) \
    [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ }

#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__)

/**
 * Resolve ctx->preset into an SDK preset GUID and behavior flags.
 * The presets[] table is indexed by the PRESET_* enum value; with new-style
 * SDKs (NVENC_HAVE_NEW_PRESETS) the legacy names map onto P1..P7 and are
 * flagged deprecated.
 */
static void nvenc_map_preset(NvencContext *ctx)
{
    GUIDTuple presets[] = {
#ifdef NVENC_HAVE_NEW_PRESETS
        PRESET(P1),
        PRESET(P2),
        PRESET(P3),
        PRESET(P4),
        PRESET(P5),
        PRESET(P6),
        PRESET(P7),
        PRESET_ALIAS(SLOW,   P7, NVENC_TWO_PASSES),
        PRESET_ALIAS(MEDIUM, P4, NVENC_ONE_PASS),
        PRESET_ALIAS(FAST,   P1, NVENC_ONE_PASS),
        // Compat aliases
        PRESET_ALIAS(DEFAULT,             P4, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(HP,                  P1, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(HQ,                  P7, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(BD,                  P5, NVENC_DEPRECATED_PRESET),
        PRESET_ALIAS(LOW_LATENCY_DEFAULT, P4, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
        PRESET_ALIAS(LOW_LATENCY_HP,      P1, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
        PRESET_ALIAS(LOW_LATENCY_HQ,      P7, NVENC_DEPRECATED_PRESET | NVENC_LOWLATENCY),
        PRESET_ALIAS(LOSSLESS_DEFAULT,    P4, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
        PRESET_ALIAS(LOSSLESS_HP,         P1, NVENC_DEPRECATED_PRESET | NVENC_LOSSLESS),
#else
        PRESET(DEFAULT),
        PRESET(HP),
        PRESET(HQ),
        PRESET(BD),
        PRESET_ALIAS(SLOW,   HQ, NVENC_TWO_PASSES),
        PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS),
        PRESET_ALIAS(FAST,   HP, NVENC_ONE_PASS),
        PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HP,      NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HQ,      NVENC_LOWLATENCY),
        PRESET(LOSSLESS_DEFAULT,    NVENC_LOSSLESS),
        PRESET(LOSSLESS_HP,         NVENC_LOSSLESS),
#endif
    };

    GUIDTuple *t = &presets[ctx->preset];

    ctx->init_encode_params.presetGUID = t->guid;
    ctx->flags = t->flags;

#ifdef NVENC_HAVE_NEW_PRESETS
    // With new presets, lossless is requested via -tune rather than a preset.
    if (ctx->tuning_info == NV_ENC_TUNING_INFO_LOSSLESS)
        ctx->flags |= NVENC_LOSSLESS;
#endif
}

#undef PRESET
#undef PRESET_ALIAS

/* Log the minimum Nvidia driver version required by the NVENC API headers
 * this build was compiled against (continued below). */
static void nvenc_print_driver_requirement(AVCodecContext
*avctx, int level)
{
    /* Select the minimum driver version matching the newest SDK headers this
     * build knows about; "(unknown)" for header versions newer than the table. */
#if NVENCAPI_CHECK_VERSION(11, 2)
    const char *minver = "(unknown)";
#elif NVENCAPI_CHECK_VERSION(11, 1)
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "471.41";
# else
    const char *minver = "470.57.02";
# endif
#elif NVENCAPI_CHECK_VERSION(11, 0)
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "456.71";
# else
    const char *minver = "455.28";
# endif
#elif NVENCAPI_CHECK_VERSION(10, 0)
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "450.51";
# else
    const char *minver = "445.87";
# endif
#elif NVENCAPI_CHECK_VERSION(9, 1)
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "436.15";
# else
    const char *minver = "435.21";
# endif
#elif NVENCAPI_CHECK_VERSION(9, 0)
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "418.81";
# else
    const char *minver = "418.30";
# endif
#elif NVENCAPI_CHECK_VERSION(8, 2)
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "397.93";
# else
    const char *minver = "396.24";
#endif
#elif NVENCAPI_CHECK_VERSION(8, 1)
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "390.77";
# else
    const char *minver = "390.25";
# endif
#else
# if defined(_WIN32) || defined(__CYGWIN__)
    const char *minver = "378.66";
# else
    const char *minver = "378.13";
# endif
#endif
    av_log(avctx, level, "The minimum required Nvidia driver for nvenc is %s or newer\n", minver);
}

/**
 * Dynamically load the CUDA driver and NVENC libraries, verify the driver
 * supports the API version this build requires, and create the NVENC
 * function-list instance in dl_fn->nvenc_funcs.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NVENCSTATUS err;
    uint32_t nvenc_max_ver;
    int ret;

    ret = cuda_load_functions(&dl_fn->cuda_dl, avctx);
    if (ret < 0)
        return ret;

    ret = nvenc_load_functions(&dl_fn->nvenc_dl, avctx);
    if (ret < 0) {
        // Most likely an outdated driver: tell the user which one is needed.
        nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
        return ret;
    }

    err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
    if (err != NV_ENC_SUCCESS)
        return nvenc_print_error(avctx, err, "Failed to query nvenc max version");

    // API versions are packed as (major << 4) | minor.
    av_log(avctx, AV_LOG_VERBOSE, "Loaded Nvenc version %d.%d\n", nvenc_max_ver >> 4, nvenc_max_ver & 0xf);

    if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) > nvenc_max_ver) {
        av_log(avctx, AV_LOG_ERROR, "Driver does not support the required nvenc API version. "
               "Required: %d.%d Found: %d.%d\n",
               NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
               nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
        nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
        return AVERROR(ENOSYS);
    }

    dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;

    err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
    if (err != NV_ENC_SUCCESS)
        return nvenc_print_error(avctx, err, "Failed to create nvenc instance");

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");

    return 0;
}

/* Make our CUDA context current on this thread; no-op for D3D11 devices. */
static int nvenc_push_context(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    if (ctx->d3d11_device)
        return 0;

    return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
}

/* Pop the current CUDA context from this thread; no-op for D3D11 devices. */
static int nvenc_pop_context(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    CUcontext dummy;

    if (ctx->d3d11_device)
        return 0;

    return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
}

/**
 * Open an NVENC encode session on the configured device (D3D11 or CUDA),
 * storing the session handle in ctx->nvencoder.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static av_cold int nvenc_open_session(AVCodecContext *avctx)
{
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NVENCSTATUS ret;

    params.version    = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
    params.apiVersion = NVENCAPI_VERSION;
    if (ctx->d3d11_device) {
        params.device     = ctx->d3d11_device;
        params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
    } else {
        params.device     = ctx->cu_context;
        params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
    }

    ret = p_nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
    if (ret != NV_ENC_SUCCESS) {
        ctx->nvencoder = NULL;
        return nvenc_print_error(avctx, ret, "OpenEncodeSessionEx failed");
    }

    return 0;
}

/**
 * Check that the open session supports the codec GUID selected in
 * init_encode_params (H.264 or HEVC).
 *
 * @return 0 if supported, AVERROR(ENOSYS) if not, AVERROR(ENOMEM) on OOM
 */
static int nvenc_check_codec_support(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    int i, ret, count = 0;
    GUID *guids = NULL;

    ret = p_nvenc->nvEncGetEncodeGUIDCount(ctx->nvencoder, &count);

    if (ret != NV_ENC_SUCCESS || !count)
        return AVERROR(ENOSYS);

    guids = av_malloc(count * sizeof(GUID));
    if (!guids)
        return AVERROR(ENOMEM);

    ret = p_nvenc->nvEncGetEncodeGUIDs(ctx->nvencoder, guids, count, &count);
    if (ret != NV_ENC_SUCCESS) {
        ret = AVERROR(ENOSYS);
        goto fail;
    }

    ret = AVERROR(ENOSYS);
    for (i = 0; i < count; i++) {
        if (!memcmp(&guids[i], &ctx->init_encode_params.encodeGUID, sizeof(*guids))) {
            ret = 0;
            break;
        }
    }

fail:
    av_free(guids);

    return ret;
}

/**
 * Query a single NVENC capability value for the selected codec GUID.
 * Returns the capability value, or 0 if the query itself failed.
 */
static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NV_ENC_CAPS_PARAM params = { 0 };
    int ret, val = 0;

    params.version     = NV_ENC_CAPS_PARAM_VER;
    params.capsToQuery = cap;

    ret = p_nvenc->nvEncGetEncodeCaps(ctx->nvencoder, ctx->init_encode_params.encodeGUID, &params, &val);

    if (ret == NV_ENC_SUCCESS)
        return val;
    return 0;
}

/**
 * Validate the requested encoder configuration against the device's NVENC
 * capabilities (codec, pixel format, dimensions, B-frames, lookahead, AQ,
 * reference modes, intra refresh, ...). Each unsupported combination logs a
 * warning and returns AVERROR(ENOSYS); features missing at build time (SDK
 * too old) are rejected the same way.
 *
 * @return 0 if the configuration is supported, a negative AVERROR otherwise
 */
static int nvenc_check_capabilities(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    int ret;

    ret = nvenc_check_codec_support(avctx);
    if (ret < 0) {
        av_log(avctx, AV_LOG_WARNING, "Codec not supported\n");
        return ret;
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
    if (IS_YUV444(ctx->data_pix_fmt) && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "YUV444P not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
    if (ctx->flags & NVENC_LOSSLESS && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
    if (ret < avctx->width) {
        av_log(avctx, AV_LOG_WARNING, "Width %d exceeds %d\n",
               avctx->width, ret);
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
    if (ret < avctx->height) {
        av_log(avctx, AV_LOG_WARNING, "Height %d exceeds %d\n",
               avctx->height, ret);
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
    if (ret < avctx->max_b_frames) {
        av_log(avctx, AV_LOG_WARNING, "Max B-frames %d exceed %d\n",
               avctx->max_b_frames, ret);

        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_FIELD_ENCODING);
    if (ret < 1 && avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
        av_log(avctx, AV_LOG_WARNING,
               "Interlaced encoding is not supported. Supported level: %d\n",
               ret);
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
    if (IS_10BIT(ctx->data_pix_fmt) && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "10 bit encode not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD);
    if (ctx->rc_lookahead > 0 && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "RC lookahead not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ);
    if (ctx->temporal_aq > 0 && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "Temporal AQ not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION);
    if (ctx->weighted_pred > 0 && ret <= 0) {
        av_log (avctx, AV_LOG_WARNING, "Weighted Prediction not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CABAC);
    if (ctx->coder == NV_ENC_H264_ENTROPY_CODING_MODE_CABAC && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "CABAC entropy coding not supported\n");
        return AVERROR(ENOSYS);
    }

#ifdef NVENC_HAVE_BFRAME_REF_MODE
    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE);
    // Cap value is a bitmask/level: 1 and 3 include "each B frame as ref".
    if (ctx->b_ref_mode == NV_ENC_BFRAME_REF_MODE_EACH && ret != 1 && ret != 3) {
        av_log(avctx, AV_LOG_WARNING, "Each B frame as reference is not supported\n");
        return AVERROR(ENOSYS);
    } else if (ctx->b_ref_mode != NV_ENC_BFRAME_REF_MODE_DISABLED && ret == 0) {
        av_log(avctx, AV_LOG_WARNING, "B frames as references are not supported\n");
        return AVERROR(ENOSYS);
    }
#else
    if (ctx->b_ref_mode != 0) {
        av_log(avctx, AV_LOG_WARNING, "B frames as references need SDK 8.1 at build time\n");
        return AVERROR(ENOSYS);
    }
#endif

#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES);
    if(avctx->refs != NV_ENC_NUM_REF_FRAMES_AUTOSELECT && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "Multiple reference frames are not supported by the device\n");
        return AVERROR(ENOSYS);
    }
#else
    if(avctx->refs != 0) {
        av_log(avctx, AV_LOG_WARNING, "Multiple reference frames need SDK 9.1 at build time\n");
        return AVERROR(ENOSYS);
    }
#endif

#ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH
    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SINGLE_SLICE_INTRA_REFRESH);
    if(ctx->single_slice_intra_refresh && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "Single slice intra refresh not supported by the device\n");
        return AVERROR(ENOSYS);
    }
#else
    if(ctx->single_slice_intra_refresh) {
        av_log(avctx, AV_LOG_WARNING, "Single slice intra refresh needs SDK 11.1 at build time\n");
        return AVERROR(ENOSYS);
    }
#endif

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_INTRA_REFRESH);
    if((ctx->intra_refresh || ctx->single_slice_intra_refresh) && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "Intra refresh not supported by the device\n");
        return AVERROR(ENOSYS);
    }

#ifndef NVENC_HAVE_HEVC_CONSTRAINED_ENCODING
    if (ctx->constrained_encoding && avctx->codec->id == AV_CODEC_ID_HEVC) {
        av_log(avctx, AV_LOG_WARNING, "HEVC constrained encoding needs SDK 10.0 at build time\n");
        return AVERROR(ENOSYS);
    }
#endif

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING);
    if(ctx->constrained_encoding && ret <= 0) {
        av_log(avctx, AV_LOG_WARNING, "Constrained encoding not supported by the device\n");
        return AVERROR(ENOSYS);
    }

    // Remember whether the device can change bitrate at runtime (reconfigure).
    ctx->support_dyn_bitrate = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);

    return 0;
}

/**
 * Probe CUDA device `idx` for NVENC support: query its name and compute
 * capability, create a context, open a session, and run the capability
 * checks. Keeps the context/session only when this is the selected device
 * (continued below).
 */
static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn =
&ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    char name[128] = { 0};
    int major, minor, ret;
    CUdevice cu_device;
    int loglevel = AV_LOG_VERBOSE;

    // When listing devices, show the probe results to the user.
    if (ctx->device == LIST_DEVICES)
        loglevel = AV_LOG_INFO;

    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
    if (ret < 0)
        return ret;

    av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
    // NVENC_CAP packs the minimum SM version the same way: (major << 4) | minor.
    if (((major << 4) | minor) < NVENC_CAP) {
        av_log(avctx, loglevel, "does not support NVENC\n");
        goto fail;
    }

    if (ctx->device != idx && ctx->device != ANY_DEVICE)
        return -1;

    ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
    if (ret < 0)
        goto fail;

    ctx->cu_context = ctx->cu_context_internal;
    ctx->cu_stream = NULL;

    // cuCtxCreate leaves the new context current; pop it so later code can
    // push/pop it symmetrically.
    if ((ret = nvenc_pop_context(avctx)) < 0)
        goto fail2;

    if ((ret = nvenc_open_session(avctx)) < 0)
        goto fail2;

    if ((ret = nvenc_check_capabilities(avctx)) < 0)
        goto fail3;

    av_log(avctx, loglevel, "supports NVENC\n");

    dl_fn->nvenc_device_count++;

    // Keep the session open on the chosen (or first acceptable) device.
    if (ctx->device == idx || ctx->device == ANY_DEVICE)
        return 0;

fail3:
    if ((ret = nvenc_push_context(avctx)) < 0)
        return ret;

    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

    if ((ret = nvenc_pop_context(avctx)) < 0)
        return ret;

fail2:
    CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
    ctx->cu_context_internal = NULL;

fail:
    return AVERROR(ENOSYS);
}

/**
 * Select the codec GUID and encoding device. With a hardware frames/device
 * context the CUDA context or D3D11 device is taken from it; otherwise CUDA
 * devices are enumerated and probed via nvenc_check_device().
 *
 * @return 0 on success, AVERROR_EXIT after -gpu list, negative AVERROR on error
 */
static av_cold int nvenc_setup_device(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
        break;
    case AV_CODEC_ID_HEVC:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
        break;
    default:
        return AVERROR_BUG;
    }

    nvenc_map_preset(ctx);

    if (ctx->flags & NVENC_DEPRECATED_PRESET)
        av_log(avctx, AV_LOG_WARNING, "The selected preset is deprecated. Use p1 to p7 + -tune or fast/medium/slow.\n");

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
        AVHWFramesContext   *frames_ctx;
        AVHWDeviceContext   *hwdev_ctx;
        AVCUDADeviceContext *cuda_device_hwctx = NULL;
#if CONFIG_D3D11VA
        AVD3D11VADeviceContext *d3d11_device_hwctx = NULL;
#endif
        int ret;

        // Prefer the frames context; fall back to a bare device context.
        if (avctx->hw_frames_ctx) {
            frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
            if (frames_ctx->format == AV_PIX_FMT_CUDA)
                cuda_device_hwctx = frames_ctx->device_ctx->hwctx;
#if CONFIG_D3D11VA
            else if (frames_ctx->format == AV_PIX_FMT_D3D11)
                d3d11_device_hwctx = frames_ctx->device_ctx->hwctx;
#endif
            else
                return AVERROR(EINVAL);
        } else if (avctx->hw_device_ctx) {
            hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
            if (hwdev_ctx->type == AV_HWDEVICE_TYPE_CUDA)
                cuda_device_hwctx = hwdev_ctx->hwctx;
#if CONFIG_D3D11VA
            else if (hwdev_ctx->type == AV_HWDEVICE_TYPE_D3D11VA)
                d3d11_device_hwctx = hwdev_ctx->hwctx;
#endif
            else
                return AVERROR(EINVAL);
        } else {
            return AVERROR(EINVAL);
        }

        if (cuda_device_hwctx) {
            ctx->cu_context = cuda_device_hwctx->cuda_ctx;
            ctx->cu_stream  = cuda_device_hwctx->stream;
        }
#if CONFIG_D3D11VA
        else if (d3d11_device_hwctx) {
            ctx->d3d11_device = d3d11_device_hwctx->device;
            ID3D11Device_AddRef(ctx->d3d11_device);
        }
#endif

        ret = nvenc_open_session(avctx);
        if (ret < 0)
            return ret;

        ret = nvenc_check_capabilities(avctx);
        if (ret < 0) {
            av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
            return ret;
        }
    } else {
        int i, nb_devices = 0;

        if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
            return AVERROR_UNKNOWN;

        if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
            return AVERROR_UNKNOWN;

        if (!nb_devices) {
            av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
            return AVERROR_EXTERNAL;
        }

        av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", nb_devices);

        dl_fn->nvenc_device_count = 0;
        for (i = 0; i < nb_devices; ++i) {
            if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
                return 0;
        }

        if (ctx->device == LIST_DEVICES)
            return AVERROR_EXIT;

        if (!dl_fn->nvenc_device_count) {
            av_log(avctx, AV_LOG_FATAL, "No capable devices found\n");
            return AVERROR_EXTERNAL;
        }

        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->device, nb_devices);
        return AVERROR(EINVAL);
    }

    return 0;
}

/**
 * Configure constant-QP rate control from -init_qp* / -qp, deriving the
 * I/B QPs from i_quant_factor/b_quant_factor when not given explicitly
 * (continued below).
 */
static av_cold void set_constqp(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;

    if (ctx->init_qp_p >= 0) {
        rc->constQP.qpInterP = ctx->init_qp_p;
        if (ctx->init_qp_i >= 0 && ctx->init_qp_b >= 0) {
            rc->constQP.qpIntra  = ctx->init_qp_i;
            rc->constQP.qpInterB = ctx->init_qp_b;
        } else if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
            // Derive I/B QPs from the P QP via the lavc quant factors/offsets.
            rc->constQP.qpIntra = av_clip(
                rc->constQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
            rc->constQP.qpInterB = av_clip(
                rc->constQP.qpInterP *
                fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
        } else {
            rc->constQP.qpIntra  = rc->constQP.qpInterP;
            rc->constQP.qpInterB = rc->constQP.qpInterP;
        }
    } else if (ctx->cqp >= 0) {
        rc->constQP.qpInterP = rc->constQP.qpInterB = rc->constQP.qpIntra = ctx->cqp;
        if (avctx->b_quant_factor != 0.0)
            rc->constQP.qpInterB = av_clip(ctx->cqp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
        if (avctx->i_quant_factor != 0.0)
            rc->constQP.qpIntra = av_clip(ctx->cqp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
    }

    // QP range is meaningless in constant-QP mode.
    avctx->qmin = -1;
    avctx->qmax = -1;
}

/**
 * Configure VBR rate control: apply qmin/qmax as NVENC min/max QP and seed
 * the initial QPs, deriving I/B values from the P QP via the lavc quant
 * factors/offsets unless -init_qp_i / -init_qp_b are set.
 */
static av_cold void set_vbr(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;
    int qp_inter_p;

    if (avctx->qmin >= 0 && avctx->qmax >= 0) {
        rc->enableMinQP = 1;
        rc->enableMaxQP = 1;

        rc->minQP.qpInterB = avctx->qmin;
        rc->minQP.qpInterP = avctx->qmin;
        rc->minQP.qpIntra  = avctx->qmin;

        rc->maxQP.qpInterB = avctx->qmax;
        rc->maxQP.qpInterP = avctx->qmax;
        rc->maxQP.qpIntra  = avctx->qmax;

        qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
    } else if (avctx->qmin >= 0) {
        rc->enableMinQP = 1;

        rc->minQP.qpInterB = avctx->qmin;
        rc->minQP.qpInterP = avctx->qmin;
        rc->minQP.qpIntra  = avctx->qmin;

        qp_inter_p = avctx->qmin;
    } else {
        qp_inter_p = 26; // default to 26
    }

    rc->enableInitialRCQP = 1;

    if (ctx->init_qp_p < 0) {
        rc->initialRCQP.qpInterP = qp_inter_p;
    } else {
        rc->initialRCQP.qpInterP = ctx->init_qp_p;
    }

    if (ctx->init_qp_i < 0) {
        if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
            rc->initialRCQP.qpIntra = av_clip(
                rc->initialRCQP.qpInterP * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
        } else {
            rc->initialRCQP.qpIntra = rc->initialRCQP.qpInterP;
        }
    } else {
        rc->initialRCQP.qpIntra = ctx->init_qp_i;
    }

    if (ctx->init_qp_b < 0) {
        if (avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
            rc->initialRCQP.qpInterB = av_clip(
                rc->initialRCQP.qpInterP * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
        } else {
            rc->initialRCQP.qpInterB = rc->initialRCQP.qpInterP;
        }
    } else {
        rc->initialRCQP.qpInterB = ctx->init_qp_b;
    }
}

/* Lossless encoding: constant QP of 0 for all frame types. */
static av_cold void set_lossless(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
    rc->constQP.qpInterB = 0;
    rc->constQP.qpInterP = 0;
    rc->constQP.qpIntra  = 0;

    avctx->qmin = -1;
    avctx->qmax = -1;
}

/**
 * Apply the user-selected rate-control mode (ctx->rc), dispatching to the
 * constqp/vbr helpers where the mode needs extra parameters.
 */
static void nvenc_override_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    switch (ctx->rc) {
    case NV_ENC_PARAMS_RC_CONSTQP:
        set_constqp(avctx);
        return;
    case NV_ENC_PARAMS_RC_VBR_MINQP:
        if (avctx->qmin < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "The variable bitrate rate-control requires "
                   "the 'qmin' option set.\n");
            set_vbr(avctx);
            return;
        }
        /* fall through */
    case NV_ENC_PARAMS_RC_VBR_HQ:
    case NV_ENC_PARAMS_RC_VBR:
        set_vbr(avctx);
        break;
    case NV_ENC_PARAMS_RC_CBR:
    case NV_ENC_PARAMS_RC_CBR_HQ:
    case NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ:
        break;
    }

    rc->rateControlMode = ctx->rc;
}

/**
 * Compute the number of input/output surfaces needed for the configured
 * B-frame depth and lookahead, growing a user-specified -surfaces value when
 * it is too small and clamping to MAX_REGISTERED_FRAMES. Also caps
 * async_depth to nb_surfaces - 1.
 *
 * @return 0 (always succeeds)
 */
static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    // default minimum of 4 surfaces
    // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
    // another multiply by 2 to avoid blocking next PBB group
    int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);

    // lookahead enabled
    if (ctx->rc_lookahead > 0) {
        // +1 is to account for lkd_bound calculation later
        // +4 is to allow sufficient pipelining with lookahead
        nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
        if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
        {
            av_log(avctx, AV_LOG_WARNING,
                   "Defined rc_lookahead requires more surfaces, "
                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
        }
        ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
    } else {
        if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
        {
            av_log(avctx, AV_LOG_WARNING,
                   "Defined b-frame requires more surfaces, "
                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
            ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
        }
        else if (ctx->nb_surfaces <= 0)
            ctx->nb_surfaces = nb_surfaces;
        // otherwise use user specified value
    }

    ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
    ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);

    return 0;
}

/**
 * Set up the NV_ENC_RC_PARAMS from the lavc options: choose a rate-control
 * mode when none was given (from -cbr/-cqp/-cq or legacy twopass flags),
 * handle multipass, bitrate/VBV, AQ, lookahead, and the CQ "target quality"
 * mode. Called during encoder init before the encode config is submitted.
 */
static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;

    if (avctx->global_quality > 0)
        av_log(avctx, AV_LOG_WARNING, "Using global_quality with nvenc is deprecated. Use qp instead.\n");

    if (ctx->cqp < 0 && avctx->global_quality > 0)
        ctx->cqp = avctx->global_quality;

    if (avctx->bit_rate > 0) {
        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        // Preset provided an average bitrate: mirror it as the max.
        ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
    }

    if (avctx->rc_max_rate > 0)
        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;

#ifdef NVENC_HAVE_MULTIPASS
    ctx->encode_config.rcParams.multiPass = ctx->multipass;

    // Legacy fast/slow preset aliases force single/two-pass.
    if (ctx->flags & NVENC_ONE_PASS)
        ctx->encode_config.rcParams.multiPass = NV_ENC_MULTI_PASS_DISABLED;
    if (ctx->flags & NVENC_TWO_PASSES || ctx->twopass > 0)
        ctx->encode_config.rcParams.multiPass = NV_ENC_TWO_PASS_FULL_RESOLUTION;

    if (ctx->rc < 0) {
        if (ctx->cbr) {
            ctx->rc = NV_ENC_PARAMS_RC_CBR;
        } else if (ctx->cqp >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
        } else if (ctx->quality >= 0.0f) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR;
        }
    }
#else
    if (ctx->rc < 0) {
        if (ctx->flags & NVENC_ONE_PASS)
            ctx->twopass = 0;
        if (ctx->flags & NVENC_TWO_PASSES)
            ctx->twopass = 1;

        if (ctx->twopass < 0)
            ctx->twopass = (ctx->flags & NVENC_LOWLATENCY) != 0;

        if (ctx->cbr) {
            if (ctx->twopass) {
                ctx->rc = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
            } else {
                ctx->rc = NV_ENC_PARAMS_RC_CBR;
            }
        } else if (ctx->cqp >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
        } else if (ctx->twopass) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR_HQ;
        } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
        }
    }
#endif

    if (ctx->rc >= 0 && ctx->rc & RC_MODE_DEPRECATED) {
        av_log(avctx, AV_LOG_WARNING, "Specified rc mode is deprecated.\n");
        av_log(avctx, AV_LOG_WARNING, "Use -rc constqp/cbr/vbr, -tune and -multipass instead.\n");

        // Strip the marker bit so the raw SDK mode value remains.
        ctx->rc &= ~RC_MODE_DEPRECATED;
    }

#ifdef NVENC_HAVE_QP_CHROMA_OFFSETS
    ctx->encode_config.rcParams.cbQPIndexOffset = ctx->qp_cb_offset;
    ctx->encode_config.rcParams.crQPIndexOffset = ctx->qp_cr_offset;
#else
    if (ctx->qp_cb_offset || ctx->qp_cr_offset)
        av_log(avctx, AV_LOG_WARNING, "Failed setting QP CB/CR offsets, SDK 11.1 or greater required at compile time.\n");
#endif

#ifdef NVENC_HAVE_LDKFS
    if (ctx->ldkfs)
        ctx->encode_config.rcParams.lowDelayKeyFrameScale = ctx->ldkfs;
#endif

    if (ctx->flags & NVENC_LOSSLESS) {
        set_lossless(avctx);
    } else if (ctx->rc >= 0) {
        nvenc_override_rate_control(avctx);
    } else {
        // No explicit mode and nothing to infer one from: plain VBR.
        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
        set_vbr(avctx);
    }

    if (avctx->rc_buffer_size > 0) {
        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        avctx->rc_buffer_size = ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
    }

    if (ctx->aq) {
        ctx->encode_config.rcParams.enableAQ   = 1;
        ctx->encode_config.rcParams.aqStrength = ctx->aq_strength;
        av_log(avctx, AV_LOG_VERBOSE, "AQ enabled.\n");
    }

    if (ctx->temporal_aq) {
        ctx->encode_config.rcParams.enableTemporalAQ = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ enabled.\n");
    }

    if (ctx->rc_lookahead > 0) {
        // Lookahead depth is bounded by the surfaces left over after the
        // B-frame interval and pipelining margin (see nvenc_recalc_surfaces).
        int lkd_bound = FFMIN(ctx->nb_surfaces, ctx->async_depth) -
                        ctx->encode_config.frameIntervalP - 4;

        if (lkd_bound < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "Lookahead not enabled. Increase buffer delay (-delay).\n");
        } else {
            ctx->encode_config.rcParams.enableLookahead = 1;
            ctx->encode_config.rcParams.lookaheadDepth  = av_clip(ctx->rc_lookahead, 0, lkd_bound);
            ctx->encode_config.rcParams.disableIadapt   = ctx->no_scenecut;
            ctx->encode_config.rcParams.disableBadapt   = !ctx->b_adapt;
            av_log(avctx, AV_LOG_VERBOSE,
                   "Lookahead enabled: depth %d, scenecut %s, B-adapt %s.\n",
                   ctx->encode_config.rcParams.lookaheadDepth,
                   ctx->encode_config.rcParams.disableIadapt ? "disabled" : "enabled",
                   ctx->encode_config.rcParams.disableBadapt ? "disabled" : "enabled");
        }
    }

    if (ctx->strict_gop) {
        ctx->encode_config.rcParams.strictGOPTarget = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Strict GOP target enabled.\n");
    }

    if (ctx->nonref_p)
        ctx->encode_config.rcParams.enableNonRefP = 1;

    if (ctx->zerolatency)
        ctx->encode_config.rcParams.zeroReorderDelay = 1;

    if (ctx->quality) {
        //convert from float to fixed point 8.8
        int tmp_quality = (int)(ctx->quality * 256.0f);
        ctx->encode_config.rcParams.targetQuality    = (uint8_t)(tmp_quality >> 8);
        ctx->encode_config.rcParams.targetQualityLSB = (uint8_t)(tmp_quality & 0xff);

        av_log(avctx, AV_LOG_VERBOSE, "CQ(%d) mode enabled.\n", tmp_quality);

        // CQ mode shall discard avg bitrate/vbv buffer size and honor only max bitrate
        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate = 0;
        ctx->encode_config.rcParams.vbvBufferSize  = avctx->rc_buffer_size = 0;
        ctx->encode_config.rcParams.maxBitRate     = avctx->rc_max_rate;
    }
}

/* H.264-specific encode config setup; truncated in this view — body
 * continues past the visible end of the chunk. */
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
{
    NvencContext *ctx                      = avctx->priv_data;
    NV_ENC_CONFIG *cc                      = &ctx->encode_config;
    NV_ENC_CONFIG_H264 *h264               = &cc->encodeCodecConfig.h264Config;
    NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;

    vui->colourMatrix =
IS_GBRP(ctx->data_pix_fmt) ? AVCOL_SPC_RGB : avctx->colorspace; 1101 vui->colourPrimaries = avctx->color_primaries; 1102 vui->transferCharacteristics = avctx->color_trc; 1103 vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG 1104 || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P); 1105 1106 vui->colourDescriptionPresentFlag = 1107 (vui->colourMatrix != 2 || vui->colourPrimaries != 2 || vui->transferCharacteristics != 2); 1108 1109 vui->videoSignalTypePresentFlag = 1110 (vui->colourDescriptionPresentFlag 1111 || vui->videoFormat != 5 1112 || vui->videoFullRangeFlag != 0); 1113 1114 h264->sliceMode = 3; 1115 h264->sliceModeData = avctx->slices > 0 ? avctx->slices : 1; 1116 1117 if (ctx->intra_refresh) { 1118 h264->enableIntraRefresh = 1; 1119 h264->intraRefreshPeriod = avctx->gop_size; 1120 h264->intraRefreshCnt = avctx->gop_size - 1; 1121#ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH 1122 h264->singleSliceIntraRefresh = ctx->single_slice_intra_refresh; 1123#endif 1124 } 1125 1126 if (ctx->constrained_encoding) 1127 h264->enableConstrainedEncoding = 1; 1128 1129 h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0; 1130 h264->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 
0 : 1; 1131 h264->outputAUD = ctx->aud; 1132 1133 if (ctx->dpb_size >= 0) { 1134 /* 0 means "let the hardware decide" */ 1135 h264->maxNumRefFrames = ctx->dpb_size; 1136 } 1137 1138 if (ctx->intra_refresh) { 1139 h264->idrPeriod = NVENC_INFINITE_GOPLENGTH; 1140 } else if (avctx->gop_size >= 0) { 1141 h264->idrPeriod = avctx->gop_size; 1142 } 1143 1144 if (IS_CBR(cc->rcParams.rateControlMode)) { 1145 h264->outputBufferingPeriodSEI = 1; 1146 } 1147 1148 h264->outputPictureTimingSEI = 1; 1149 1150 if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || 1151 cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_HQ || 1152 cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_VBR_HQ) { 1153 h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE; 1154 h264->fmoMode = NV_ENC_H264_FMO_DISABLE; 1155 } 1156 1157 if (ctx->flags & NVENC_LOSSLESS) { 1158 h264->qpPrimeYZeroTransformBypassFlag = 1; 1159 } else { 1160 switch(ctx->profile) { 1161 case NV_ENC_H264_PROFILE_BASELINE: 1162 cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID; 1163 avctx->profile = FF_PROFILE_H264_BASELINE; 1164 break; 1165 case NV_ENC_H264_PROFILE_MAIN: 1166 cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID; 1167 avctx->profile = FF_PROFILE_H264_MAIN; 1168 break; 1169 case NV_ENC_H264_PROFILE_HIGH: 1170 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; 1171 avctx->profile = FF_PROFILE_H264_HIGH; 1172 break; 1173 case NV_ENC_H264_PROFILE_HIGH_444P: 1174 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID; 1175 avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE; 1176 break; 1177 } 1178 } 1179 1180 // force setting profile as high444p if input is AV_PIX_FMT_YUV444P 1181 if (IS_YUV444(ctx->data_pix_fmt)) { 1182 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID; 1183 avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE; 1184 } 1185 1186 h264->chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 
3 : 1; 1187 1188 h264->level = ctx->level; 1189 1190 if (ctx->coder >= 0) 1191 h264->entropyCodingMode = ctx->coder; 1192 1193#ifdef NVENC_HAVE_BFRAME_REF_MODE 1194 h264->useBFramesAsRef = ctx->b_ref_mode; 1195#endif 1196 1197#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES 1198 h264->numRefL0 = avctx->refs; 1199 h264->numRefL1 = avctx->refs; 1200#endif 1201 1202 return 0; 1203} 1204 1205static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx) 1206{ 1207 NvencContext *ctx = avctx->priv_data; 1208 NV_ENC_CONFIG *cc = &ctx->encode_config; 1209 NV_ENC_CONFIG_HEVC *hevc = &cc->encodeCodecConfig.hevcConfig; 1210 NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters; 1211 1212 vui->colourMatrix = IS_GBRP(ctx->data_pix_fmt) ? AVCOL_SPC_RGB : avctx->colorspace; 1213 vui->colourPrimaries = avctx->color_primaries; 1214 vui->transferCharacteristics = avctx->color_trc; 1215 vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG 1216 || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P); 1217 1218 vui->colourDescriptionPresentFlag = 1219 (vui->colourMatrix != 2 || vui->colourPrimaries != 2 || vui->transferCharacteristics != 2); 1220 1221 vui->videoSignalTypePresentFlag = 1222 (vui->colourDescriptionPresentFlag 1223 || vui->videoFormat != 5 1224 || vui->videoFullRangeFlag != 0); 1225 1226 hevc->sliceMode = 3; 1227 hevc->sliceModeData = avctx->slices > 0 ? 
avctx->slices : 1; 1228 1229 if (ctx->intra_refresh) { 1230 hevc->enableIntraRefresh = 1; 1231 hevc->intraRefreshPeriod = avctx->gop_size; 1232 hevc->intraRefreshCnt = avctx->gop_size - 1; 1233#ifdef NVENC_HAVE_SINGLE_SLICE_INTRA_REFRESH 1234 hevc->singleSliceIntraRefresh = ctx->single_slice_intra_refresh; 1235#endif 1236 } 1237 1238#ifdef NVENC_HAVE_HEVC_CONSTRAINED_ENCODING 1239 if (ctx->constrained_encoding) 1240 hevc->enableConstrainedEncoding = 1; 1241#endif 1242 1243 hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0; 1244 hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1; 1245 hevc->outputAUD = ctx->aud; 1246 1247 if (ctx->dpb_size >= 0) { 1248 /* 0 means "let the hardware decide" */ 1249 hevc->maxNumRefFramesInDPB = ctx->dpb_size; 1250 } 1251 1252 if (ctx->intra_refresh) { 1253 hevc->idrPeriod = NVENC_INFINITE_GOPLENGTH; 1254 } else if (avctx->gop_size >= 0) { 1255 hevc->idrPeriod = avctx->gop_size; 1256 } 1257 1258 if (IS_CBR(cc->rcParams.rateControlMode)) { 1259 hevc->outputBufferingPeriodSEI = 1; 1260 } 1261 1262 hevc->outputPictureTimingSEI = 1; 1263 1264 switch (ctx->profile) { 1265 case NV_ENC_HEVC_PROFILE_MAIN: 1266 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID; 1267 avctx->profile = FF_PROFILE_HEVC_MAIN; 1268 break; 1269 case NV_ENC_HEVC_PROFILE_MAIN_10: 1270 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; 1271 avctx->profile = FF_PROFILE_HEVC_MAIN_10; 1272 break; 1273 case NV_ENC_HEVC_PROFILE_REXT: 1274 cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID; 1275 avctx->profile = FF_PROFILE_HEVC_REXT; 1276 break; 1277 } 1278 1279 // force setting profile as main10 if input is 10 bit 1280 if (IS_10BIT(ctx->data_pix_fmt)) { 1281 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; 1282 avctx->profile = FF_PROFILE_HEVC_MAIN_10; 1283 } 1284 1285 // force setting profile as rext if input is yuv444 1286 if (IS_YUV444(ctx->data_pix_fmt)) { 1287 cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID; 1288 
avctx->profile = FF_PROFILE_HEVC_REXT; 1289 } 1290 1291 hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1; 1292 1293 hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0; 1294 1295 hevc->level = ctx->level; 1296 1297 hevc->tier = ctx->tier; 1298 1299#ifdef NVENC_HAVE_HEVC_BFRAME_REF_MODE 1300 hevc->useBFramesAsRef = ctx->b_ref_mode; 1301#endif 1302 1303#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES 1304 hevc->numRefL0 = avctx->refs; 1305 hevc->numRefL1 = avctx->refs; 1306#endif 1307 1308 return 0; 1309} 1310 1311static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx) 1312{ 1313 switch (avctx->codec->id) { 1314 case AV_CODEC_ID_H264: 1315 return nvenc_setup_h264_config(avctx); 1316 case AV_CODEC_ID_HEVC: 1317 return nvenc_setup_hevc_config(avctx); 1318 /* Earlier switch/case will return if unknown codec is passed. */ 1319 } 1320 1321 return 0; 1322} 1323 1324static void compute_dar(AVCodecContext *avctx, int *dw, int *dh) { 1325 int sw, sh; 1326 1327 sw = avctx->width; 1328 sh = avctx->height; 1329 1330 if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) { 1331 sw *= avctx->sample_aspect_ratio.num; 1332 sh *= avctx->sample_aspect_ratio.den; 1333 } 1334 1335 av_reduce(dw, dh, sw, sh, 1024 * 1024); 1336} 1337 1338static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) 1339{ 1340 NvencContext *ctx = avctx->priv_data; 1341 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; 1342 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; 1343 1344 NV_ENC_PRESET_CONFIG preset_config = { 0 }; 1345 NVENCSTATUS nv_status = NV_ENC_SUCCESS; 1346 AVCPBProperties *cpb_props; 1347 int res = 0; 1348 int dw, dh; 1349 1350 ctx->encode_config.version = NV_ENC_CONFIG_VER; 1351 ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER; 1352 1353 ctx->init_encode_params.encodeHeight = avctx->height; 1354 ctx->init_encode_params.encodeWidth = avctx->width; 1355 1356 ctx->init_encode_params.encodeConfig = 
&ctx->encode_config; 1357 1358 preset_config.version = NV_ENC_PRESET_CONFIG_VER; 1359 preset_config.presetCfg.version = NV_ENC_CONFIG_VER; 1360 1361#ifdef NVENC_HAVE_NEW_PRESETS 1362 ctx->init_encode_params.tuningInfo = ctx->tuning_info; 1363 1364 if (ctx->flags & NVENC_LOSSLESS) 1365 ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOSSLESS; 1366 else if (ctx->flags & NVENC_LOWLATENCY) 1367 ctx->init_encode_params.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY; 1368 1369 nv_status = p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder, 1370 ctx->init_encode_params.encodeGUID, 1371 ctx->init_encode_params.presetGUID, 1372 ctx->init_encode_params.tuningInfo, 1373 &preset_config); 1374#else 1375 nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder, 1376 ctx->init_encode_params.encodeGUID, 1377 ctx->init_encode_params.presetGUID, 1378 &preset_config); 1379#endif 1380 if (nv_status != NV_ENC_SUCCESS) 1381 return nvenc_print_error(avctx, nv_status, "Cannot get the preset configuration"); 1382 1383 memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config)); 1384 1385 ctx->encode_config.version = NV_ENC_CONFIG_VER; 1386 1387 compute_dar(avctx, &dw, &dh); 1388 ctx->init_encode_params.darHeight = dh; 1389 ctx->init_encode_params.darWidth = dw; 1390 1391 if (avctx->framerate.num > 0 && avctx->framerate.den > 0) { 1392 ctx->init_encode_params.frameRateNum = avctx->framerate.num; 1393 ctx->init_encode_params.frameRateDen = avctx->framerate.den; 1394 } else { 1395 ctx->init_encode_params.frameRateNum = avctx->time_base.den; 1396 ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame; 1397 } 1398 1399 ctx->init_encode_params.enableEncodeAsync = 0; 1400 ctx->init_encode_params.enablePTD = 1; 1401 1402#ifdef NVENC_HAVE_NEW_PRESETS 1403 /* If lookahead isn't set from CLI, use value from preset. 1404 * P6 & P7 presets may enable lookahead for better quality. 
1405 * */ 1406 if (ctx->rc_lookahead == 0 && ctx->encode_config.rcParams.enableLookahead) 1407 ctx->rc_lookahead = ctx->encode_config.rcParams.lookaheadDepth; 1408#endif 1409 1410 if (ctx->weighted_pred == 1) 1411 ctx->init_encode_params.enableWeightedPrediction = 1; 1412 1413 if (ctx->bluray_compat) { 1414 ctx->aud = 1; 1415 ctx->dpb_size = FFMIN(FFMAX(avctx->refs, 0), 6); 1416 avctx->max_b_frames = FFMIN(avctx->max_b_frames, 3); 1417 switch (avctx->codec->id) { 1418 case AV_CODEC_ID_H264: 1419 /* maximum level depends on used resolution */ 1420 break; 1421 case AV_CODEC_ID_HEVC: 1422 ctx->level = NV_ENC_LEVEL_HEVC_51; 1423 ctx->tier = NV_ENC_TIER_HEVC_HIGH; 1424 break; 1425 } 1426 } 1427 1428 if (avctx->gop_size > 0) { 1429 if (avctx->max_b_frames >= 0) { 1430 /* 0 is intra-only, 1 is I/P only, 2 is one B-Frame, 3 two B-frames, and so on. */ 1431 ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1; 1432 } 1433 1434 ctx->encode_config.gopLength = avctx->gop_size; 1435 } else if (avctx->gop_size == 0) { 1436 ctx->encode_config.frameIntervalP = 0; 1437 ctx->encode_config.gopLength = 1; 1438 } 1439 1440 /* force to enable intra refresh */ 1441 if(ctx->single_slice_intra_refresh) 1442 ctx->intra_refresh = 1; 1443 1444 if (ctx->intra_refresh) 1445 ctx->encode_config.gopLength = NVENC_INFINITE_GOPLENGTH; 1446 1447 nvenc_recalc_surfaces(avctx); 1448 1449 nvenc_setup_rate_control(avctx); 1450 1451 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { 1452 ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD; 1453 } else { 1454 ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME; 1455 } 1456 1457 res = nvenc_setup_codec_config(avctx); 1458 if (res) 1459 return res; 1460 1461 res = nvenc_push_context(avctx); 1462 if (res < 0) 1463 return res; 1464 1465 nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params); 1466 if (nv_status != NV_ENC_SUCCESS) { 1467 nvenc_pop_context(avctx); 1468 return 
nvenc_print_error(avctx, nv_status, "InitializeEncoder failed"); 1469 } 1470 1471#ifdef NVENC_HAVE_CUSTREAM_PTR 1472 if (ctx->cu_context) { 1473 nv_status = p_nvenc->nvEncSetIOCudaStreams(ctx->nvencoder, &ctx->cu_stream, &ctx->cu_stream); 1474 if (nv_status != NV_ENC_SUCCESS) { 1475 nvenc_pop_context(avctx); 1476 return nvenc_print_error(avctx, nv_status, "SetIOCudaStreams failed"); 1477 } 1478 } 1479#endif 1480 1481 res = nvenc_pop_context(avctx); 1482 if (res < 0) 1483 return res; 1484 1485 if (ctx->encode_config.frameIntervalP > 1) 1486 avctx->has_b_frames = 2; 1487 1488 if (ctx->encode_config.rcParams.averageBitRate > 0) 1489 avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate; 1490 1491 cpb_props = ff_add_cpb_side_data(avctx); 1492 if (!cpb_props) 1493 return AVERROR(ENOMEM); 1494 cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate; 1495 cpb_props->avg_bitrate = avctx->bit_rate; 1496 cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize; 1497 1498 return 0; 1499} 1500 1501static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt) 1502{ 1503 switch (pix_fmt) { 1504 case AV_PIX_FMT_YUV420P: 1505 return NV_ENC_BUFFER_FORMAT_YV12_PL; 1506 case AV_PIX_FMT_NV12: 1507 return NV_ENC_BUFFER_FORMAT_NV12_PL; 1508 case AV_PIX_FMT_P010: 1509 case AV_PIX_FMT_P016: 1510 return NV_ENC_BUFFER_FORMAT_YUV420_10BIT; 1511 case AV_PIX_FMT_GBRP: 1512 case AV_PIX_FMT_YUV444P: 1513 return NV_ENC_BUFFER_FORMAT_YUV444_PL; 1514 case AV_PIX_FMT_GBRP16: 1515 case AV_PIX_FMT_YUV444P16: 1516 return NV_ENC_BUFFER_FORMAT_YUV444_10BIT; 1517 case AV_PIX_FMT_0RGB32: 1518 case AV_PIX_FMT_RGB32: 1519 return NV_ENC_BUFFER_FORMAT_ARGB; 1520 case AV_PIX_FMT_0BGR32: 1521 case AV_PIX_FMT_BGR32: 1522 return NV_ENC_BUFFER_FORMAT_ABGR; 1523 case AV_PIX_FMT_X2RGB10: 1524 return NV_ENC_BUFFER_FORMAT_ARGB10; 1525 case AV_PIX_FMT_X2BGR10: 1526 return NV_ENC_BUFFER_FORMAT_ABGR10; 1527 default: 1528 return NV_ENC_BUFFER_FORMAT_UNDEFINED; 1529 } 1530} 1531 
1532static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) 1533{ 1534 NvencContext *ctx = avctx->priv_data; 1535 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; 1536 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; 1537 NvencSurface* tmp_surface = &ctx->surfaces[idx]; 1538 1539 NVENCSTATUS nv_status; 1540 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 }; 1541 allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; 1542 1543 if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) { 1544 ctx->surfaces[idx].in_ref = av_frame_alloc(); 1545 if (!ctx->surfaces[idx].in_ref) 1546 return AVERROR(ENOMEM); 1547 } else { 1548 NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 }; 1549 1550 ctx->surfaces[idx].format = nvenc_map_buffer_format(ctx->data_pix_fmt); 1551 if (ctx->surfaces[idx].format == NV_ENC_BUFFER_FORMAT_UNDEFINED) { 1552 av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n", 1553 av_get_pix_fmt_name(ctx->data_pix_fmt)); 1554 return AVERROR(EINVAL); 1555 } 1556 1557 allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER; 1558 allocSurf.width = avctx->width; 1559 allocSurf.height = avctx->height; 1560 allocSurf.bufferFmt = ctx->surfaces[idx].format; 1561 1562 nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf); 1563 if (nv_status != NV_ENC_SUCCESS) { 1564 return nvenc_print_error(avctx, nv_status, "CreateInputBuffer failed"); 1565 } 1566 1567 ctx->surfaces[idx].input_surface = allocSurf.inputBuffer; 1568 ctx->surfaces[idx].width = allocSurf.width; 1569 ctx->surfaces[idx].height = allocSurf.height; 1570 } 1571 1572 nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut); 1573 if (nv_status != NV_ENC_SUCCESS) { 1574 int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed"); 1575 if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11) 1576 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface); 1577 
av_frame_free(&ctx->surfaces[idx].in_ref); 1578 return err; 1579 } 1580 1581 ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer; 1582 1583 av_fifo_write(ctx->unused_surface_queue, &tmp_surface, 1); 1584 1585 return 0; 1586} 1587 1588static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx) 1589{ 1590 NvencContext *ctx = avctx->priv_data; 1591 int i, res = 0, res2; 1592 1593 ctx->surfaces = av_calloc(ctx->nb_surfaces, sizeof(*ctx->surfaces)); 1594 if (!ctx->surfaces) 1595 return AVERROR(ENOMEM); 1596 1597 ctx->timestamp_list = av_fifo_alloc2(ctx->nb_surfaces, sizeof(int64_t), 0); 1598 if (!ctx->timestamp_list) 1599 return AVERROR(ENOMEM); 1600 1601 ctx->unused_surface_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0); 1602 if (!ctx->unused_surface_queue) 1603 return AVERROR(ENOMEM); 1604 1605 ctx->output_surface_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0); 1606 if (!ctx->output_surface_queue) 1607 return AVERROR(ENOMEM); 1608 ctx->output_surface_ready_queue = av_fifo_alloc2(ctx->nb_surfaces, sizeof(NvencSurface*), 0); 1609 if (!ctx->output_surface_ready_queue) 1610 return AVERROR(ENOMEM); 1611 1612 res = nvenc_push_context(avctx); 1613 if (res < 0) 1614 return res; 1615 1616 for (i = 0; i < ctx->nb_surfaces; i++) { 1617 if ((res = nvenc_alloc_surface(avctx, i)) < 0) 1618 goto fail; 1619 } 1620 1621fail: 1622 res2 = nvenc_pop_context(avctx); 1623 if (res2 < 0) 1624 return res2; 1625 1626 return res; 1627} 1628 1629static av_cold int nvenc_setup_extradata(AVCodecContext *avctx) 1630{ 1631 NvencContext *ctx = avctx->priv_data; 1632 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; 1633 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; 1634 1635 NVENCSTATUS nv_status; 1636 uint32_t outSize = 0; 1637 char tmpHeader[256]; 1638 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 }; 1639 payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; 1640 1641 payload.spsppsBuffer = tmpHeader; 1642 payload.inBufferSize = 
sizeof(tmpHeader); 1643 payload.outSPSPPSPayloadSize = &outSize; 1644 1645 nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload); 1646 if (nv_status != NV_ENC_SUCCESS) { 1647 return nvenc_print_error(avctx, nv_status, "GetSequenceParams failed"); 1648 } 1649 1650 avctx->extradata_size = outSize; 1651 avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE); 1652 1653 if (!avctx->extradata) { 1654 return AVERROR(ENOMEM); 1655 } 1656 1657 memcpy(avctx->extradata, tmpHeader, outSize); 1658 1659 return 0; 1660} 1661 1662av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) 1663{ 1664 NvencContext *ctx = avctx->priv_data; 1665 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; 1666 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; 1667 int i, res; 1668 1669 /* the encoder has to be flushed before it can be closed */ 1670 if (ctx->nvencoder) { 1671 NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER, 1672 .encodePicFlags = NV_ENC_PIC_FLAG_EOS }; 1673 1674 res = nvenc_push_context(avctx); 1675 if (res < 0) 1676 return res; 1677 1678 p_nvenc->nvEncEncodePicture(ctx->nvencoder, ¶ms); 1679 } 1680 1681 av_fifo_freep2(&ctx->timestamp_list); 1682 av_fifo_freep2(&ctx->output_surface_ready_queue); 1683 av_fifo_freep2(&ctx->output_surface_queue); 1684 av_fifo_freep2(&ctx->unused_surface_queue); 1685 1686 if (ctx->surfaces && (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11)) { 1687 for (i = 0; i < ctx->nb_registered_frames; i++) { 1688 if (ctx->registered_frames[i].mapped) 1689 p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[i].in_map.mappedResource); 1690 if (ctx->registered_frames[i].regptr) 1691 p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr); 1692 } 1693 ctx->nb_registered_frames = 0; 1694 } 1695 1696 if (ctx->surfaces) { 1697 for (i = 0; i < ctx->nb_surfaces; ++i) { 1698 if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != 
AV_PIX_FMT_D3D11) 1699 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface); 1700 av_frame_free(&ctx->surfaces[i].in_ref); 1701 p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface); 1702 } 1703 } 1704 av_freep(&ctx->surfaces); 1705 ctx->nb_surfaces = 0; 1706 1707 av_frame_free(&ctx->frame); 1708 1709 av_freep(&ctx->sei_data); 1710 1711 if (ctx->nvencoder) { 1712 p_nvenc->nvEncDestroyEncoder(ctx->nvencoder); 1713 1714 res = nvenc_pop_context(avctx); 1715 if (res < 0) 1716 return res; 1717 } 1718 ctx->nvencoder = NULL; 1719 1720 if (ctx->cu_context_internal) 1721 CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal)); 1722 ctx->cu_context = ctx->cu_context_internal = NULL; 1723 1724#if CONFIG_D3D11VA 1725 if (ctx->d3d11_device) { 1726 ID3D11Device_Release(ctx->d3d11_device); 1727 ctx->d3d11_device = NULL; 1728 } 1729#endif 1730 1731 nvenc_free_functions(&dl_fn->nvenc_dl); 1732 cuda_free_functions(&dl_fn->cuda_dl); 1733 1734 dl_fn->nvenc_device_count = 0; 1735 1736 av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n"); 1737 1738 return 0; 1739} 1740 1741av_cold int ff_nvenc_encode_init(AVCodecContext *avctx) 1742{ 1743 NvencContext *ctx = avctx->priv_data; 1744 int ret; 1745 1746 if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) { 1747 AVHWFramesContext *frames_ctx; 1748 if (!avctx->hw_frames_ctx) { 1749 av_log(avctx, AV_LOG_ERROR, 1750 "hw_frames_ctx must be set when using GPU frames as input\n"); 1751 return AVERROR(EINVAL); 1752 } 1753 frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; 1754 if (frames_ctx->format != avctx->pix_fmt) { 1755 av_log(avctx, AV_LOG_ERROR, 1756 "hw_frames_ctx must match the GPU frame type\n"); 1757 return AVERROR(EINVAL); 1758 } 1759 ctx->data_pix_fmt = frames_ctx->sw_format; 1760 } else { 1761 ctx->data_pix_fmt = avctx->pix_fmt; 1762 } 1763 1764 ctx->frame = av_frame_alloc(); 1765 if (!ctx->frame) 1766 return AVERROR(ENOMEM); 1767 1768 
if ((ret = nvenc_load_libraries(avctx)) < 0) 1769 return ret; 1770 1771 if ((ret = nvenc_setup_device(avctx)) < 0) 1772 return ret; 1773 1774 if ((ret = nvenc_setup_encoder(avctx)) < 0) 1775 return ret; 1776 1777 if ((ret = nvenc_setup_surfaces(avctx)) < 0) 1778 return ret; 1779 1780 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { 1781 if ((ret = nvenc_setup_extradata(avctx)) < 0) 1782 return ret; 1783 } 1784 1785 return 0; 1786} 1787 1788static NvencSurface *get_free_frame(NvencContext *ctx) 1789{ 1790 NvencSurface *tmp_surf; 1791 1792 if (av_fifo_read(ctx->unused_surface_queue, &tmp_surf, 1) < 0) 1793 // queue empty 1794 return NULL; 1795 1796 return tmp_surf; 1797} 1798 1799static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface, 1800 NV_ENC_LOCK_INPUT_BUFFER *lock_buffer_params, const AVFrame *frame) 1801{ 1802 int dst_linesize[4] = { 1803 lock_buffer_params->pitch, 1804 lock_buffer_params->pitch, 1805 lock_buffer_params->pitch, 1806 lock_buffer_params->pitch 1807 }; 1808 uint8_t *dst_data[4]; 1809 int ret; 1810 1811 if (frame->format == AV_PIX_FMT_YUV420P) 1812 dst_linesize[1] = dst_linesize[2] >>= 1; 1813 1814 ret = av_image_fill_pointers(dst_data, frame->format, nv_surface->height, 1815 lock_buffer_params->bufferDataPtr, dst_linesize); 1816 if (ret < 0) 1817 return ret; 1818 1819 if (frame->format == AV_PIX_FMT_YUV420P) 1820 FFSWAP(uint8_t*, dst_data[1], dst_data[2]); 1821 1822 av_image_copy(dst_data, dst_linesize, 1823 (const uint8_t**)frame->data, frame->linesize, frame->format, 1824 avctx->width, avctx->height); 1825 1826 return 0; 1827} 1828 1829static int nvenc_find_free_reg_resource(AVCodecContext *avctx) 1830{ 1831 NvencContext *ctx = avctx->priv_data; 1832 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; 1833 NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; 1834 NVENCSTATUS nv_status; 1835 1836 int i, first_round; 1837 1838 if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) { 1839 for 
(first_round = 1; first_round >= 0; first_round--) {
            for (i = 0; i < ctx->nb_registered_frames; i++) {
                if (!ctx->registered_frames[i].mapped) {
                    if (ctx->registered_frames[i].regptr) {
                        /* Two-pass scan: on the first pass keep slots that are
                         * merely unmapped-but-still-registered (they may be
                         * reused cheaply); only on the second pass evict one
                         * by unregistering its resource. */
                        if (first_round)
                            continue;
                        nv_status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
                        if (nv_status != NV_ENC_SUCCESS)
                            return nvenc_print_error(avctx, nv_status, "Failed unregistering unused input resource");
                        ctx->registered_frames[i].ptr = NULL;
                        ctx->registered_frames[i].regptr = NULL;
                    }
                    return i;
                }
            }
        }
    } else {
        /* Registry not full yet: hand out the next fresh slot. */
        return ctx->nb_registered_frames++;
    }

    av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
    return AVERROR(ENOMEM);
}

/*
 * Register the resource backing a hardware frame (CUDA device pointer or
 * D3D11 texture) with the NVENC encoder, or return the index of an already
 * existing registration for the same resource.
 *
 * Returns an index into ctx->registered_frames on success, a negative
 * AVERROR code on failure.
 */
static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
    NV_ENC_REGISTER_RESOURCE reg = { 0 };
    int i, idx, ret;

    /* Fast path: reuse an existing registration of this exact resource.
     * For D3D11, frame->data[1] carries the texture array subresource index
     * and must match too. */
    for (i = 0; i < ctx->nb_registered_frames; i++) {
        if (avctx->pix_fmt == AV_PIX_FMT_CUDA && ctx->registered_frames[i].ptr == frame->data[0])
            return i;
        else if (avctx->pix_fmt == AV_PIX_FMT_D3D11 && ctx->registered_frames[i].ptr == frame->data[0] && ctx->registered_frames[i].ptr_index == (intptr_t)frame->data[1])
            return i;
    }

    idx = nvenc_find_free_reg_resource(avctx);
    if (idx < 0)
        return idx;

    reg.version = NV_ENC_REGISTER_RESOURCE_VER;
    reg.width = frames_ctx->width;
    reg.height = frames_ctx->height;
    reg.pitch = frame->linesize[0];
    reg.resourceToRegister = frame->data[0];

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
    }
    else if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
        reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
        reg.subResourceIndex = (intptr_t)frame->data[1];
    }

    reg.bufferFormat = nvenc_map_buffer_format(frames_ctx->sw_format);
    if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
        av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
               av_get_pix_fmt_name(frames_ctx->sw_format));
        return AVERROR(EINVAL);
    }

    ret = p_nvenc->nvEncRegisterResource(ctx->nvencoder, &reg);
    if (ret != NV_ENC_SUCCESS) {
        nvenc_print_error(avctx, ret, "Error registering an input resource");
        return AVERROR_UNKNOWN;
    }

    ctx->registered_frames[idx].ptr = frame->data[0];
    ctx->registered_frames[idx].ptr_index = reg.subResourceIndex;
    ctx->registered_frames[idx].regptr = reg.registeredResource;
    return idx;
}

/*
 * Make an input frame available to the encoder.
 *
 * For hardware frames (CUDA/D3D11) the frame's resource is registered and
 * mapped (with a per-registration map refcount); for software frames the
 * data is copied into a locked NVENC input buffer.
 *
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
                              NvencSurface *nvenc_frame)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    int res;
    NVENCSTATUS nv_status;

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
        int reg_idx = nvenc_register_frame(avctx, frame);
        if (reg_idx < 0) {
            av_log(avctx, AV_LOG_ERROR, "Could not register an input HW frame\n");
            return reg_idx;
        }

        /* Hold a reference on the source frame for the duration of encoding. */
        res = av_frame_ref(nvenc_frame->in_ref, frame);
        if (res < 0)
            return res;

        /* Map the registered resource only on first use; subsequent uploads
         * of the same resource just bump the map refcount below. */
        if (!ctx->registered_frames[reg_idx].mapped) {
            ctx->registered_frames[reg_idx].in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
            ctx->registered_frames[reg_idx].in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
            nv_status = p_nvenc->nvEncMapInputResource(ctx->nvencoder, &ctx->registered_frames[reg_idx].in_map);
            if (nv_status != NV_ENC_SUCCESS) {
                av_frame_unref(nvenc_frame->in_ref);
                return nvenc_print_error(avctx, nv_status, "Error mapping an input resource");
            }
        }

        ctx->registered_frames[reg_idx].mapped += 1;

        nvenc_frame->reg_idx = reg_idx;
        nvenc_frame->input_surface = ctx->registered_frames[reg_idx].in_map.mappedResource;
        nvenc_frame->format = ctx->registered_frames[reg_idx].in_map.mappedBufferFmt;
        nvenc_frame->pitch = frame->linesize[0];

        return 0;
    } else {
        /* Software frame: lock the NVENC-owned input buffer, copy pixel data
         * into it, then unlock regardless of the copy result. */
        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };

        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
        lockBufferParams.inputBuffer = nvenc_frame->input_surface;

        nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
        if (nv_status != NV_ENC_SUCCESS) {
            return nvenc_print_error(avctx, nv_status, "Failed locking nvenc input buffer");
        }

        nvenc_frame->pitch = lockBufferParams.pitch;
        res = nvenc_copy_frame(avctx, nvenc_frame, &lockBufferParams, frame);

        nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, nvenc_frame->input_surface);
        if (nv_status != NV_ENC_SUCCESS) {
            return nvenc_print_error(avctx, nv_status, "Failed unlocking input buffer!");
        }

        return res;
    }
}

/*
 * Fill the codec-specific part of the per-picture parameters: slice mode
 * settings copied from the encoder config, plus the SEI payload array for
 * this picture (if any).
 */
static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
                                            NV_ENC_PIC_PARAMS *params,
                                            NV_ENC_SEI_PAYLOAD *sei_data,
                                            int sei_count)
{
    NvencContext *ctx = avctx->priv_data;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        params->codecPicParams.h264PicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
        params->codecPicParams.h264PicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
        if (sei_count > 0) {
            params->codecPicParams.h264PicParams.seiPayloadArray = sei_data;
            params->codecPicParams.h264PicParams.seiPayloadArrayCnt = sei_count;
        }

        break;
    case AV_CODEC_ID_HEVC:
        params->codecPicParams.hevcPicParams.sliceMode =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode;
        params->codecPicParams.hevcPicParams.sliceModeData =
            ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        if (sei_count > 0) {
            params->codecPicParams.hevcPicParams.seiPayloadArray = sei_data;
            params->codecPicParams.hevcPicParams.seiPayloadArrayCnt = sei_count;
        }

        break;
    }
}

/* Push one input timestamp onto the DTS bookkeeping FIFO. */
static inline void timestamp_queue_enqueue(AVFifo *queue, int64_t timestamp)
{
    av_fifo_write(queue, &timestamp, 1);
}

/* Pop the oldest timestamp; yields AV_NOPTS_VALUE if the queue is empty. */
static inline int64_t timestamp_queue_dequeue(AVFifo *queue)
{
    int64_t timestamp = AV_NOPTS_VALUE;
    // The following call might fail if the queue is empty.
    av_fifo_read(queue, &timestamp, 1);

    return timestamp;
}

/*
 * Set pts/dts on the output packet: pts comes back from the encoder,
 * dts is the oldest queued input timestamp shifted back by the B-frame
 * reorder delay so that dts <= pts always holds.
 */
static int nvenc_set_timestamp(AVCodecContext *avctx,
                               NV_ENC_LOCK_BITSTREAM *params,
                               AVPacket *pkt)
{
    NvencContext *ctx = avctx->priv_data;

    pkt->pts = params->outputTimeStamp;
    pkt->dts = timestamp_queue_dequeue(ctx->timestamp_list);

    /* Shift dts back by (frameIntervalP - 1) frame durations, expressed in
     * the stream timebase (ticks_per_frame * time_base.num), to account for
     * B-frame reordering. The FFMAX guards keep the factors at least 1/0. */
    pkt->dts -= FFMAX(ctx->encode_config.frameIntervalP - 1, 0) * FFMAX(avctx->ticks_per_frame, 1) * FFMAX(avctx->time_base.num, 1);

    return 0;
}

/*
 * Retrieve one encoded picture from the given output surface into pkt:
 * lock the bitstream, copy it out, unlock, release the hardware input
 * mapping if its refcount drops to zero, then fill packet metadata
 * (key flag, picture type, QP stats, timestamps).
 *
 * Returns 0 on success, a negative AVERROR code on failure. On the error
 * path the matching timestamp is still dequeued to keep pts/dts bookkeeping
 * in sync.
 */
static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSurface *tmpoutsurf)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    uint32_t slice_mode_data;
    uint32_t *slice_offsets = NULL;
    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
    NVENCSTATUS nv_status;
    int res = 0;

    enum AVPictureType pict_type;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
        break;
    case AV_CODEC_ID_H265:
        slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
        res = AVERROR(EINVAL);
        goto error;
    }
    /* One slice offset entry per slice, as configured via sliceModeData. */
    slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));

    if (!slice_offsets) {
        res = AVERROR(ENOMEM);
        goto error;
    }

    lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;

    lock_params.doNotWait = 0;
    lock_params.outputBitstream = tmpoutsurf->output_surface;
    lock_params.sliceOffsets = slice_offsets;

    nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
    if (nv_status != NV_ENC_SUCCESS) {
        res = nvenc_print_error(avctx, nv_status, "Failed locking bitstream buffer");
        goto error;
    }

    res = ff_get_encode_buffer(avctx, pkt, lock_params.bitstreamSizeInBytes, 0);

    if (res < 0) {
        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
        goto error;
    }

    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);

    nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
    if (nv_status != NV_ENC_SUCCESS) {
        res = nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");
        goto error;
    }


    if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
        /* Drop this surface's reference on the mapped input resource and
         * unmap it once no in-flight picture uses it anymore. */
        ctx->registered_frames[tmpoutsurf->reg_idx].mapped -= 1;
        if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped == 0) {
            nv_status = p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->registered_frames[tmpoutsurf->reg_idx].in_map.mappedResource);
            if (nv_status != NV_ENC_SUCCESS) {
                res = nvenc_print_error(avctx, nv_status, "Failed unmapping input resource");
                goto error;
            }
        } else if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped < 0) {
            /* Refcount underflow: internal accounting error. */
            res = AVERROR_BUG;
            goto error;
        }

        av_frame_unref(tmpoutsurf->in_ref);

        tmpoutsurf->input_surface = NULL;
    }

    switch (lock_params.pictureType) {
    case NV_ENC_PIC_TYPE_IDR:
        pkt->flags |= AV_PKT_FLAG_KEY;
        /* fall through: an IDR picture is also an I picture */
    case NV_ENC_PIC_TYPE_I:
        pict_type = AV_PICTURE_TYPE_I;
        break;
    case NV_ENC_PIC_TYPE_P:
        pict_type = AV_PICTURE_TYPE_P;
        break;
    case NV_ENC_PIC_TYPE_B:
        pict_type = AV_PICTURE_TYPE_B;
        break;
    case NV_ENC_PIC_TYPE_BI:
        pict_type = AV_PICTURE_TYPE_BI;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Unknown picture type encountered, expect the output to be broken.\n");
        av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
        res = AVERROR_EXTERNAL;
        goto error;
    }

    ff_side_data_set_encoder_stats(pkt,
        (lock_params.frameAvgQP - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);

    res = nvenc_set_timestamp(avctx, &lock_params, pkt);
    if (res < 0)
        goto error2;

    av_free(slice_offsets);

    return 0;

error:
    /* Keep the timestamp FIFO in sync with the dropped output picture.
     * nvenc_set_timestamp already dequeued on the error2 path. */
    timestamp_queue_dequeue(ctx->timestamp_list);

error2:
    av_free(slice_offsets);

    return res;
}

/*
 * Whether a finished packet may be returned to the caller. When not
 * flushing, delay output until at least async_depth pictures are in flight
 * to keep the encoder pipeline busy.
 */
static int output_ready(AVCodecContext *avctx, int flush)
{
    NvencContext *ctx = avctx->priv_data;
    int nb_ready, nb_pending;

    nb_ready = av_fifo_can_read(ctx->output_surface_ready_queue);
    nb_pending = av_fifo_can_read(ctx->output_surface_queue);
    if (flush)
        return nb_ready > 0;
    return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);
}

/*
 * Collect the SEI payloads to attach to this frame (A/53 closed captions,
 * S12M timecode, user-data-unregistered side data) into ctx->sei_data.
 *
 * Returns the number of payloads gathered (>= 0) on success, a negative
 * AVERROR code on failure. On failure all payloads allocated so far are
 * freed. Payload buffers are owned by the caller until consumed.
 */
static int prepare_sei_data_array(AVCodecContext *avctx, const AVFrame *frame)
{
    NvencContext *ctx = avctx->priv_data;
    int sei_count = 0;
    int i, res;

    if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
        void *a53_data = NULL;
        size_t a53_size = 0;

        if (ff_alloc_a53_sei(frame, 0, &a53_data, &a53_size) < 0) {
            av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
        }

        if (a53_data) {
            void *tmp = av_fast_realloc(ctx->sei_data,
                                        &ctx->sei_data_size,
                                        (sei_count + 1) * sizeof(*ctx->sei_data));
            if (!tmp) {
                av_free(a53_data);
                res = AVERROR(ENOMEM);
                goto error;
            } else {
                ctx->sei_data = tmp;
                ctx->sei_data[sei_count].payloadSize = (uint32_t)a53_size;
                /* SEI type 4: user_data_registered_itu_t_t35 (carries A/53 CC) */
                ctx->sei_data[sei_count].payloadType = 4;
                ctx->sei_data[sei_count].payload = (uint8_t*)a53_data;
                sei_count++;
            }
        }
    }

    if (ctx->s12m_tc && av_frame_get_side_data(frame, AV_FRAME_DATA_S12M_TIMECODE)) {
        void *tc_data = NULL;
        size_t tc_size = 0;

        if (ff_alloc_timecode_sei(frame, avctx->framerate, 0, &tc_data, &tc_size) < 0) {
            av_log(ctx, AV_LOG_ERROR, "Not enough memory for timecode sei, skipping\n");
        }

        if (tc_data) {
            void *tmp = av_fast_realloc(ctx->sei_data,
                                        &ctx->sei_data_size,
                                        (sei_count + 1) * sizeof(*ctx->sei_data));
            if (!tmp) {
                av_free(tc_data);
                res = AVERROR(ENOMEM);
                goto error;
            } else {
                ctx->sei_data = tmp;
                ctx->sei_data[sei_count].payloadSize = (uint32_t)tc_size;
                ctx->sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
                ctx->sei_data[sei_count].payload = (uint8_t*)tc_data;
                sei_count++;
            }
        }
    }

    if (!ctx->udu_sei)
        return sei_count;

    /* Forward any unregistered-user-data SEI side data verbatim. */
    for (i = 0; i < frame->nb_side_data; i++) {
        AVFrameSideData *side_data = frame->side_data[i];
        void *tmp;

        if (side_data->type != AV_FRAME_DATA_SEI_UNREGISTERED)
            continue;

        tmp = av_fast_realloc(ctx->sei_data,
                              &ctx->sei_data_size,
                              (sei_count + 1) * sizeof(*ctx->sei_data));
        if (!tmp) {
            res = AVERROR(ENOMEM);
            goto error;
        } else {
            ctx->sei_data = tmp;
            ctx->sei_data[sei_count].payloadSize = side_data->size;
            ctx->sei_data[sei_count].payloadType = SEI_TYPE_USER_DATA_UNREGISTERED;
            ctx->sei_data[sei_count].payload = av_memdup(side_data->data, side_data->size);

            if (!ctx->sei_data[sei_count].payload) {
                res = AVERROR(ENOMEM);
                goto error;
            }

            sei_count++;
        }
    }

    return sei_count;

error:
    for (i = 0; i < sei_count; i++)
        av_freep(&(ctx->sei_data[i].payload));

    return res;
}

/*
 * Apply runtime parameter changes (display aspect ratio, average/max
 * bitrate, VBV buffer size) to the running encoder via
 * nvEncReconfigureEncoder. Bitrate changes force an encoder reset and an
 * IDR frame. Failures are logged but non-fatal; on success the cached
 * config in ctx is updated to match.
 */
static void reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NVENCSTATUS ret;

    NV_ENC_RECONFIGURE_PARAMS params = { 0 };
    int needs_reconfig = 0;
    int needs_encode_config = 0;
    int reconfig_bitrate = 0, reconfig_dar = 0;
    int dw, dh;

    params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
    params.reInitEncodeParams = ctx->init_encode_params;

    compute_dar(avctx, &dw, &dh);
    if (dw != ctx->init_encode_params.darWidth || dh != ctx->init_encode_params.darHeight) {
        av_log(avctx, AV_LOG_VERBOSE,
               "aspect ratio change (DAR): %d:%d -> %d:%d\n",
               ctx->init_encode_params.darWidth,
               ctx->init_encode_params.darHeight, dw, dh);

        params.reInitEncodeParams.darHeight = dh;
        params.reInitEncodeParams.darWidth = dw;

        needs_reconfig = 1;
        reconfig_dar = 1;
    }

    /* Dynamic bitrate changes only make sense outside constant-QP mode and
     * when the device reports support for it. */
    if (ctx->rc != NV_ENC_PARAMS_RC_CONSTQP && ctx->support_dyn_bitrate) {
        if (avctx->bit_rate > 0 && params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate != avctx->bit_rate) {
            av_log(avctx, AV_LOG_VERBOSE,
                   "avg bitrate change: %d -> %d\n",
                   params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate,
                   (uint32_t)avctx->bit_rate);

            params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate = avctx->bit_rate;
            reconfig_bitrate = 1;
        }

        if (avctx->rc_max_rate > 0 && ctx->encode_config.rcParams.maxBitRate != avctx->rc_max_rate) {
            av_log(avctx, AV_LOG_VERBOSE,
                   "max bitrate change: %d -> %d\n",
                   params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate,
                   (uint32_t)avctx->rc_max_rate);

            params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate = avctx->rc_max_rate;
            reconfig_bitrate = 1;
        }

        if (avctx->rc_buffer_size > 0 && ctx->encode_config.rcParams.vbvBufferSize != avctx->rc_buffer_size) {
            av_log(avctx, AV_LOG_VERBOSE,
                   "vbv buffer size change: %d -> %d\n",
                   params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize,
                   avctx->rc_buffer_size);

            params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize = avctx->rc_buffer_size;
            reconfig_bitrate = 1;
        }

        if (reconfig_bitrate) {
            params.resetEncoder = 1;
            params.forceIDR = 1;

            needs_encode_config = 1;
            needs_reconfig = 1;
        }
    }

    /* Only pass an encodeConfig to the driver when it actually changed. */
    if (!needs_encode_config)
        params.reInitEncodeParams.encodeConfig = NULL;

    if (needs_reconfig) {
        ret = p_nvenc->nvEncReconfigureEncoder(ctx->nvencoder, &params);
        if (ret != NV_ENC_SUCCESS) {
            nvenc_print_error(avctx, ret, "failed to reconfigure nvenc");
        } else {
            if (reconfig_dar) {
                ctx->init_encode_params.darHeight = dh;
                ctx->init_encode_params.darWidth = dw;
            }

            if (reconfig_bitrate) {
                ctx->encode_config.rcParams.averageBitRate = params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate;
                ctx->encode_config.rcParams.maxBitRate = params.reInitEncodeParams.encodeConfig->rcParams.maxBitRate;
                ctx->encode_config.rcParams.vbvBufferSize = params.reInitEncodeParams.encodeConfig->rcParams.vbvBufferSize;
            }

        }
    }
}

/*
 * Submit one frame to the encoder (or signal end-of-stream when frame is
 * NULL / empty): upload the input, fill per-picture parameters including
 * SEI payloads, call nvEncEncodePicture within a pushed CUDA/D3D11 context,
 * and move surfaces to the ready queue once the encoder stops asking for
 * more input.
 *
 * Returns 0 on success, a negative AVERROR code on failure.
 */
static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    NVENCSTATUS nv_status;
    NvencSurface *tmp_out_surf, *in_surf;
    int res, res2;
    int sei_count = 0;
    int i;

    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;

    NV_ENC_PIC_PARAMS pic_params = { 0 };
    pic_params.version = NV_ENC_PIC_PARAMS_VER;

    if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
        return AVERROR(EINVAL);

    if (frame && frame->buf[0]) {
        in_surf = get_free_frame(ctx);
        if (!in_surf)
            return AVERROR(EAGAIN);

        res = nvenc_push_context(avctx);
        if (res < 0)
            return res;

        reconfig_encoder(avctx, frame);

        res = nvenc_upload_frame(avctx, frame, in_surf);

        /* Pop the device context before acting on the upload result. */
        res2 = nvenc_pop_context(avctx);
        if (res2 < 0)
            return res2;

        if (res)
            return res;

        pic_params.inputBuffer = in_surf->input_surface;
        pic_params.bufferFmt = in_surf->format;
        pic_params.inputWidth = in_surf->width;
        pic_params.inputHeight = in_surf->height;
        pic_params.inputPitch = in_surf->pitch;
        pic_params.outputBitstream = in_surf->output_surface;

        if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
            if (frame->top_field_first)
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
            else
                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
        } else {
            pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
        }

        /* Honor caller-forced keyframes: emit a full IDR or just an intra
         * picture depending on the forced_idr option. */
        if (ctx->forced_idr >= 0 && frame->pict_type == AV_PICTURE_TYPE_I) {
            pic_params.encodePicFlags =
                ctx->forced_idr ? NV_ENC_PIC_FLAG_FORCEIDR : NV_ENC_PIC_FLAG_FORCEINTRA;
        } else {
            pic_params.encodePicFlags = 0;
        }

        pic_params.inputTimeStamp = frame->pts;

        if (ctx->extra_sei) {
            res = prepare_sei_data_array(avctx, frame);
            if (res < 0)
                return res;
            sei_count = res;
        }

        nvenc_codec_specific_pic_params(avctx, &pic_params, ctx->sei_data, sei_count);
    } else {
        /* No (more) input: tell the encoder to drain. */
        pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
    }

    res = nvenc_push_context(avctx);
    if (res < 0)
        return res;

    nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);

    /* SEI payloads were consumed (or the call failed); free them either way. */
    for (i = 0; i < sei_count; i++)
        av_freep(&(ctx->sei_data[i].payload));

    res = nvenc_pop_context(avctx);
    if (res < 0)
        return res;

    if (nv_status != NV_ENC_SUCCESS &&
        nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
        return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");

    if (frame && frame->buf[0]) {
        av_fifo_write(ctx->output_surface_queue, &in_surf, 1);
        timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
    }

    /* all the pending buffers are now ready for output */
    if (nv_status == NV_ENC_SUCCESS) {
        while (av_fifo_read(ctx->output_surface_queue, &tmp_out_surf, 1) >= 0)
            av_fifo_write(ctx->output_surface_ready_queue, &tmp_out_surf, 1);
    }

    return 0;
}

/*
 * Public receive_packet callback: pull a frame from the encode API, feed it
 * to the encoder, and return one finished packet when available.
 *
 * Returns 0 with a packet, AVERROR(EAGAIN) when more input is needed,
 * AVERROR_EOF when fully drained, or another negative AVERROR on failure.
 */
int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
{
    NvencSurface *tmp_out_surf;
    int res, res2;

    NvencContext *ctx = avctx->priv_data;

    AVFrame *frame = ctx->frame;

    if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
        return AVERROR(EINVAL);

    /* ctx->frame may still hold a frame that couldn't be sent last time
     * (EAGAIN); only fetch a new one when it is empty. */
    if (!frame->buf[0]) {
        res = ff_encode_get_frame(avctx, frame);
        if (res < 0 && res != AVERROR_EOF)
            return res;
    }

    res = nvenc_send_frame(avctx, frame);
    if (res < 0) {
        /* On EAGAIN keep the frame buffered and try to output a packet. */
        if (res != AVERROR(EAGAIN))
            return res;
    } else
        av_frame_unref(frame);

    if (output_ready(avctx, avctx->internal->draining)) {
        av_fifo_read(ctx->output_surface_ready_queue, &tmp_out_surf, 1);

        res = nvenc_push_context(avctx);
        if (res < 0)
            return res;

        res = process_output_surface(avctx, pkt, tmp_out_surf);

        res2 = nvenc_pop_context(avctx);
        if (res2 < 0)
            return res2;

        if (res)
            return res;

        /* Recycle the surface for future input frames. */
        av_fifo_write(ctx->unused_surface_queue, &tmp_out_surf, 1);
    } else if (avctx->internal->draining) {
        return AVERROR_EOF;
    } else {
        return AVERROR(EAGAIN);
    }

    return 0;
}

/*
 * Flush callback: push an EOS picture into the encoder and clear the
 * timestamp bookkeeping so the encoder can be reused after a seek.
 */
av_cold void ff_nvenc_encode_flush(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;

    nvenc_send_frame(avctx, NULL);
    av_fifo_reset2(ctx->timestamp_list);
}