/*
 * VC3/DNxHD encoder
 * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
 * Copyright (c) 2011 MirriAd Ltd
 *
 * VC-3 encoder funded by the British Broadcasting Corporation
 * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavutil/mem_internal.h"
#include "libavutil/opt.h"

#include "avcodec.h"
#include "blockdsp.h"
#include "codec_internal.h"
#include "encode.h"
#include "fdctdsp.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "mpegvideoenc.h"
#include "pixblockdsp.h"
#include "packet_internal.h"
#include "profiles.h"
#include "dnxhdenc.h"

// The largest value that will not lead to overflow for 10-bit samples.
45#define DNX10BIT_QMAT_SHIFT 18 46#define RC_VARIANCE 1 // use variance or ssd for fast rc 47#define LAMBDA_FRAC_BITS 10 48 49#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM 50static const AVOption options[] = { 51 { "nitris_compat", "encode with Avid Nitris compatibility", 52 offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, 53 { "ibias", "intra quant bias", 54 offsetof(DNXHDEncContext, intra_quant_bias), AV_OPT_TYPE_INT, 55 { .i64 = 0 }, INT_MIN, INT_MAX, VE }, 56 { "profile", NULL, offsetof(DNXHDEncContext, profile), AV_OPT_TYPE_INT, 57 { .i64 = FF_PROFILE_DNXHD }, 58 FF_PROFILE_DNXHD, FF_PROFILE_DNXHR_444, VE, "profile" }, 59 { "dnxhd", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHD }, 60 0, 0, VE, "profile" }, 61 { "dnxhr_444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_444 }, 62 0, 0, VE, "profile" }, 63 { "dnxhr_hqx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_HQX }, 64 0, 0, VE, "profile" }, 65 { "dnxhr_hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_HQ }, 66 0, 0, VE, "profile" }, 67 { "dnxhr_sq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_SQ }, 68 0, 0, VE, "profile" }, 69 { "dnxhr_lb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_LB }, 70 0, 0, VE, "profile" }, 71 { NULL } 72}; 73 74static const AVClass dnxhd_class = { 75 .class_name = "dnxhd", 76 .item_name = av_default_item_name, 77 .option = options, 78 .version = LIBAVUTIL_VERSION_INT, 79}; 80 81static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *av_restrict block, 82 const uint8_t *pixels, 83 ptrdiff_t line_size) 84{ 85 int i; 86 for (i = 0; i < 4; i++) { 87 block[0] = pixels[0]; 88 block[1] = pixels[1]; 89 block[2] = pixels[2]; 90 block[3] = pixels[3]; 91 block[4] = pixels[4]; 92 block[5] = pixels[5]; 93 block[6] = pixels[6]; 94 block[7] = pixels[7]; 95 pixels += line_size; 96 block += 8; 97 } 98 memcpy(block, block - 8, sizeof(*block) * 8); 99 memcpy(block + 8, block - 16, sizeof(*block) 
* 8); 100 memcpy(block + 16, block - 24, sizeof(*block) * 8); 101 memcpy(block + 24, block - 32, sizeof(*block) * 8); 102} 103 104static av_always_inline 105void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block, 106 const uint8_t *pixels, 107 ptrdiff_t line_size) 108{ 109 memcpy(block + 0 * 8, pixels + 0 * line_size, 8 * sizeof(*block)); 110 memcpy(block + 7 * 8, pixels + 0 * line_size, 8 * sizeof(*block)); 111 memcpy(block + 1 * 8, pixels + 1 * line_size, 8 * sizeof(*block)); 112 memcpy(block + 6 * 8, pixels + 1 * line_size, 8 * sizeof(*block)); 113 memcpy(block + 2 * 8, pixels + 2 * line_size, 8 * sizeof(*block)); 114 memcpy(block + 5 * 8, pixels + 2 * line_size, 8 * sizeof(*block)); 115 memcpy(block + 3 * 8, pixels + 3 * line_size, 8 * sizeof(*block)); 116 memcpy(block + 4 * 8, pixels + 3 * line_size, 8 * sizeof(*block)); 117} 118 119static int dnxhd_10bit_dct_quantize_444(MpegEncContext *ctx, int16_t *block, 120 int n, int qscale, int *overflow) 121{ 122 int i, j, level, last_non_zero, start_i; 123 const int *qmat; 124 const uint8_t *scantable= ctx->intra_scantable.scantable; 125 int bias; 126 int max = 0; 127 unsigned int threshold1, threshold2; 128 129 ctx->fdsp.fdct(block); 130 131 block[0] = (block[0] + 2) >> 2; 132 start_i = 1; 133 last_non_zero = 0; 134 qmat = n < 4 ? 
ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale]; 135 bias= ctx->intra_quant_bias * (1 << (16 - 8)); 136 threshold1 = (1 << 16) - bias - 1; 137 threshold2 = (threshold1 << 1); 138 139 for (i = 63; i >= start_i; i--) { 140 j = scantable[i]; 141 level = block[j] * qmat[j]; 142 143 if (((unsigned)(level + threshold1)) > threshold2) { 144 last_non_zero = i; 145 break; 146 } else{ 147 block[j]=0; 148 } 149 } 150 151 for (i = start_i; i <= last_non_zero; i++) { 152 j = scantable[i]; 153 level = block[j] * qmat[j]; 154 155 if (((unsigned)(level + threshold1)) > threshold2) { 156 if (level > 0) { 157 level = (bias + level) >> 16; 158 block[j] = level; 159 } else{ 160 level = (bias - level) >> 16; 161 block[j] = -level; 162 } 163 max |= level; 164 } else { 165 block[j] = 0; 166 } 167 } 168 *overflow = ctx->max_qcoeff < max; //overflow might have happened 169 170 /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ 171 if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE) 172 ff_block_permute(block, ctx->idsp.idct_permutation, 173 scantable, last_non_zero); 174 175 return last_non_zero; 176} 177 178static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block, 179 int n, int qscale, int *overflow) 180{ 181 const uint8_t *scantable= ctx->intra_scantable.scantable; 182 const int *qmat = n<4 ? 
ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale]; 183 int last_non_zero = 0; 184 int i; 185 186 ctx->fdsp.fdct(block); 187 188 // Divide by 4 with rounding, to compensate scaling of DCT coefficients 189 block[0] = (block[0] + 2) >> 2; 190 191 for (i = 1; i < 64; ++i) { 192 int j = scantable[i]; 193 int sign = FF_SIGNBIT(block[j]); 194 int level = (block[j] ^ sign) - sign; 195 level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT; 196 block[j] = (level ^ sign) - sign; 197 if (level) 198 last_non_zero = i; 199 } 200 201 /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ 202 if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE) 203 ff_block_permute(block, ctx->idsp.idct_permutation, 204 scantable, last_non_zero); 205 206 return last_non_zero; 207} 208 209static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx) 210{ 211 int i, j, level, run; 212 int max_level = 1 << (ctx->bit_depth + 2); 213 214 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_codes, max_level * 4) || 215 !FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_bits, max_level * 4) || 216 !(ctx->run_codes = av_mallocz(63 * 2)) || 217 !(ctx->run_bits = av_mallocz(63))) 218 return AVERROR(ENOMEM); 219 ctx->vlc_codes = ctx->orig_vlc_codes + max_level * 2; 220 ctx->vlc_bits = ctx->orig_vlc_bits + max_level * 2; 221 for (level = -max_level; level < max_level; level++) { 222 for (run = 0; run < 2; run++) { 223 int index = level * (1 << 1) | run; 224 int sign, offset = 0, alevel = level; 225 226 MASK_ABS(sign, alevel); 227 if (alevel > 64) { 228 offset = (alevel - 1) >> 6; 229 alevel -= offset << 6; 230 } 231 for (j = 0; j < 257; j++) { 232 if (ctx->cid_table->ac_info[2*j+0] >> 1 == alevel && 233 (!offset || (ctx->cid_table->ac_info[2*j+1] & 1) && offset) && 234 (!run || (ctx->cid_table->ac_info[2*j+1] & 2) && run)) { 235 av_assert1(!ctx->vlc_codes[index]); 236 if (alevel) { 237 ctx->vlc_codes[index] = 238 (ctx->cid_table->ac_codes[j] << 1) | (sign & 1); 239 ctx->vlc_bits[index] = 
ctx->cid_table->ac_bits[j] + 1; 240 } else { 241 ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j]; 242 ctx->vlc_bits[index] = ctx->cid_table->ac_bits[j]; 243 } 244 break; 245 } 246 } 247 av_assert0(!alevel || j < 257); 248 if (offset) { 249 ctx->vlc_codes[index] = 250 (ctx->vlc_codes[index] << ctx->cid_table->index_bits) | offset; 251 ctx->vlc_bits[index] += ctx->cid_table->index_bits; 252 } 253 } 254 } 255 for (i = 0; i < 62; i++) { 256 int run = ctx->cid_table->run[i]; 257 av_assert0(run < 63); 258 ctx->run_codes[run] = ctx->cid_table->run_codes[i]; 259 ctx->run_bits[run] = ctx->cid_table->run_bits[i]; 260 } 261 return 0; 262} 263 264static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias) 265{ 266 // init first elem to 1 to avoid div by 0 in convert_matrix 267 uint16_t weight_matrix[64] = { 1, }; // convert_matrix needs uint16_t* 268 int qscale, i; 269 const uint8_t *luma_weight_table = ctx->cid_table->luma_weight; 270 const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight; 271 272 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l, ctx->m.avctx->qmax + 1) || 273 !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c, ctx->m.avctx->qmax + 1) || 274 !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l16, ctx->m.avctx->qmax + 1) || 275 !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c16, ctx->m.avctx->qmax + 1)) 276 return AVERROR(ENOMEM); 277 278 if (ctx->bit_depth == 8) { 279 for (i = 1; i < 64; i++) { 280 int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]]; 281 weight_matrix[j] = ctx->cid_table->luma_weight[i]; 282 } 283 ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16, 284 weight_matrix, ctx->intra_quant_bias, 1, 285 ctx->m.avctx->qmax, 1); 286 for (i = 1; i < 64; i++) { 287 int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]]; 288 weight_matrix[j] = ctx->cid_table->chroma_weight[i]; 289 } 290 ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16, 291 weight_matrix, ctx->intra_quant_bias, 1, 292 ctx->m.avctx->qmax, 1); 293 294 for (qscale = 1; 
qscale <= ctx->m.avctx->qmax; qscale++) { 295 for (i = 0; i < 64; i++) { 296 ctx->qmatrix_l[qscale][i] <<= 2; 297 ctx->qmatrix_c[qscale][i] <<= 2; 298 ctx->qmatrix_l16[qscale][0][i] <<= 2; 299 ctx->qmatrix_l16[qscale][1][i] <<= 2; 300 ctx->qmatrix_c16[qscale][0][i] <<= 2; 301 ctx->qmatrix_c16[qscale][1][i] <<= 2; 302 } 303 } 304 } else { 305 // 10-bit 306 for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { 307 for (i = 1; i < 64; i++) { 308 int j = ff_zigzag_direct[i]; 309 310 /* The quantization formula from the VC-3 standard is: 311 * quantized = sign(block[i]) * floor(abs(block[i]/s) * p / 312 * (qscale * weight_table[i])) 313 * Where p is 32 for 8-bit samples and 8 for 10-bit ones. 314 * The s factor compensates scaling of DCT coefficients done by 315 * the DCT routines, and therefore is not present in standard. 316 * It's 8 for 8-bit samples and 4 for 10-bit ones. 317 * We want values of ctx->qtmatrix_l and ctx->qtmatrix_r to be: 318 * ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / 319 * (qscale * weight_table[i]) 320 * For 10-bit samples, p / s == 2 */ 321 ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / 322 (qscale * luma_weight_table[i]); 323 ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / 324 (qscale * chroma_weight_table[i]); 325 } 326 } 327 } 328 329 ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16; 330 ctx->m.q_chroma_intra_matrix = ctx->qmatrix_c; 331 ctx->m.q_intra_matrix16 = ctx->qmatrix_l16; 332 ctx->m.q_intra_matrix = ctx->qmatrix_l; 333 334 return 0; 335} 336 337static av_cold int dnxhd_init_rc(DNXHDEncContext *ctx) 338{ 339 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_rc, (ctx->m.avctx->qmax + 1) * ctx->m.mb_num)) 340 return AVERROR(ENOMEM); 341 342 if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD) { 343 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp, ctx->m.mb_num) || 344 !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp_tmp, ctx->m.mb_num)) 345 return AVERROR(ENOMEM); 346 } 347 ctx->frame_bits = (ctx->coding_unit_size - 348 ctx->data_offset - 4 
- ctx->min_padding) * 8; 349 ctx->qscale = 1; 350 ctx->lambda = 2 << LAMBDA_FRAC_BITS; // qscale 2 351 return 0; 352} 353 354static av_cold int dnxhd_encode_init(AVCodecContext *avctx) 355{ 356 DNXHDEncContext *ctx = avctx->priv_data; 357 int i, ret; 358 359 switch (avctx->pix_fmt) { 360 case AV_PIX_FMT_YUV422P: 361 ctx->bit_depth = 8; 362 break; 363 case AV_PIX_FMT_YUV422P10: 364 case AV_PIX_FMT_YUV444P10: 365 case AV_PIX_FMT_GBRP10: 366 ctx->bit_depth = 10; 367 break; 368 } 369 370 if ((ctx->profile == FF_PROFILE_DNXHR_444 && (avctx->pix_fmt != AV_PIX_FMT_YUV444P10 && 371 avctx->pix_fmt != AV_PIX_FMT_GBRP10)) || 372 (ctx->profile != FF_PROFILE_DNXHR_444 && (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || 373 avctx->pix_fmt == AV_PIX_FMT_GBRP10))) { 374 av_log(avctx, AV_LOG_ERROR, 375 "pixel format is incompatible with DNxHD profile\n"); 376 return AVERROR(EINVAL); 377 } 378 379 if (ctx->profile == FF_PROFILE_DNXHR_HQX && avctx->pix_fmt != AV_PIX_FMT_YUV422P10) { 380 av_log(avctx, AV_LOG_ERROR, 381 "pixel format is incompatible with DNxHR HQX profile\n"); 382 return AVERROR(EINVAL); 383 } 384 385 if ((ctx->profile == FF_PROFILE_DNXHR_LB || 386 ctx->profile == FF_PROFILE_DNXHR_SQ || 387 ctx->profile == FF_PROFILE_DNXHR_HQ) && avctx->pix_fmt != AV_PIX_FMT_YUV422P) { 388 av_log(avctx, AV_LOG_ERROR, 389 "pixel format is incompatible with DNxHR LB/SQ/HQ profile\n"); 390 return AVERROR(EINVAL); 391 } 392 393 ctx->is_444 = ctx->profile == FF_PROFILE_DNXHR_444; 394 avctx->profile = ctx->profile; 395 ctx->cid = ff_dnxhd_find_cid(avctx, ctx->bit_depth); 396 if (!ctx->cid) { 397 av_log(avctx, AV_LOG_ERROR, 398 "video parameters incompatible with DNxHD. 
Valid DNxHD profiles:\n"); 399 ff_dnxhd_print_profiles(avctx, AV_LOG_ERROR); 400 return AVERROR(EINVAL); 401 } 402 av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid); 403 404 if (ctx->cid >= 1270 && ctx->cid <= 1274) 405 avctx->codec_tag = MKTAG('A','V','d','h'); 406 407 if (avctx->width < 256 || avctx->height < 120) { 408 av_log(avctx, AV_LOG_ERROR, 409 "Input dimensions too small, input must be at least 256x120\n"); 410 return AVERROR(EINVAL); 411 } 412 413 ctx->cid_table = ff_dnxhd_get_cid_table(ctx->cid); 414 av_assert0(ctx->cid_table); 415 416 ctx->m.avctx = avctx; 417 ctx->m.mb_intra = 1; 418 ctx->m.h263_aic = 1; 419 420 avctx->bits_per_raw_sample = ctx->bit_depth; 421 422 ff_blockdsp_init(&ctx->bdsp, avctx); 423 ff_fdctdsp_init(&ctx->m.fdsp, avctx); 424 ff_mpv_idct_init(&ctx->m); 425 ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); 426 ff_pixblockdsp_init(&ctx->m.pdsp, avctx); 427 ff_dct_encode_init(&ctx->m); 428 429 if (ctx->profile != FF_PROFILE_DNXHD) 430 ff_videodsp_init(&ctx->m.vdsp, ctx->bit_depth); 431 432 if (!ctx->m.dct_quantize) 433 ctx->m.dct_quantize = ff_dct_quantize_c; 434 435 if (ctx->is_444 || ctx->profile == FF_PROFILE_DNXHR_HQX) { 436 ctx->m.dct_quantize = dnxhd_10bit_dct_quantize_444; 437 ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym; 438 ctx->block_width_l2 = 4; 439 } else if (ctx->bit_depth == 10) { 440 ctx->m.dct_quantize = dnxhd_10bit_dct_quantize; 441 ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym; 442 ctx->block_width_l2 = 4; 443 } else { 444 ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym; 445 ctx->block_width_l2 = 3; 446 } 447 448#if ARCH_X86 449 ff_dnxhdenc_init_x86(ctx); 450#endif 451 452 ctx->m.mb_height = (avctx->height + 15) / 16; 453 ctx->m.mb_width = (avctx->width + 15) / 16; 454 455 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { 456 ctx->interlaced = 1; 457 ctx->m.mb_height /= 2; 458 } 459 460 if (ctx->interlaced && ctx->profile != FF_PROFILE_DNXHD) { 461 av_log(avctx, AV_LOG_ERROR, 462 
"Interlaced encoding is not supported for DNxHR profiles.\n"); 463 return AVERROR(EINVAL); 464 } 465 466 ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width; 467 468 if (ctx->cid_table->frame_size == DNXHD_VARIABLE) { 469 ctx->frame_size = ff_dnxhd_get_hr_frame_size(ctx->cid, 470 avctx->width, avctx->height); 471 av_assert0(ctx->frame_size >= 0); 472 ctx->coding_unit_size = ctx->frame_size; 473 } else { 474 ctx->frame_size = ctx->cid_table->frame_size; 475 ctx->coding_unit_size = ctx->cid_table->coding_unit_size; 476 } 477 478 if (ctx->m.mb_height > 68) 479 ctx->data_offset = 0x170 + (ctx->m.mb_height << 2); 480 else 481 ctx->data_offset = 0x280; 482 483 // XXX tune lbias/cbias 484 if ((ret = dnxhd_init_qmat(ctx, ctx->intra_quant_bias, 0)) < 0) 485 return ret; 486 487 /* Avid Nitris hardware decoder requires a minimum amount of padding 488 * in the coding unit payload */ 489 if (ctx->nitris_compat) 490 ctx->min_padding = 1600; 491 492 if ((ret = dnxhd_init_vlc(ctx)) < 0) 493 return ret; 494 if ((ret = dnxhd_init_rc(ctx)) < 0) 495 return ret; 496 497 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->slice_size, ctx->m.mb_height) || 498 !FF_ALLOCZ_TYPED_ARRAY(ctx->slice_offs, ctx->m.mb_height) || 499 !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_bits, ctx->m.mb_num) || 500 !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_qscale, ctx->m.mb_num)) 501 return AVERROR(ENOMEM); 502 503 if (avctx->active_thread_type == FF_THREAD_SLICE) { 504 if (avctx->thread_count > MAX_THREADS) { 505 av_log(avctx, AV_LOG_ERROR, "too many threads\n"); 506 return AVERROR(EINVAL); 507 } 508 } 509 510 if (avctx->qmax <= 1) { 511 av_log(avctx, AV_LOG_ERROR, "qmax must be at least 2\n"); 512 return AVERROR(EINVAL); 513 } 514 515 ctx->thread[0] = ctx; 516 if (avctx->active_thread_type == FF_THREAD_SLICE) { 517 for (i = 1; i < avctx->thread_count; i++) { 518 ctx->thread[i] = av_memdup(ctx, sizeof(DNXHDEncContext)); 519 if (!ctx->thread[i]) 520 return AVERROR(ENOMEM); 521 } 522 } 523 524 return 0; 525} 526 527static int 
dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf) 528{ 529 DNXHDEncContext *ctx = avctx->priv_data; 530 531 memset(buf, 0, ctx->data_offset); 532 533 // * write prefix */ 534 AV_WB16(buf + 0x02, ctx->data_offset); 535 if (ctx->cid >= 1270 && ctx->cid <= 1274) 536 buf[4] = 0x03; 537 else 538 buf[4] = 0x01; 539 540 buf[5] = ctx->interlaced ? ctx->cur_field + 2 : 0x01; 541 buf[6] = 0x80; // crc flag off 542 buf[7] = 0xa0; // reserved 543 AV_WB16(buf + 0x18, avctx->height >> ctx->interlaced); // ALPF 544 AV_WB16(buf + 0x1a, avctx->width); // SPL 545 AV_WB16(buf + 0x1d, avctx->height >> ctx->interlaced); // NAL 546 547 buf[0x21] = ctx->bit_depth == 10 ? 0x58 : 0x38; 548 buf[0x22] = 0x88 + (ctx->interlaced << 2); 549 AV_WB32(buf + 0x28, ctx->cid); // CID 550 buf[0x2c] = (!ctx->interlaced << 7) | (ctx->is_444 << 6) | (avctx->pix_fmt == AV_PIX_FMT_YUV444P10); 551 552 buf[0x5f] = 0x01; // UDL 553 554 buf[0x167] = 0x02; // reserved 555 AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS 556 AV_WB16(buf + 0x16c, ctx->m.mb_height); // Ns 557 buf[0x16f] = 0x10; // reserved 558 559 ctx->msip = buf + 0x170; 560 return 0; 561} 562 563static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff) 564{ 565 int nbits; 566 if (diff < 0) { 567 nbits = av_log2_16bit(-2 * diff); 568 diff--; 569 } else { 570 nbits = av_log2_16bit(2 * diff); 571 } 572 put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits, 573 (ctx->cid_table->dc_codes[nbits] << nbits) + 574 av_mod_uintp2(diff, nbits)); 575} 576 577static av_always_inline 578void dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block, 579 int last_index, int n) 580{ 581 int last_non_zero = 0; 582 int slevel, i, j; 583 584 dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]); 585 ctx->m.last_dc[n] = block[0]; 586 587 for (i = 1; i <= last_index; i++) { 588 j = ctx->m.intra_scantable.permutated[i]; 589 slevel = block[j]; 590 if (slevel) { 591 int run_level = i - last_non_zero - 1; 592 int rlevel = slevel * (1 << 
1) | !!run_level; 593 put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]); 594 if (run_level) 595 put_bits(&ctx->m.pb, ctx->run_bits[run_level], 596 ctx->run_codes[run_level]); 597 last_non_zero = i; 598 } 599 } 600 put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB 601} 602 603static av_always_inline 604void dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n, 605 int qscale, int last_index) 606{ 607 const uint8_t *weight_matrix; 608 int level; 609 int i; 610 611 if (ctx->is_444) { 612 weight_matrix = ((n % 6) < 2) ? ctx->cid_table->luma_weight 613 : ctx->cid_table->chroma_weight; 614 } else { 615 weight_matrix = (n & 2) ? ctx->cid_table->chroma_weight 616 : ctx->cid_table->luma_weight; 617 } 618 619 for (i = 1; i <= last_index; i++) { 620 int j = ctx->m.intra_scantable.permutated[i]; 621 level = block[j]; 622 if (level) { 623 if (level < 0) { 624 level = (1 - 2 * level) * qscale * weight_matrix[i]; 625 if (ctx->bit_depth == 10) { 626 if (weight_matrix[i] != 8) 627 level += 8; 628 level >>= 4; 629 } else { 630 if (weight_matrix[i] != 32) 631 level += 32; 632 level >>= 6; 633 } 634 level = -level; 635 } else { 636 level = (2 * level + 1) * qscale * weight_matrix[i]; 637 if (ctx->bit_depth == 10) { 638 if (weight_matrix[i] != 8) 639 level += 8; 640 level >>= 4; 641 } else { 642 if (weight_matrix[i] != 32) 643 level += 32; 644 level >>= 6; 645 } 646 } 647 block[j] = level; 648 } 649 } 650} 651 652static av_always_inline int dnxhd_ssd_block(int16_t *qblock, int16_t *block) 653{ 654 int score = 0; 655 int i; 656 for (i = 0; i < 64; i++) 657 score += (block[i] - qblock[i]) * (block[i] - qblock[i]); 658 return score; 659} 660 661static av_always_inline 662int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index) 663{ 664 int last_non_zero = 0; 665 int bits = 0; 666 int i, j, level; 667 for (i = 1; i <= last_index; i++) { 668 j = ctx->m.intra_scantable.permutated[i]; 669 level = block[j]; 670 if (level) { 671 
int run_level = i - last_non_zero - 1; 672 bits += ctx->vlc_bits[level * (1 << 1) | 673 !!run_level] + ctx->run_bits[run_level]; 674 last_non_zero = i; 675 } 676 } 677 return bits; 678} 679 680static av_always_inline 681void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) 682{ 683 const int bs = ctx->block_width_l2; 684 const int bw = 1 << bs; 685 int dct_y_offset = ctx->dct_y_offset; 686 int dct_uv_offset = ctx->dct_uv_offset; 687 int linesize = ctx->m.linesize; 688 int uvlinesize = ctx->m.uvlinesize; 689 const uint8_t *ptr_y = ctx->thread[0]->src[0] + 690 ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs + 1); 691 const uint8_t *ptr_u = ctx->thread[0]->src[1] + 692 ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444); 693 const uint8_t *ptr_v = ctx->thread[0]->src[2] + 694 ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444); 695 PixblockDSPContext *pdsp = &ctx->m.pdsp; 696 VideoDSPContext *vdsp = &ctx->m.vdsp; 697 698 if (ctx->bit_depth != 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width || 699 (mb_y << 4) + 16 > ctx->m.avctx->height)) { 700 int y_w = ctx->m.avctx->width - (mb_x << 4); 701 int y_h = ctx->m.avctx->height - (mb_y << 4); 702 int uv_w = (y_w + 1) / 2; 703 int uv_h = y_h; 704 linesize = 16; 705 uvlinesize = 8; 706 707 vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y, 708 linesize, ctx->m.linesize, 709 linesize, 16, 710 0, 0, y_w, y_h); 711 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u, 712 uvlinesize, ctx->m.uvlinesize, 713 uvlinesize, 16, 714 0, 0, uv_w, uv_h); 715 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v, 716 uvlinesize, ctx->m.uvlinesize, 717 uvlinesize, 16, 718 0, 0, uv_w, uv_h); 719 720 dct_y_offset = bw * linesize; 721 dct_uv_offset = bw * uvlinesize; 722 ptr_y = &ctx->edge_buf_y[0]; 723 ptr_u = &ctx->edge_buf_uv[0][0]; 724 ptr_v = &ctx->edge_buf_uv[1][0]; 725 } else if (ctx->bit_depth == 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width || 726 (mb_y 
<< 4) + 16 > ctx->m.avctx->height)) { 727 int y_w = ctx->m.avctx->width - (mb_x << 4); 728 int y_h = ctx->m.avctx->height - (mb_y << 4); 729 int uv_w = ctx->is_444 ? y_w : (y_w + 1) / 2; 730 int uv_h = y_h; 731 linesize = 32; 732 uvlinesize = 16 + 16 * ctx->is_444; 733 734 vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y, 735 linesize, ctx->m.linesize, 736 linesize / 2, 16, 737 0, 0, y_w, y_h); 738 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u, 739 uvlinesize, ctx->m.uvlinesize, 740 uvlinesize / 2, 16, 741 0, 0, uv_w, uv_h); 742 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v, 743 uvlinesize, ctx->m.uvlinesize, 744 uvlinesize / 2, 16, 745 0, 0, uv_w, uv_h); 746 747 dct_y_offset = bw * linesize / 2; 748 dct_uv_offset = bw * uvlinesize / 2; 749 ptr_y = &ctx->edge_buf_y[0]; 750 ptr_u = &ctx->edge_buf_uv[0][0]; 751 ptr_v = &ctx->edge_buf_uv[1][0]; 752 } 753 754 if (!ctx->is_444) { 755 pdsp->get_pixels(ctx->blocks[0], ptr_y, linesize); 756 pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize); 757 pdsp->get_pixels(ctx->blocks[2], ptr_u, uvlinesize); 758 pdsp->get_pixels(ctx->blocks[3], ptr_v, uvlinesize); 759 760 if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { 761 if (ctx->interlaced) { 762 ctx->get_pixels_8x4_sym(ctx->blocks[4], 763 ptr_y + dct_y_offset, 764 linesize); 765 ctx->get_pixels_8x4_sym(ctx->blocks[5], 766 ptr_y + dct_y_offset + bw, 767 linesize); 768 ctx->get_pixels_8x4_sym(ctx->blocks[6], 769 ptr_u + dct_uv_offset, 770 uvlinesize); 771 ctx->get_pixels_8x4_sym(ctx->blocks[7], 772 ptr_v + dct_uv_offset, 773 uvlinesize); 774 } else { 775 ctx->bdsp.clear_block(ctx->blocks[4]); 776 ctx->bdsp.clear_block(ctx->blocks[5]); 777 ctx->bdsp.clear_block(ctx->blocks[6]); 778 ctx->bdsp.clear_block(ctx->blocks[7]); 779 } 780 } else { 781 pdsp->get_pixels(ctx->blocks[4], 782 ptr_y + dct_y_offset, linesize); 783 pdsp->get_pixels(ctx->blocks[5], 784 ptr_y + dct_y_offset + bw, linesize); 785 pdsp->get_pixels(ctx->blocks[6], 786 ptr_u + 
dct_uv_offset, uvlinesize); 787 pdsp->get_pixels(ctx->blocks[7], 788 ptr_v + dct_uv_offset, uvlinesize); 789 } 790 } else { 791 pdsp->get_pixels(ctx->blocks[0], ptr_y, linesize); 792 pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize); 793 pdsp->get_pixels(ctx->blocks[6], ptr_y + dct_y_offset, linesize); 794 pdsp->get_pixels(ctx->blocks[7], ptr_y + dct_y_offset + bw, linesize); 795 796 pdsp->get_pixels(ctx->blocks[2], ptr_u, uvlinesize); 797 pdsp->get_pixels(ctx->blocks[3], ptr_u + bw, uvlinesize); 798 pdsp->get_pixels(ctx->blocks[8], ptr_u + dct_uv_offset, uvlinesize); 799 pdsp->get_pixels(ctx->blocks[9], ptr_u + dct_uv_offset + bw, uvlinesize); 800 801 pdsp->get_pixels(ctx->blocks[4], ptr_v, uvlinesize); 802 pdsp->get_pixels(ctx->blocks[5], ptr_v + bw, uvlinesize); 803 pdsp->get_pixels(ctx->blocks[10], ptr_v + dct_uv_offset, uvlinesize); 804 pdsp->get_pixels(ctx->blocks[11], ptr_v + dct_uv_offset + bw, uvlinesize); 805 } 806} 807 808static av_always_inline 809int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i) 810{ 811 int x; 812 813 if (ctx->is_444) { 814 x = (i >> 1) % 3; 815 } else { 816 const static uint8_t component[8]={0,0,1,2,0,0,1,2}; 817 x = component[i]; 818 } 819 return x; 820} 821 822static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, 823 int jobnr, int threadnr) 824{ 825 DNXHDEncContext *ctx = avctx->priv_data; 826 int mb_y = jobnr, mb_x; 827 int qscale = ctx->qscale; 828 LOCAL_ALIGNED_16(int16_t, block, [64]); 829 ctx = ctx->thread[threadnr]; 830 831 ctx->m.last_dc[0] = 832 ctx->m.last_dc[1] = 833 ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2); 834 835 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { 836 unsigned mb = mb_y * ctx->m.mb_width + mb_x; 837 int ssd = 0; 838 int ac_bits = 0; 839 int dc_bits = 0; 840 int i; 841 842 dnxhd_get_blocks(ctx, mb_x, mb_y); 843 844 for (i = 0; i < 8 + 4 * ctx->is_444; i++) { 845 int16_t *src_block = ctx->blocks[i]; 846 int overflow, nbits, diff, last_index; 847 int n = dnxhd_switch_matrix(ctx, 
i); 848 849 memcpy(block, src_block, 64 * sizeof(*block)); 850 last_index = ctx->m.dct_quantize(&ctx->m, block, 851 ctx->is_444 ? 4 * (n > 0): 4 & (2*i), 852 qscale, &overflow); 853 ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index); 854 855 diff = block[0] - ctx->m.last_dc[n]; 856 if (diff < 0) 857 nbits = av_log2_16bit(-2 * diff); 858 else 859 nbits = av_log2_16bit(2 * diff); 860 861 av_assert1(nbits < ctx->bit_depth + 4); 862 dc_bits += ctx->cid_table->dc_bits[nbits] + nbits; 863 864 ctx->m.last_dc[n] = block[0]; 865 866 if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) { 867 dnxhd_unquantize_c(ctx, block, i, qscale, last_index); 868 ctx->m.idsp.idct(block); 869 ssd += dnxhd_ssd_block(block, src_block); 870 } 871 } 872 ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].ssd = ssd; 873 ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].bits = ac_bits + dc_bits + 12 + 874 (1 + ctx->is_444) * 8 * ctx->vlc_bits[0]; 875 } 876 return 0; 877} 878 879static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, 880 int jobnr, int threadnr) 881{ 882 DNXHDEncContext *ctx = avctx->priv_data; 883 int mb_y = jobnr, mb_x; 884 ctx = ctx->thread[threadnr]; 885 init_put_bits(&ctx->m.pb, (uint8_t *)arg + ctx->data_offset + ctx->slice_offs[jobnr], 886 ctx->slice_size[jobnr]); 887 888 ctx->m.last_dc[0] = 889 ctx->m.last_dc[1] = 890 ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2); 891 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { 892 unsigned mb = mb_y * ctx->m.mb_width + mb_x; 893 int qscale = ctx->mb_qscale[mb]; 894 int i; 895 896 put_bits(&ctx->m.pb, 11, qscale); 897 put_bits(&ctx->m.pb, 1, avctx->pix_fmt == AV_PIX_FMT_YUV444P10); 898 899 dnxhd_get_blocks(ctx, mb_x, mb_y); 900 901 for (i = 0; i < 8 + 4 * ctx->is_444; i++) { 902 int16_t *block = ctx->blocks[i]; 903 int overflow, n = dnxhd_switch_matrix(ctx, i); 904 int last_index = ctx->m.dct_quantize(&ctx->m, block, 905 ctx->is_444 ? (((i >> 1) % 3) < 1 ? 
0 : 4): 4 & (2*i), 906 qscale, &overflow); 907 908 dnxhd_encode_block(ctx, block, last_index, n); 909 } 910 } 911 if (put_bits_count(&ctx->m.pb) & 31) 912 put_bits(&ctx->m.pb, 32 - (put_bits_count(&ctx->m.pb) & 31), 0); 913 flush_put_bits(&ctx->m.pb); 914 memset(put_bits_ptr(&ctx->m.pb), 0, put_bytes_left(&ctx->m.pb, 0)); 915 return 0; 916} 917 918static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx) 919{ 920 int mb_y, mb_x; 921 int offset = 0; 922 for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) { 923 int thread_size; 924 ctx->slice_offs[mb_y] = offset; 925 ctx->slice_size[mb_y] = 0; 926 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { 927 unsigned mb = mb_y * ctx->m.mb_width + mb_x; 928 ctx->slice_size[mb_y] += ctx->mb_bits[mb]; 929 } 930 ctx->slice_size[mb_y] = (ctx->slice_size[mb_y] + 31U) & ~31U; 931 ctx->slice_size[mb_y] >>= 3; 932 thread_size = ctx->slice_size[mb_y]; 933 offset += thread_size; 934 } 935} 936 937static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, 938 int jobnr, int threadnr) 939{ 940 DNXHDEncContext *ctx = avctx->priv_data; 941 int mb_y = jobnr, mb_x, x, y; 942 int partial_last_row = (mb_y == ctx->m.mb_height - 1) && 943 ((avctx->height >> ctx->interlaced) & 0xF); 944 945 ctx = ctx->thread[threadnr]; 946 if (ctx->bit_depth == 8) { 947 uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize); 948 for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) { 949 unsigned mb = mb_y * ctx->m.mb_width + mb_x; 950 int sum; 951 int varc; 952 953 if (!partial_last_row && mb_x * 16 <= avctx->width - 16 && (avctx->width % 16) == 0) { 954 sum = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize); 955 varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize); 956 } else { 957 int bw = FFMIN(avctx->width - 16 * mb_x, 16); 958 int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16); 959 sum = varc = 0; 960 for (y = 0; y < bh; y++) { 961 for (x = 0; x < bw; x++) { 962 uint8_t val = pix[x + y * ctx->m.linesize]; 963 sum += 
val; 964 varc += val * val; 965 } 966 } 967 } 968 varc = (varc - (((unsigned) sum * sum) >> 8) + 128) >> 8; 969 970 ctx->mb_cmp[mb].value = varc; 971 ctx->mb_cmp[mb].mb = mb; 972 } 973 } else { // 10-bit 974 const int linesize = ctx->m.linesize >> 1; 975 for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) { 976 uint16_t *pix = (uint16_t *)ctx->thread[0]->src[0] + 977 ((mb_y << 4) * linesize) + (mb_x << 4); 978 unsigned mb = mb_y * ctx->m.mb_width + mb_x; 979 int sum = 0; 980 int sqsum = 0; 981 int bw = FFMIN(avctx->width - 16 * mb_x, 16); 982 int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16); 983 int mean, sqmean; 984 int i, j; 985 // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8. 986 for (i = 0; i < bh; ++i) { 987 for (j = 0; j < bw; ++j) { 988 // Turn 16-bit pixels into 10-bit ones. 989 const int sample = (unsigned) pix[j] >> 6; 990 sum += sample; 991 sqsum += sample * sample; 992 // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX 993 } 994 pix += linesize; 995 } 996 mean = sum >> 8; // 16*16 == 2^8 997 sqmean = sqsum >> 8; 998 ctx->mb_cmp[mb].value = sqmean - mean * mean; 999 ctx->mb_cmp[mb].mb = mb; 1000 } 1001 } 1002 return 0; 1003} 1004 1005static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx) 1006{ 1007 int lambda, up_step, down_step; 1008 int last_lower = INT_MAX, last_higher = 0; 1009 int x, y, q; 1010 1011 for (q = 1; q < avctx->qmax; q++) { 1012 ctx->qscale = q; 1013 avctx->execute2(avctx, dnxhd_calc_bits_thread, 1014 NULL, NULL, ctx->m.mb_height); 1015 } 1016 up_step = down_step = 2 << LAMBDA_FRAC_BITS; 1017 lambda = ctx->lambda; 1018 1019 for (;;) { 1020 int bits = 0; 1021 int end = 0; 1022 if (lambda == last_higher) { 1023 lambda++; 1024 end = 1; // need to set final qscales/bits 1025 } 1026 for (y = 0; y < ctx->m.mb_height; y++) { 1027 for (x = 0; x < ctx->m.mb_width; x++) { 1028 unsigned min = UINT_MAX; 1029 int qscale = 1; 1030 int mb = y * ctx->m.mb_width + x; 1031 int rc = 0; 1032 for (q = 
1; q < avctx->qmax; q++) { 1033 int i = (q*ctx->m.mb_num) + mb; 1034 unsigned score = ctx->mb_rc[i].bits * lambda + 1035 ((unsigned) ctx->mb_rc[i].ssd << LAMBDA_FRAC_BITS); 1036 if (score < min) { 1037 min = score; 1038 qscale = q; 1039 rc = i; 1040 } 1041 } 1042 bits += ctx->mb_rc[rc].bits; 1043 ctx->mb_qscale[mb] = qscale; 1044 ctx->mb_bits[mb] = ctx->mb_rc[rc].bits; 1045 } 1046 bits = (bits + 31) & ~31; // padding 1047 if (bits > ctx->frame_bits) 1048 break; 1049 } 1050 if (end) { 1051 if (bits > ctx->frame_bits) 1052 return AVERROR(EINVAL); 1053 break; 1054 } 1055 if (bits < ctx->frame_bits) { 1056 last_lower = FFMIN(lambda, last_lower); 1057 if (last_higher != 0) 1058 lambda = (lambda+last_higher)>>1; 1059 else 1060 lambda -= down_step; 1061 down_step = FFMIN((int64_t)down_step*5, INT_MAX); 1062 up_step = 1<<LAMBDA_FRAC_BITS; 1063 lambda = FFMAX(1, lambda); 1064 if (lambda == last_lower) 1065 break; 1066 } else { 1067 last_higher = FFMAX(lambda, last_higher); 1068 if (last_lower != INT_MAX) 1069 lambda = (lambda+last_lower)>>1; 1070 else if ((int64_t)lambda + up_step > INT_MAX) 1071 return AVERROR(EINVAL); 1072 else 1073 lambda += up_step; 1074 up_step = FFMIN((int64_t)up_step*5, INT_MAX); 1075 down_step = 1<<LAMBDA_FRAC_BITS; 1076 } 1077 } 1078 ctx->lambda = lambda; 1079 return 0; 1080} 1081 1082static int dnxhd_find_qscale(DNXHDEncContext *ctx) 1083{ 1084 int bits = 0; 1085 int up_step = 1; 1086 int down_step = 1; 1087 int last_higher = 0; 1088 int last_lower = INT_MAX; 1089 int qscale; 1090 int x, y; 1091 1092 qscale = ctx->qscale; 1093 for (;;) { 1094 bits = 0; 1095 ctx->qscale = qscale; 1096 // XXX avoid recalculating bits 1097 ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread, 1098 NULL, NULL, ctx->m.mb_height); 1099 for (y = 0; y < ctx->m.mb_height; y++) { 1100 for (x = 0; x < ctx->m.mb_width; x++) 1101 bits += ctx->mb_rc[(qscale*ctx->m.mb_num) + (y*ctx->m.mb_width+x)].bits; 1102 bits = (bits+31)&~31; // padding 1103 if (bits > 
ctx->frame_bits) 1104 break; 1105 } 1106 if (bits < ctx->frame_bits) { 1107 if (qscale == 1) 1108 return 1; 1109 if (last_higher == qscale - 1) { 1110 qscale = last_higher; 1111 break; 1112 } 1113 last_lower = FFMIN(qscale, last_lower); 1114 if (last_higher != 0) 1115 qscale = (qscale + last_higher) >> 1; 1116 else 1117 qscale -= down_step++; 1118 if (qscale < 1) 1119 qscale = 1; 1120 up_step = 1; 1121 } else { 1122 if (last_lower == qscale + 1) 1123 break; 1124 last_higher = FFMAX(qscale, last_higher); 1125 if (last_lower != INT_MAX) 1126 qscale = (qscale + last_lower) >> 1; 1127 else 1128 qscale += up_step++; 1129 down_step = 1; 1130 if (qscale >= ctx->m.avctx->qmax) 1131 return AVERROR(EINVAL); 1132 } 1133 } 1134 ctx->qscale = qscale; 1135 return 0; 1136} 1137 1138#define BUCKET_BITS 8 1139#define RADIX_PASSES 4 1140#define NBUCKETS (1 << BUCKET_BITS) 1141 1142static inline int get_bucket(int value, int shift) 1143{ 1144 value >>= shift; 1145 value &= NBUCKETS - 1; 1146 return NBUCKETS - 1 - value; 1147} 1148 1149static void radix_count(const RCCMPEntry *data, int size, 1150 int buckets[RADIX_PASSES][NBUCKETS]) 1151{ 1152 int i, j; 1153 memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS); 1154 for (i = 0; i < size; i++) { 1155 int v = data[i].value; 1156 for (j = 0; j < RADIX_PASSES; j++) { 1157 buckets[j][get_bucket(v, 0)]++; 1158 v >>= BUCKET_BITS; 1159 } 1160 av_assert1(!v); 1161 } 1162 for (j = 0; j < RADIX_PASSES; j++) { 1163 int offset = size; 1164 for (i = NBUCKETS - 1; i >= 0; i--) 1165 buckets[j][i] = offset -= buckets[j][i]; 1166 av_assert1(!buckets[j][0]); 1167 } 1168} 1169 1170static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, 1171 int size, int buckets[NBUCKETS], int pass) 1172{ 1173 int shift = pass * BUCKET_BITS; 1174 int i; 1175 for (i = 0; i < size; i++) { 1176 int v = get_bucket(data[i].value, shift); 1177 int pos = buckets[v]++; 1178 dst[pos] = data[i]; 1179 } 1180} 1181 1182static void radix_sort(RCCMPEntry 
*data, RCCMPEntry *tmp, int size) 1183{ 1184 int buckets[RADIX_PASSES][NBUCKETS]; 1185 radix_count(data, size, buckets); 1186 radix_sort_pass(tmp, data, size, buckets[0], 0); 1187 radix_sort_pass(data, tmp, size, buckets[1], 1); 1188 if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) { 1189 radix_sort_pass(tmp, data, size, buckets[2], 2); 1190 radix_sort_pass(data, tmp, size, buckets[3], 3); 1191 } 1192} 1193 1194static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx) 1195{ 1196 int max_bits = 0; 1197 int ret, x, y; 1198 if ((ret = dnxhd_find_qscale(ctx)) < 0) 1199 return ret; 1200 for (y = 0; y < ctx->m.mb_height; y++) { 1201 for (x = 0; x < ctx->m.mb_width; x++) { 1202 int mb = y * ctx->m.mb_width + x; 1203 int rc = (ctx->qscale * ctx->m.mb_num ) + mb; 1204 int delta_bits; 1205 ctx->mb_qscale[mb] = ctx->qscale; 1206 ctx->mb_bits[mb] = ctx->mb_rc[rc].bits; 1207 max_bits += ctx->mb_rc[rc].bits; 1208 if (!RC_VARIANCE) { 1209 delta_bits = ctx->mb_rc[rc].bits - 1210 ctx->mb_rc[rc + ctx->m.mb_num].bits; 1211 ctx->mb_cmp[mb].mb = mb; 1212 ctx->mb_cmp[mb].value = 1213 delta_bits ? 
((ctx->mb_rc[rc].ssd - 1214 ctx->mb_rc[rc + ctx->m.mb_num].ssd) * 100) / 1215 delta_bits 1216 : INT_MIN; // avoid increasing qscale 1217 } 1218 } 1219 max_bits += 31; // worst padding 1220 } 1221 if (!ret) { 1222 if (RC_VARIANCE) 1223 avctx->execute2(avctx, dnxhd_mb_var_thread, 1224 NULL, NULL, ctx->m.mb_height); 1225 radix_sort(ctx->mb_cmp, ctx->mb_cmp_tmp, ctx->m.mb_num); 1226retry: 1227 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) { 1228 int mb = ctx->mb_cmp[x].mb; 1229 int rc = (ctx->qscale * ctx->m.mb_num ) + mb; 1230 max_bits -= ctx->mb_rc[rc].bits - 1231 ctx->mb_rc[rc + ctx->m.mb_num].bits; 1232 if (ctx->mb_qscale[mb] < 255) 1233 ctx->mb_qscale[mb]++; 1234 ctx->mb_bits[mb] = ctx->mb_rc[rc + ctx->m.mb_num].bits; 1235 } 1236 1237 if (max_bits > ctx->frame_bits) 1238 goto retry; 1239 } 1240 return 0; 1241} 1242 1243static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame) 1244{ 1245 int i; 1246 1247 for (i = 0; i < ctx->m.avctx->thread_count; i++) { 1248 ctx->thread[i]->m.linesize = frame->linesize[0] << ctx->interlaced; 1249 ctx->thread[i]->m.uvlinesize = frame->linesize[1] << ctx->interlaced; 1250 ctx->thread[i]->dct_y_offset = ctx->m.linesize *8; 1251 ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8; 1252 } 1253 1254 ctx->cur_field = frame->interlaced_frame && !frame->top_field_first; 1255} 1256 1257static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt, 1258 const AVFrame *frame, int *got_packet) 1259{ 1260 DNXHDEncContext *ctx = avctx->priv_data; 1261 int first_field = 1; 1262 int offset, i, ret; 1263 uint8_t *buf; 1264 1265 if ((ret = ff_get_encode_buffer(avctx, pkt, ctx->frame_size, 0)) < 0) 1266 return ret; 1267 buf = pkt->data; 1268 1269 dnxhd_load_picture(ctx, frame); 1270 1271encode_coding_unit: 1272 for (i = 0; i < 3; i++) { 1273 ctx->src[i] = frame->data[i]; 1274 if (ctx->interlaced && ctx->cur_field) 1275 ctx->src[i] += frame->linesize[i]; 1276 } 1277 1278 dnxhd_write_header(avctx, buf); 1279 
1280 if (avctx->mb_decision == FF_MB_DECISION_RD) 1281 ret = dnxhd_encode_rdo(avctx, ctx); 1282 else 1283 ret = dnxhd_encode_fast(avctx, ctx); 1284 if (ret < 0) { 1285 av_log(avctx, AV_LOG_ERROR, 1286 "picture could not fit ratecontrol constraints, increase qmax\n"); 1287 return ret; 1288 } 1289 1290 dnxhd_setup_threads_slices(ctx); 1291 1292 offset = 0; 1293 for (i = 0; i < ctx->m.mb_height; i++) { 1294 AV_WB32(ctx->msip + i * 4, offset); 1295 offset += ctx->slice_size[i]; 1296 av_assert1(!(ctx->slice_size[i] & 3)); 1297 } 1298 1299 avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height); 1300 1301 av_assert1(ctx->data_offset + offset + 4 <= ctx->coding_unit_size); 1302 memset(buf + ctx->data_offset + offset, 0, 1303 ctx->coding_unit_size - 4 - offset - ctx->data_offset); 1304 1305 AV_WB32(buf + ctx->coding_unit_size - 4, 0x600DC0DE); // EOF 1306 1307 if (ctx->interlaced && first_field) { 1308 first_field = 0; 1309 ctx->cur_field ^= 1; 1310 buf += ctx->coding_unit_size; 1311 goto encode_coding_unit; 1312 } 1313 1314 ff_side_data_set_encoder_stats(pkt, ctx->qscale * FF_QP2LAMBDA, NULL, 0, AV_PICTURE_TYPE_I); 1315 1316 *got_packet = 1; 1317 return 0; 1318} 1319 1320static av_cold int dnxhd_encode_end(AVCodecContext *avctx) 1321{ 1322 DNXHDEncContext *ctx = avctx->priv_data; 1323 int i; 1324 1325 av_freep(&ctx->orig_vlc_codes); 1326 av_freep(&ctx->orig_vlc_bits); 1327 av_freep(&ctx->run_codes); 1328 av_freep(&ctx->run_bits); 1329 1330 av_freep(&ctx->mb_bits); 1331 av_freep(&ctx->mb_qscale); 1332 av_freep(&ctx->mb_rc); 1333 av_freep(&ctx->mb_cmp); 1334 av_freep(&ctx->mb_cmp_tmp); 1335 av_freep(&ctx->slice_size); 1336 av_freep(&ctx->slice_offs); 1337 1338 av_freep(&ctx->qmatrix_c); 1339 av_freep(&ctx->qmatrix_l); 1340 av_freep(&ctx->qmatrix_c16); 1341 av_freep(&ctx->qmatrix_l16); 1342 1343 if (ctx->thread[1]) { 1344 for (i = 1; i < avctx->thread_count; i++) 1345 av_freep(&ctx->thread[i]); 1346 } 1347 1348 return 0; 1349} 1350 1351static const 
FFCodecDefault dnxhd_defaults[] = { 1352 { "qmax", "1024" }, /* Maximum quantization scale factor allowed for VC-3 */ 1353 { NULL }, 1354}; 1355 1356const FFCodec ff_dnxhd_encoder = { 1357 .p.name = "dnxhd", 1358 .p.long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"), 1359 .p.type = AVMEDIA_TYPE_VIDEO, 1360 .p.id = AV_CODEC_ID_DNXHD, 1361 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | 1362 AV_CODEC_CAP_SLICE_THREADS, 1363 .priv_data_size = sizeof(DNXHDEncContext), 1364 .init = dnxhd_encode_init, 1365 FF_CODEC_ENCODE_CB(dnxhd_encode_picture), 1366 .close = dnxhd_encode_end, 1367 .p.pix_fmts = (const enum AVPixelFormat[]) { 1368 AV_PIX_FMT_YUV422P, 1369 AV_PIX_FMT_YUV422P10, 1370 AV_PIX_FMT_YUV444P10, 1371 AV_PIX_FMT_GBRP10, 1372 AV_PIX_FMT_NONE 1373 }, 1374 .p.priv_class = &dnxhd_class, 1375 .defaults = dnxhd_defaults, 1376 .p.profiles = NULL_IF_CONFIG_SMALL(ff_dnxhd_profiles), 1377 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 1378}; 1379