/*
 * VC3/DNxHD encoder
 * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
 * Copyright (c) 2011 MirriAd Ltd
 *
 * VC-3 encoder funded by the British Broadcasting Corporation
 * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavutil/mem_internal.h"
#include "libavutil/opt.h"

#include "avcodec.h"
#include "blockdsp.h"
#include "codec_internal.h"
#include "encode.h"
#include "fdctdsp.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "mpegvideoenc.h"
#include "pixblockdsp.h"
#include "packet_internal.h"
#include "profiles.h"
#include "dnxhdenc.h"

// The largest value that will not lead to overflow for 10-bit samples.
45cabdff1aSopenharmony_ci#define DNX10BIT_QMAT_SHIFT 18 46cabdff1aSopenharmony_ci#define RC_VARIANCE 1 // use variance or ssd for fast rc 47cabdff1aSopenharmony_ci#define LAMBDA_FRAC_BITS 10 48cabdff1aSopenharmony_ci 49cabdff1aSopenharmony_ci#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM 50cabdff1aSopenharmony_cistatic const AVOption options[] = { 51cabdff1aSopenharmony_ci { "nitris_compat", "encode with Avid Nitris compatibility", 52cabdff1aSopenharmony_ci offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, 53cabdff1aSopenharmony_ci { "ibias", "intra quant bias", 54cabdff1aSopenharmony_ci offsetof(DNXHDEncContext, intra_quant_bias), AV_OPT_TYPE_INT, 55cabdff1aSopenharmony_ci { .i64 = 0 }, INT_MIN, INT_MAX, VE }, 56cabdff1aSopenharmony_ci { "profile", NULL, offsetof(DNXHDEncContext, profile), AV_OPT_TYPE_INT, 57cabdff1aSopenharmony_ci { .i64 = FF_PROFILE_DNXHD }, 58cabdff1aSopenharmony_ci FF_PROFILE_DNXHD, FF_PROFILE_DNXHR_444, VE, "profile" }, 59cabdff1aSopenharmony_ci { "dnxhd", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHD }, 60cabdff1aSopenharmony_ci 0, 0, VE, "profile" }, 61cabdff1aSopenharmony_ci { "dnxhr_444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_444 }, 62cabdff1aSopenharmony_ci 0, 0, VE, "profile" }, 63cabdff1aSopenharmony_ci { "dnxhr_hqx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_HQX }, 64cabdff1aSopenharmony_ci 0, 0, VE, "profile" }, 65cabdff1aSopenharmony_ci { "dnxhr_hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_HQ }, 66cabdff1aSopenharmony_ci 0, 0, VE, "profile" }, 67cabdff1aSopenharmony_ci { "dnxhr_sq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_SQ }, 68cabdff1aSopenharmony_ci 0, 0, VE, "profile" }, 69cabdff1aSopenharmony_ci { "dnxhr_lb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_LB }, 70cabdff1aSopenharmony_ci 0, 0, VE, "profile" }, 71cabdff1aSopenharmony_ci { NULL } 72cabdff1aSopenharmony_ci}; 73cabdff1aSopenharmony_ci 
74cabdff1aSopenharmony_cistatic const AVClass dnxhd_class = { 75cabdff1aSopenharmony_ci .class_name = "dnxhd", 76cabdff1aSopenharmony_ci .item_name = av_default_item_name, 77cabdff1aSopenharmony_ci .option = options, 78cabdff1aSopenharmony_ci .version = LIBAVUTIL_VERSION_INT, 79cabdff1aSopenharmony_ci}; 80cabdff1aSopenharmony_ci 81cabdff1aSopenharmony_cistatic void dnxhd_8bit_get_pixels_8x4_sym(int16_t *av_restrict block, 82cabdff1aSopenharmony_ci const uint8_t *pixels, 83cabdff1aSopenharmony_ci ptrdiff_t line_size) 84cabdff1aSopenharmony_ci{ 85cabdff1aSopenharmony_ci int i; 86cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 87cabdff1aSopenharmony_ci block[0] = pixels[0]; 88cabdff1aSopenharmony_ci block[1] = pixels[1]; 89cabdff1aSopenharmony_ci block[2] = pixels[2]; 90cabdff1aSopenharmony_ci block[3] = pixels[3]; 91cabdff1aSopenharmony_ci block[4] = pixels[4]; 92cabdff1aSopenharmony_ci block[5] = pixels[5]; 93cabdff1aSopenharmony_ci block[6] = pixels[6]; 94cabdff1aSopenharmony_ci block[7] = pixels[7]; 95cabdff1aSopenharmony_ci pixels += line_size; 96cabdff1aSopenharmony_ci block += 8; 97cabdff1aSopenharmony_ci } 98cabdff1aSopenharmony_ci memcpy(block, block - 8, sizeof(*block) * 8); 99cabdff1aSopenharmony_ci memcpy(block + 8, block - 16, sizeof(*block) * 8); 100cabdff1aSopenharmony_ci memcpy(block + 16, block - 24, sizeof(*block) * 8); 101cabdff1aSopenharmony_ci memcpy(block + 24, block - 32, sizeof(*block) * 8); 102cabdff1aSopenharmony_ci} 103cabdff1aSopenharmony_ci 104cabdff1aSopenharmony_cistatic av_always_inline 105cabdff1aSopenharmony_civoid dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block, 106cabdff1aSopenharmony_ci const uint8_t *pixels, 107cabdff1aSopenharmony_ci ptrdiff_t line_size) 108cabdff1aSopenharmony_ci{ 109cabdff1aSopenharmony_ci memcpy(block + 0 * 8, pixels + 0 * line_size, 8 * sizeof(*block)); 110cabdff1aSopenharmony_ci memcpy(block + 7 * 8, pixels + 0 * line_size, 8 * sizeof(*block)); 111cabdff1aSopenharmony_ci memcpy(block + 1 * 8, 
pixels + 1 * line_size, 8 * sizeof(*block)); 112cabdff1aSopenharmony_ci memcpy(block + 6 * 8, pixels + 1 * line_size, 8 * sizeof(*block)); 113cabdff1aSopenharmony_ci memcpy(block + 2 * 8, pixels + 2 * line_size, 8 * sizeof(*block)); 114cabdff1aSopenharmony_ci memcpy(block + 5 * 8, pixels + 2 * line_size, 8 * sizeof(*block)); 115cabdff1aSopenharmony_ci memcpy(block + 3 * 8, pixels + 3 * line_size, 8 * sizeof(*block)); 116cabdff1aSopenharmony_ci memcpy(block + 4 * 8, pixels + 3 * line_size, 8 * sizeof(*block)); 117cabdff1aSopenharmony_ci} 118cabdff1aSopenharmony_ci 119cabdff1aSopenharmony_cistatic int dnxhd_10bit_dct_quantize_444(MpegEncContext *ctx, int16_t *block, 120cabdff1aSopenharmony_ci int n, int qscale, int *overflow) 121cabdff1aSopenharmony_ci{ 122cabdff1aSopenharmony_ci int i, j, level, last_non_zero, start_i; 123cabdff1aSopenharmony_ci const int *qmat; 124cabdff1aSopenharmony_ci const uint8_t *scantable= ctx->intra_scantable.scantable; 125cabdff1aSopenharmony_ci int bias; 126cabdff1aSopenharmony_ci int max = 0; 127cabdff1aSopenharmony_ci unsigned int threshold1, threshold2; 128cabdff1aSopenharmony_ci 129cabdff1aSopenharmony_ci ctx->fdsp.fdct(block); 130cabdff1aSopenharmony_ci 131cabdff1aSopenharmony_ci block[0] = (block[0] + 2) >> 2; 132cabdff1aSopenharmony_ci start_i = 1; 133cabdff1aSopenharmony_ci last_non_zero = 0; 134cabdff1aSopenharmony_ci qmat = n < 4 ? 
ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale]; 135cabdff1aSopenharmony_ci bias= ctx->intra_quant_bias * (1 << (16 - 8)); 136cabdff1aSopenharmony_ci threshold1 = (1 << 16) - bias - 1; 137cabdff1aSopenharmony_ci threshold2 = (threshold1 << 1); 138cabdff1aSopenharmony_ci 139cabdff1aSopenharmony_ci for (i = 63; i >= start_i; i--) { 140cabdff1aSopenharmony_ci j = scantable[i]; 141cabdff1aSopenharmony_ci level = block[j] * qmat[j]; 142cabdff1aSopenharmony_ci 143cabdff1aSopenharmony_ci if (((unsigned)(level + threshold1)) > threshold2) { 144cabdff1aSopenharmony_ci last_non_zero = i; 145cabdff1aSopenharmony_ci break; 146cabdff1aSopenharmony_ci } else{ 147cabdff1aSopenharmony_ci block[j]=0; 148cabdff1aSopenharmony_ci } 149cabdff1aSopenharmony_ci } 150cabdff1aSopenharmony_ci 151cabdff1aSopenharmony_ci for (i = start_i; i <= last_non_zero; i++) { 152cabdff1aSopenharmony_ci j = scantable[i]; 153cabdff1aSopenharmony_ci level = block[j] * qmat[j]; 154cabdff1aSopenharmony_ci 155cabdff1aSopenharmony_ci if (((unsigned)(level + threshold1)) > threshold2) { 156cabdff1aSopenharmony_ci if (level > 0) { 157cabdff1aSopenharmony_ci level = (bias + level) >> 16; 158cabdff1aSopenharmony_ci block[j] = level; 159cabdff1aSopenharmony_ci } else{ 160cabdff1aSopenharmony_ci level = (bias - level) >> 16; 161cabdff1aSopenharmony_ci block[j] = -level; 162cabdff1aSopenharmony_ci } 163cabdff1aSopenharmony_ci max |= level; 164cabdff1aSopenharmony_ci } else { 165cabdff1aSopenharmony_ci block[j] = 0; 166cabdff1aSopenharmony_ci } 167cabdff1aSopenharmony_ci } 168cabdff1aSopenharmony_ci *overflow = ctx->max_qcoeff < max; //overflow might have happened 169cabdff1aSopenharmony_ci 170cabdff1aSopenharmony_ci /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ 171cabdff1aSopenharmony_ci if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE) 172cabdff1aSopenharmony_ci ff_block_permute(block, ctx->idsp.idct_permutation, 173cabdff1aSopenharmony_ci scantable, 
last_non_zero); 174cabdff1aSopenharmony_ci 175cabdff1aSopenharmony_ci return last_non_zero; 176cabdff1aSopenharmony_ci} 177cabdff1aSopenharmony_ci 178cabdff1aSopenharmony_cistatic int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block, 179cabdff1aSopenharmony_ci int n, int qscale, int *overflow) 180cabdff1aSopenharmony_ci{ 181cabdff1aSopenharmony_ci const uint8_t *scantable= ctx->intra_scantable.scantable; 182cabdff1aSopenharmony_ci const int *qmat = n<4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale]; 183cabdff1aSopenharmony_ci int last_non_zero = 0; 184cabdff1aSopenharmony_ci int i; 185cabdff1aSopenharmony_ci 186cabdff1aSopenharmony_ci ctx->fdsp.fdct(block); 187cabdff1aSopenharmony_ci 188cabdff1aSopenharmony_ci // Divide by 4 with rounding, to compensate scaling of DCT coefficients 189cabdff1aSopenharmony_ci block[0] = (block[0] + 2) >> 2; 190cabdff1aSopenharmony_ci 191cabdff1aSopenharmony_ci for (i = 1; i < 64; ++i) { 192cabdff1aSopenharmony_ci int j = scantable[i]; 193cabdff1aSopenharmony_ci int sign = FF_SIGNBIT(block[j]); 194cabdff1aSopenharmony_ci int level = (block[j] ^ sign) - sign; 195cabdff1aSopenharmony_ci level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT; 196cabdff1aSopenharmony_ci block[j] = (level ^ sign) - sign; 197cabdff1aSopenharmony_ci if (level) 198cabdff1aSopenharmony_ci last_non_zero = i; 199cabdff1aSopenharmony_ci } 200cabdff1aSopenharmony_ci 201cabdff1aSopenharmony_ci /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ 202cabdff1aSopenharmony_ci if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE) 203cabdff1aSopenharmony_ci ff_block_permute(block, ctx->idsp.idct_permutation, 204cabdff1aSopenharmony_ci scantable, last_non_zero); 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci return last_non_zero; 207cabdff1aSopenharmony_ci} 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_cistatic av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx) 210cabdff1aSopenharmony_ci{ 
211cabdff1aSopenharmony_ci int i, j, level, run; 212cabdff1aSopenharmony_ci int max_level = 1 << (ctx->bit_depth + 2); 213cabdff1aSopenharmony_ci 214cabdff1aSopenharmony_ci if (!FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_codes, max_level * 4) || 215cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_bits, max_level * 4) || 216cabdff1aSopenharmony_ci !(ctx->run_codes = av_mallocz(63 * 2)) || 217cabdff1aSopenharmony_ci !(ctx->run_bits = av_mallocz(63))) 218cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 219cabdff1aSopenharmony_ci ctx->vlc_codes = ctx->orig_vlc_codes + max_level * 2; 220cabdff1aSopenharmony_ci ctx->vlc_bits = ctx->orig_vlc_bits + max_level * 2; 221cabdff1aSopenharmony_ci for (level = -max_level; level < max_level; level++) { 222cabdff1aSopenharmony_ci for (run = 0; run < 2; run++) { 223cabdff1aSopenharmony_ci int index = level * (1 << 1) | run; 224cabdff1aSopenharmony_ci int sign, offset = 0, alevel = level; 225cabdff1aSopenharmony_ci 226cabdff1aSopenharmony_ci MASK_ABS(sign, alevel); 227cabdff1aSopenharmony_ci if (alevel > 64) { 228cabdff1aSopenharmony_ci offset = (alevel - 1) >> 6; 229cabdff1aSopenharmony_ci alevel -= offset << 6; 230cabdff1aSopenharmony_ci } 231cabdff1aSopenharmony_ci for (j = 0; j < 257; j++) { 232cabdff1aSopenharmony_ci if (ctx->cid_table->ac_info[2*j+0] >> 1 == alevel && 233cabdff1aSopenharmony_ci (!offset || (ctx->cid_table->ac_info[2*j+1] & 1) && offset) && 234cabdff1aSopenharmony_ci (!run || (ctx->cid_table->ac_info[2*j+1] & 2) && run)) { 235cabdff1aSopenharmony_ci av_assert1(!ctx->vlc_codes[index]); 236cabdff1aSopenharmony_ci if (alevel) { 237cabdff1aSopenharmony_ci ctx->vlc_codes[index] = 238cabdff1aSopenharmony_ci (ctx->cid_table->ac_codes[j] << 1) | (sign & 1); 239cabdff1aSopenharmony_ci ctx->vlc_bits[index] = ctx->cid_table->ac_bits[j] + 1; 240cabdff1aSopenharmony_ci } else { 241cabdff1aSopenharmony_ci ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j]; 242cabdff1aSopenharmony_ci ctx->vlc_bits[index] = 
ctx->cid_table->ac_bits[j]; 243cabdff1aSopenharmony_ci } 244cabdff1aSopenharmony_ci break; 245cabdff1aSopenharmony_ci } 246cabdff1aSopenharmony_ci } 247cabdff1aSopenharmony_ci av_assert0(!alevel || j < 257); 248cabdff1aSopenharmony_ci if (offset) { 249cabdff1aSopenharmony_ci ctx->vlc_codes[index] = 250cabdff1aSopenharmony_ci (ctx->vlc_codes[index] << ctx->cid_table->index_bits) | offset; 251cabdff1aSopenharmony_ci ctx->vlc_bits[index] += ctx->cid_table->index_bits; 252cabdff1aSopenharmony_ci } 253cabdff1aSopenharmony_ci } 254cabdff1aSopenharmony_ci } 255cabdff1aSopenharmony_ci for (i = 0; i < 62; i++) { 256cabdff1aSopenharmony_ci int run = ctx->cid_table->run[i]; 257cabdff1aSopenharmony_ci av_assert0(run < 63); 258cabdff1aSopenharmony_ci ctx->run_codes[run] = ctx->cid_table->run_codes[i]; 259cabdff1aSopenharmony_ci ctx->run_bits[run] = ctx->cid_table->run_bits[i]; 260cabdff1aSopenharmony_ci } 261cabdff1aSopenharmony_ci return 0; 262cabdff1aSopenharmony_ci} 263cabdff1aSopenharmony_ci 264cabdff1aSopenharmony_cistatic av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias) 265cabdff1aSopenharmony_ci{ 266cabdff1aSopenharmony_ci // init first elem to 1 to avoid div by 0 in convert_matrix 267cabdff1aSopenharmony_ci uint16_t weight_matrix[64] = { 1, }; // convert_matrix needs uint16_t* 268cabdff1aSopenharmony_ci int qscale, i; 269cabdff1aSopenharmony_ci const uint8_t *luma_weight_table = ctx->cid_table->luma_weight; 270cabdff1aSopenharmony_ci const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight; 271cabdff1aSopenharmony_ci 272cabdff1aSopenharmony_ci if (!FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l, ctx->m.avctx->qmax + 1) || 273cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c, ctx->m.avctx->qmax + 1) || 274cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l16, ctx->m.avctx->qmax + 1) || 275cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c16, ctx->m.avctx->qmax + 1)) 276cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 
277cabdff1aSopenharmony_ci 278cabdff1aSopenharmony_ci if (ctx->bit_depth == 8) { 279cabdff1aSopenharmony_ci for (i = 1; i < 64; i++) { 280cabdff1aSopenharmony_ci int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]]; 281cabdff1aSopenharmony_ci weight_matrix[j] = ctx->cid_table->luma_weight[i]; 282cabdff1aSopenharmony_ci } 283cabdff1aSopenharmony_ci ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16, 284cabdff1aSopenharmony_ci weight_matrix, ctx->intra_quant_bias, 1, 285cabdff1aSopenharmony_ci ctx->m.avctx->qmax, 1); 286cabdff1aSopenharmony_ci for (i = 1; i < 64; i++) { 287cabdff1aSopenharmony_ci int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]]; 288cabdff1aSopenharmony_ci weight_matrix[j] = ctx->cid_table->chroma_weight[i]; 289cabdff1aSopenharmony_ci } 290cabdff1aSopenharmony_ci ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16, 291cabdff1aSopenharmony_ci weight_matrix, ctx->intra_quant_bias, 1, 292cabdff1aSopenharmony_ci ctx->m.avctx->qmax, 1); 293cabdff1aSopenharmony_ci 294cabdff1aSopenharmony_ci for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { 295cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) { 296cabdff1aSopenharmony_ci ctx->qmatrix_l[qscale][i] <<= 2; 297cabdff1aSopenharmony_ci ctx->qmatrix_c[qscale][i] <<= 2; 298cabdff1aSopenharmony_ci ctx->qmatrix_l16[qscale][0][i] <<= 2; 299cabdff1aSopenharmony_ci ctx->qmatrix_l16[qscale][1][i] <<= 2; 300cabdff1aSopenharmony_ci ctx->qmatrix_c16[qscale][0][i] <<= 2; 301cabdff1aSopenharmony_ci ctx->qmatrix_c16[qscale][1][i] <<= 2; 302cabdff1aSopenharmony_ci } 303cabdff1aSopenharmony_ci } 304cabdff1aSopenharmony_ci } else { 305cabdff1aSopenharmony_ci // 10-bit 306cabdff1aSopenharmony_ci for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { 307cabdff1aSopenharmony_ci for (i = 1; i < 64; i++) { 308cabdff1aSopenharmony_ci int j = ff_zigzag_direct[i]; 309cabdff1aSopenharmony_ci 310cabdff1aSopenharmony_ci /* The quantization formula from the VC-3 standard is: 
311cabdff1aSopenharmony_ci * quantized = sign(block[i]) * floor(abs(block[i]/s) * p / 312cabdff1aSopenharmony_ci * (qscale * weight_table[i])) 313cabdff1aSopenharmony_ci * Where p is 32 for 8-bit samples and 8 for 10-bit ones. 314cabdff1aSopenharmony_ci * The s factor compensates scaling of DCT coefficients done by 315cabdff1aSopenharmony_ci * the DCT routines, and therefore is not present in standard. 316cabdff1aSopenharmony_ci * It's 8 for 8-bit samples and 4 for 10-bit ones. 317cabdff1aSopenharmony_ci * We want values of ctx->qtmatrix_l and ctx->qtmatrix_r to be: 318cabdff1aSopenharmony_ci * ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / 319cabdff1aSopenharmony_ci * (qscale * weight_table[i]) 320cabdff1aSopenharmony_ci * For 10-bit samples, p / s == 2 */ 321cabdff1aSopenharmony_ci ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / 322cabdff1aSopenharmony_ci (qscale * luma_weight_table[i]); 323cabdff1aSopenharmony_ci ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / 324cabdff1aSopenharmony_ci (qscale * chroma_weight_table[i]); 325cabdff1aSopenharmony_ci } 326cabdff1aSopenharmony_ci } 327cabdff1aSopenharmony_ci } 328cabdff1aSopenharmony_ci 329cabdff1aSopenharmony_ci ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16; 330cabdff1aSopenharmony_ci ctx->m.q_chroma_intra_matrix = ctx->qmatrix_c; 331cabdff1aSopenharmony_ci ctx->m.q_intra_matrix16 = ctx->qmatrix_l16; 332cabdff1aSopenharmony_ci ctx->m.q_intra_matrix = ctx->qmatrix_l; 333cabdff1aSopenharmony_ci 334cabdff1aSopenharmony_ci return 0; 335cabdff1aSopenharmony_ci} 336cabdff1aSopenharmony_ci 337cabdff1aSopenharmony_cistatic av_cold int dnxhd_init_rc(DNXHDEncContext *ctx) 338cabdff1aSopenharmony_ci{ 339cabdff1aSopenharmony_ci if (!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_rc, (ctx->m.avctx->qmax + 1) * ctx->m.mb_num)) 340cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 341cabdff1aSopenharmony_ci 342cabdff1aSopenharmony_ci if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD) { 343cabdff1aSopenharmony_ci if 
(!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp, ctx->m.mb_num) || 344cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp_tmp, ctx->m.mb_num)) 345cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 346cabdff1aSopenharmony_ci } 347cabdff1aSopenharmony_ci ctx->frame_bits = (ctx->coding_unit_size - 348cabdff1aSopenharmony_ci ctx->data_offset - 4 - ctx->min_padding) * 8; 349cabdff1aSopenharmony_ci ctx->qscale = 1; 350cabdff1aSopenharmony_ci ctx->lambda = 2 << LAMBDA_FRAC_BITS; // qscale 2 351cabdff1aSopenharmony_ci return 0; 352cabdff1aSopenharmony_ci} 353cabdff1aSopenharmony_ci 354cabdff1aSopenharmony_cistatic av_cold int dnxhd_encode_init(AVCodecContext *avctx) 355cabdff1aSopenharmony_ci{ 356cabdff1aSopenharmony_ci DNXHDEncContext *ctx = avctx->priv_data; 357cabdff1aSopenharmony_ci int i, ret; 358cabdff1aSopenharmony_ci 359cabdff1aSopenharmony_ci switch (avctx->pix_fmt) { 360cabdff1aSopenharmony_ci case AV_PIX_FMT_YUV422P: 361cabdff1aSopenharmony_ci ctx->bit_depth = 8; 362cabdff1aSopenharmony_ci break; 363cabdff1aSopenharmony_ci case AV_PIX_FMT_YUV422P10: 364cabdff1aSopenharmony_ci case AV_PIX_FMT_YUV444P10: 365cabdff1aSopenharmony_ci case AV_PIX_FMT_GBRP10: 366cabdff1aSopenharmony_ci ctx->bit_depth = 10; 367cabdff1aSopenharmony_ci break; 368cabdff1aSopenharmony_ci } 369cabdff1aSopenharmony_ci 370cabdff1aSopenharmony_ci if ((ctx->profile == FF_PROFILE_DNXHR_444 && (avctx->pix_fmt != AV_PIX_FMT_YUV444P10 && 371cabdff1aSopenharmony_ci avctx->pix_fmt != AV_PIX_FMT_GBRP10)) || 372cabdff1aSopenharmony_ci (ctx->profile != FF_PROFILE_DNXHR_444 && (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || 373cabdff1aSopenharmony_ci avctx->pix_fmt == AV_PIX_FMT_GBRP10))) { 374cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, 375cabdff1aSopenharmony_ci "pixel format is incompatible with DNxHD profile\n"); 376cabdff1aSopenharmony_ci return AVERROR(EINVAL); 377cabdff1aSopenharmony_ci } 378cabdff1aSopenharmony_ci 379cabdff1aSopenharmony_ci if (ctx->profile == FF_PROFILE_DNXHR_HQX && avctx->pix_fmt != 
AV_PIX_FMT_YUV422P10) { 380cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, 381cabdff1aSopenharmony_ci "pixel format is incompatible with DNxHR HQX profile\n"); 382cabdff1aSopenharmony_ci return AVERROR(EINVAL); 383cabdff1aSopenharmony_ci } 384cabdff1aSopenharmony_ci 385cabdff1aSopenharmony_ci if ((ctx->profile == FF_PROFILE_DNXHR_LB || 386cabdff1aSopenharmony_ci ctx->profile == FF_PROFILE_DNXHR_SQ || 387cabdff1aSopenharmony_ci ctx->profile == FF_PROFILE_DNXHR_HQ) && avctx->pix_fmt != AV_PIX_FMT_YUV422P) { 388cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, 389cabdff1aSopenharmony_ci "pixel format is incompatible with DNxHR LB/SQ/HQ profile\n"); 390cabdff1aSopenharmony_ci return AVERROR(EINVAL); 391cabdff1aSopenharmony_ci } 392cabdff1aSopenharmony_ci 393cabdff1aSopenharmony_ci ctx->is_444 = ctx->profile == FF_PROFILE_DNXHR_444; 394cabdff1aSopenharmony_ci avctx->profile = ctx->profile; 395cabdff1aSopenharmony_ci ctx->cid = ff_dnxhd_find_cid(avctx, ctx->bit_depth); 396cabdff1aSopenharmony_ci if (!ctx->cid) { 397cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, 398cabdff1aSopenharmony_ci "video parameters incompatible with DNxHD. 
Valid DNxHD profiles:\n"); 399cabdff1aSopenharmony_ci ff_dnxhd_print_profiles(avctx, AV_LOG_ERROR); 400cabdff1aSopenharmony_ci return AVERROR(EINVAL); 401cabdff1aSopenharmony_ci } 402cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid); 403cabdff1aSopenharmony_ci 404cabdff1aSopenharmony_ci if (ctx->cid >= 1270 && ctx->cid <= 1274) 405cabdff1aSopenharmony_ci avctx->codec_tag = MKTAG('A','V','d','h'); 406cabdff1aSopenharmony_ci 407cabdff1aSopenharmony_ci if (avctx->width < 256 || avctx->height < 120) { 408cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, 409cabdff1aSopenharmony_ci "Input dimensions too small, input must be at least 256x120\n"); 410cabdff1aSopenharmony_ci return AVERROR(EINVAL); 411cabdff1aSopenharmony_ci } 412cabdff1aSopenharmony_ci 413cabdff1aSopenharmony_ci ctx->cid_table = ff_dnxhd_get_cid_table(ctx->cid); 414cabdff1aSopenharmony_ci av_assert0(ctx->cid_table); 415cabdff1aSopenharmony_ci 416cabdff1aSopenharmony_ci ctx->m.avctx = avctx; 417cabdff1aSopenharmony_ci ctx->m.mb_intra = 1; 418cabdff1aSopenharmony_ci ctx->m.h263_aic = 1; 419cabdff1aSopenharmony_ci 420cabdff1aSopenharmony_ci avctx->bits_per_raw_sample = ctx->bit_depth; 421cabdff1aSopenharmony_ci 422cabdff1aSopenharmony_ci ff_blockdsp_init(&ctx->bdsp, avctx); 423cabdff1aSopenharmony_ci ff_fdctdsp_init(&ctx->m.fdsp, avctx); 424cabdff1aSopenharmony_ci ff_mpv_idct_init(&ctx->m); 425cabdff1aSopenharmony_ci ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); 426cabdff1aSopenharmony_ci ff_pixblockdsp_init(&ctx->m.pdsp, avctx); 427cabdff1aSopenharmony_ci ff_dct_encode_init(&ctx->m); 428cabdff1aSopenharmony_ci 429cabdff1aSopenharmony_ci if (ctx->profile != FF_PROFILE_DNXHD) 430cabdff1aSopenharmony_ci ff_videodsp_init(&ctx->m.vdsp, ctx->bit_depth); 431cabdff1aSopenharmony_ci 432cabdff1aSopenharmony_ci if (!ctx->m.dct_quantize) 433cabdff1aSopenharmony_ci ctx->m.dct_quantize = ff_dct_quantize_c; 434cabdff1aSopenharmony_ci 435cabdff1aSopenharmony_ci if (ctx->is_444 || ctx->profile 
== FF_PROFILE_DNXHR_HQX) { 436cabdff1aSopenharmony_ci ctx->m.dct_quantize = dnxhd_10bit_dct_quantize_444; 437cabdff1aSopenharmony_ci ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym; 438cabdff1aSopenharmony_ci ctx->block_width_l2 = 4; 439cabdff1aSopenharmony_ci } else if (ctx->bit_depth == 10) { 440cabdff1aSopenharmony_ci ctx->m.dct_quantize = dnxhd_10bit_dct_quantize; 441cabdff1aSopenharmony_ci ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym; 442cabdff1aSopenharmony_ci ctx->block_width_l2 = 4; 443cabdff1aSopenharmony_ci } else { 444cabdff1aSopenharmony_ci ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym; 445cabdff1aSopenharmony_ci ctx->block_width_l2 = 3; 446cabdff1aSopenharmony_ci } 447cabdff1aSopenharmony_ci 448cabdff1aSopenharmony_ci#if ARCH_X86 449cabdff1aSopenharmony_ci ff_dnxhdenc_init_x86(ctx); 450cabdff1aSopenharmony_ci#endif 451cabdff1aSopenharmony_ci 452cabdff1aSopenharmony_ci ctx->m.mb_height = (avctx->height + 15) / 16; 453cabdff1aSopenharmony_ci ctx->m.mb_width = (avctx->width + 15) / 16; 454cabdff1aSopenharmony_ci 455cabdff1aSopenharmony_ci if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { 456cabdff1aSopenharmony_ci ctx->interlaced = 1; 457cabdff1aSopenharmony_ci ctx->m.mb_height /= 2; 458cabdff1aSopenharmony_ci } 459cabdff1aSopenharmony_ci 460cabdff1aSopenharmony_ci if (ctx->interlaced && ctx->profile != FF_PROFILE_DNXHD) { 461cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, 462cabdff1aSopenharmony_ci "Interlaced encoding is not supported for DNxHR profiles.\n"); 463cabdff1aSopenharmony_ci return AVERROR(EINVAL); 464cabdff1aSopenharmony_ci } 465cabdff1aSopenharmony_ci 466cabdff1aSopenharmony_ci ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width; 467cabdff1aSopenharmony_ci 468cabdff1aSopenharmony_ci if (ctx->cid_table->frame_size == DNXHD_VARIABLE) { 469cabdff1aSopenharmony_ci ctx->frame_size = ff_dnxhd_get_hr_frame_size(ctx->cid, 470cabdff1aSopenharmony_ci avctx->width, avctx->height); 471cabdff1aSopenharmony_ci 
av_assert0(ctx->frame_size >= 0); 472cabdff1aSopenharmony_ci ctx->coding_unit_size = ctx->frame_size; 473cabdff1aSopenharmony_ci } else { 474cabdff1aSopenharmony_ci ctx->frame_size = ctx->cid_table->frame_size; 475cabdff1aSopenharmony_ci ctx->coding_unit_size = ctx->cid_table->coding_unit_size; 476cabdff1aSopenharmony_ci } 477cabdff1aSopenharmony_ci 478cabdff1aSopenharmony_ci if (ctx->m.mb_height > 68) 479cabdff1aSopenharmony_ci ctx->data_offset = 0x170 + (ctx->m.mb_height << 2); 480cabdff1aSopenharmony_ci else 481cabdff1aSopenharmony_ci ctx->data_offset = 0x280; 482cabdff1aSopenharmony_ci 483cabdff1aSopenharmony_ci // XXX tune lbias/cbias 484cabdff1aSopenharmony_ci if ((ret = dnxhd_init_qmat(ctx, ctx->intra_quant_bias, 0)) < 0) 485cabdff1aSopenharmony_ci return ret; 486cabdff1aSopenharmony_ci 487cabdff1aSopenharmony_ci /* Avid Nitris hardware decoder requires a minimum amount of padding 488cabdff1aSopenharmony_ci * in the coding unit payload */ 489cabdff1aSopenharmony_ci if (ctx->nitris_compat) 490cabdff1aSopenharmony_ci ctx->min_padding = 1600; 491cabdff1aSopenharmony_ci 492cabdff1aSopenharmony_ci if ((ret = dnxhd_init_vlc(ctx)) < 0) 493cabdff1aSopenharmony_ci return ret; 494cabdff1aSopenharmony_ci if ((ret = dnxhd_init_rc(ctx)) < 0) 495cabdff1aSopenharmony_ci return ret; 496cabdff1aSopenharmony_ci 497cabdff1aSopenharmony_ci if (!FF_ALLOCZ_TYPED_ARRAY(ctx->slice_size, ctx->m.mb_height) || 498cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->slice_offs, ctx->m.mb_height) || 499cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_bits, ctx->m.mb_num) || 500cabdff1aSopenharmony_ci !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_qscale, ctx->m.mb_num)) 501cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 502cabdff1aSopenharmony_ci 503cabdff1aSopenharmony_ci if (avctx->active_thread_type == FF_THREAD_SLICE) { 504cabdff1aSopenharmony_ci if (avctx->thread_count > MAX_THREADS) { 505cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, "too many threads\n"); 506cabdff1aSopenharmony_ci 
return AVERROR(EINVAL); 507cabdff1aSopenharmony_ci } 508cabdff1aSopenharmony_ci } 509cabdff1aSopenharmony_ci 510cabdff1aSopenharmony_ci if (avctx->qmax <= 1) { 511cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, "qmax must be at least 2\n"); 512cabdff1aSopenharmony_ci return AVERROR(EINVAL); 513cabdff1aSopenharmony_ci } 514cabdff1aSopenharmony_ci 515cabdff1aSopenharmony_ci ctx->thread[0] = ctx; 516cabdff1aSopenharmony_ci if (avctx->active_thread_type == FF_THREAD_SLICE) { 517cabdff1aSopenharmony_ci for (i = 1; i < avctx->thread_count; i++) { 518cabdff1aSopenharmony_ci ctx->thread[i] = av_memdup(ctx, sizeof(DNXHDEncContext)); 519cabdff1aSopenharmony_ci if (!ctx->thread[i]) 520cabdff1aSopenharmony_ci return AVERROR(ENOMEM); 521cabdff1aSopenharmony_ci } 522cabdff1aSopenharmony_ci } 523cabdff1aSopenharmony_ci 524cabdff1aSopenharmony_ci return 0; 525cabdff1aSopenharmony_ci} 526cabdff1aSopenharmony_ci 527cabdff1aSopenharmony_cistatic int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf) 528cabdff1aSopenharmony_ci{ 529cabdff1aSopenharmony_ci DNXHDEncContext *ctx = avctx->priv_data; 530cabdff1aSopenharmony_ci 531cabdff1aSopenharmony_ci memset(buf, 0, ctx->data_offset); 532cabdff1aSopenharmony_ci 533cabdff1aSopenharmony_ci // * write prefix */ 534cabdff1aSopenharmony_ci AV_WB16(buf + 0x02, ctx->data_offset); 535cabdff1aSopenharmony_ci if (ctx->cid >= 1270 && ctx->cid <= 1274) 536cabdff1aSopenharmony_ci buf[4] = 0x03; 537cabdff1aSopenharmony_ci else 538cabdff1aSopenharmony_ci buf[4] = 0x01; 539cabdff1aSopenharmony_ci 540cabdff1aSopenharmony_ci buf[5] = ctx->interlaced ? 
ctx->cur_field + 2 : 0x01; 541cabdff1aSopenharmony_ci buf[6] = 0x80; // crc flag off 542cabdff1aSopenharmony_ci buf[7] = 0xa0; // reserved 543cabdff1aSopenharmony_ci AV_WB16(buf + 0x18, avctx->height >> ctx->interlaced); // ALPF 544cabdff1aSopenharmony_ci AV_WB16(buf + 0x1a, avctx->width); // SPL 545cabdff1aSopenharmony_ci AV_WB16(buf + 0x1d, avctx->height >> ctx->interlaced); // NAL 546cabdff1aSopenharmony_ci 547cabdff1aSopenharmony_ci buf[0x21] = ctx->bit_depth == 10 ? 0x58 : 0x38; 548cabdff1aSopenharmony_ci buf[0x22] = 0x88 + (ctx->interlaced << 2); 549cabdff1aSopenharmony_ci AV_WB32(buf + 0x28, ctx->cid); // CID 550cabdff1aSopenharmony_ci buf[0x2c] = (!ctx->interlaced << 7) | (ctx->is_444 << 6) | (avctx->pix_fmt == AV_PIX_FMT_YUV444P10); 551cabdff1aSopenharmony_ci 552cabdff1aSopenharmony_ci buf[0x5f] = 0x01; // UDL 553cabdff1aSopenharmony_ci 554cabdff1aSopenharmony_ci buf[0x167] = 0x02; // reserved 555cabdff1aSopenharmony_ci AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS 556cabdff1aSopenharmony_ci AV_WB16(buf + 0x16c, ctx->m.mb_height); // Ns 557cabdff1aSopenharmony_ci buf[0x16f] = 0x10; // reserved 558cabdff1aSopenharmony_ci 559cabdff1aSopenharmony_ci ctx->msip = buf + 0x170; 560cabdff1aSopenharmony_ci return 0; 561cabdff1aSopenharmony_ci} 562cabdff1aSopenharmony_ci 563cabdff1aSopenharmony_cistatic av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff) 564cabdff1aSopenharmony_ci{ 565cabdff1aSopenharmony_ci int nbits; 566cabdff1aSopenharmony_ci if (diff < 0) { 567cabdff1aSopenharmony_ci nbits = av_log2_16bit(-2 * diff); 568cabdff1aSopenharmony_ci diff--; 569cabdff1aSopenharmony_ci } else { 570cabdff1aSopenharmony_ci nbits = av_log2_16bit(2 * diff); 571cabdff1aSopenharmony_ci } 572cabdff1aSopenharmony_ci put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits, 573cabdff1aSopenharmony_ci (ctx->cid_table->dc_codes[nbits] << nbits) + 574cabdff1aSopenharmony_ci av_mod_uintp2(diff, nbits)); 575cabdff1aSopenharmony_ci} 
576cabdff1aSopenharmony_ci 577cabdff1aSopenharmony_cistatic av_always_inline 578cabdff1aSopenharmony_civoid dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block, 579cabdff1aSopenharmony_ci int last_index, int n) 580cabdff1aSopenharmony_ci{ 581cabdff1aSopenharmony_ci int last_non_zero = 0; 582cabdff1aSopenharmony_ci int slevel, i, j; 583cabdff1aSopenharmony_ci 584cabdff1aSopenharmony_ci dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]); 585cabdff1aSopenharmony_ci ctx->m.last_dc[n] = block[0]; 586cabdff1aSopenharmony_ci 587cabdff1aSopenharmony_ci for (i = 1; i <= last_index; i++) { 588cabdff1aSopenharmony_ci j = ctx->m.intra_scantable.permutated[i]; 589cabdff1aSopenharmony_ci slevel = block[j]; 590cabdff1aSopenharmony_ci if (slevel) { 591cabdff1aSopenharmony_ci int run_level = i - last_non_zero - 1; 592cabdff1aSopenharmony_ci int rlevel = slevel * (1 << 1) | !!run_level; 593cabdff1aSopenharmony_ci put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]); 594cabdff1aSopenharmony_ci if (run_level) 595cabdff1aSopenharmony_ci put_bits(&ctx->m.pb, ctx->run_bits[run_level], 596cabdff1aSopenharmony_ci ctx->run_codes[run_level]); 597cabdff1aSopenharmony_ci last_non_zero = i; 598cabdff1aSopenharmony_ci } 599cabdff1aSopenharmony_ci } 600cabdff1aSopenharmony_ci put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB 601cabdff1aSopenharmony_ci} 602cabdff1aSopenharmony_ci 603cabdff1aSopenharmony_cistatic av_always_inline 604cabdff1aSopenharmony_civoid dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n, 605cabdff1aSopenharmony_ci int qscale, int last_index) 606cabdff1aSopenharmony_ci{ 607cabdff1aSopenharmony_ci const uint8_t *weight_matrix; 608cabdff1aSopenharmony_ci int level; 609cabdff1aSopenharmony_ci int i; 610cabdff1aSopenharmony_ci 611cabdff1aSopenharmony_ci if (ctx->is_444) { 612cabdff1aSopenharmony_ci weight_matrix = ((n % 6) < 2) ? 
ctx->cid_table->luma_weight 613cabdff1aSopenharmony_ci : ctx->cid_table->chroma_weight; 614cabdff1aSopenharmony_ci } else { 615cabdff1aSopenharmony_ci weight_matrix = (n & 2) ? ctx->cid_table->chroma_weight 616cabdff1aSopenharmony_ci : ctx->cid_table->luma_weight; 617cabdff1aSopenharmony_ci } 618cabdff1aSopenharmony_ci 619cabdff1aSopenharmony_ci for (i = 1; i <= last_index; i++) { 620cabdff1aSopenharmony_ci int j = ctx->m.intra_scantable.permutated[i]; 621cabdff1aSopenharmony_ci level = block[j]; 622cabdff1aSopenharmony_ci if (level) { 623cabdff1aSopenharmony_ci if (level < 0) { 624cabdff1aSopenharmony_ci level = (1 - 2 * level) * qscale * weight_matrix[i]; 625cabdff1aSopenharmony_ci if (ctx->bit_depth == 10) { 626cabdff1aSopenharmony_ci if (weight_matrix[i] != 8) 627cabdff1aSopenharmony_ci level += 8; 628cabdff1aSopenharmony_ci level >>= 4; 629cabdff1aSopenharmony_ci } else { 630cabdff1aSopenharmony_ci if (weight_matrix[i] != 32) 631cabdff1aSopenharmony_ci level += 32; 632cabdff1aSopenharmony_ci level >>= 6; 633cabdff1aSopenharmony_ci } 634cabdff1aSopenharmony_ci level = -level; 635cabdff1aSopenharmony_ci } else { 636cabdff1aSopenharmony_ci level = (2 * level + 1) * qscale * weight_matrix[i]; 637cabdff1aSopenharmony_ci if (ctx->bit_depth == 10) { 638cabdff1aSopenharmony_ci if (weight_matrix[i] != 8) 639cabdff1aSopenharmony_ci level += 8; 640cabdff1aSopenharmony_ci level >>= 4; 641cabdff1aSopenharmony_ci } else { 642cabdff1aSopenharmony_ci if (weight_matrix[i] != 32) 643cabdff1aSopenharmony_ci level += 32; 644cabdff1aSopenharmony_ci level >>= 6; 645cabdff1aSopenharmony_ci } 646cabdff1aSopenharmony_ci } 647cabdff1aSopenharmony_ci block[j] = level; 648cabdff1aSopenharmony_ci } 649cabdff1aSopenharmony_ci } 650cabdff1aSopenharmony_ci} 651cabdff1aSopenharmony_ci 652cabdff1aSopenharmony_cistatic av_always_inline int dnxhd_ssd_block(int16_t *qblock, int16_t *block) 653cabdff1aSopenharmony_ci{ 654cabdff1aSopenharmony_ci int score = 0; 655cabdff1aSopenharmony_ci int i; 
656cabdff1aSopenharmony_ci for (i = 0; i < 64; i++) 657cabdff1aSopenharmony_ci score += (block[i] - qblock[i]) * (block[i] - qblock[i]); 658cabdff1aSopenharmony_ci return score; 659cabdff1aSopenharmony_ci} 660cabdff1aSopenharmony_ci 661cabdff1aSopenharmony_cistatic av_always_inline 662cabdff1aSopenharmony_ciint dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index) 663cabdff1aSopenharmony_ci{ 664cabdff1aSopenharmony_ci int last_non_zero = 0; 665cabdff1aSopenharmony_ci int bits = 0; 666cabdff1aSopenharmony_ci int i, j, level; 667cabdff1aSopenharmony_ci for (i = 1; i <= last_index; i++) { 668cabdff1aSopenharmony_ci j = ctx->m.intra_scantable.permutated[i]; 669cabdff1aSopenharmony_ci level = block[j]; 670cabdff1aSopenharmony_ci if (level) { 671cabdff1aSopenharmony_ci int run_level = i - last_non_zero - 1; 672cabdff1aSopenharmony_ci bits += ctx->vlc_bits[level * (1 << 1) | 673cabdff1aSopenharmony_ci !!run_level] + ctx->run_bits[run_level]; 674cabdff1aSopenharmony_ci last_non_zero = i; 675cabdff1aSopenharmony_ci } 676cabdff1aSopenharmony_ci } 677cabdff1aSopenharmony_ci return bits; 678cabdff1aSopenharmony_ci} 679cabdff1aSopenharmony_ci 680cabdff1aSopenharmony_cistatic av_always_inline 681cabdff1aSopenharmony_civoid dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) 682cabdff1aSopenharmony_ci{ 683cabdff1aSopenharmony_ci const int bs = ctx->block_width_l2; 684cabdff1aSopenharmony_ci const int bw = 1 << bs; 685cabdff1aSopenharmony_ci int dct_y_offset = ctx->dct_y_offset; 686cabdff1aSopenharmony_ci int dct_uv_offset = ctx->dct_uv_offset; 687cabdff1aSopenharmony_ci int linesize = ctx->m.linesize; 688cabdff1aSopenharmony_ci int uvlinesize = ctx->m.uvlinesize; 689cabdff1aSopenharmony_ci const uint8_t *ptr_y = ctx->thread[0]->src[0] + 690cabdff1aSopenharmony_ci ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs + 1); 691cabdff1aSopenharmony_ci const uint8_t *ptr_u = ctx->thread[0]->src[1] + 692cabdff1aSopenharmony_ci ((mb_y << 4) * ctx->m.uvlinesize) + 
(mb_x << bs + ctx->is_444); 693cabdff1aSopenharmony_ci const uint8_t *ptr_v = ctx->thread[0]->src[2] + 694cabdff1aSopenharmony_ci ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444); 695cabdff1aSopenharmony_ci PixblockDSPContext *pdsp = &ctx->m.pdsp; 696cabdff1aSopenharmony_ci VideoDSPContext *vdsp = &ctx->m.vdsp; 697cabdff1aSopenharmony_ci 698cabdff1aSopenharmony_ci if (ctx->bit_depth != 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width || 699cabdff1aSopenharmony_ci (mb_y << 4) + 16 > ctx->m.avctx->height)) { 700cabdff1aSopenharmony_ci int y_w = ctx->m.avctx->width - (mb_x << 4); 701cabdff1aSopenharmony_ci int y_h = ctx->m.avctx->height - (mb_y << 4); 702cabdff1aSopenharmony_ci int uv_w = (y_w + 1) / 2; 703cabdff1aSopenharmony_ci int uv_h = y_h; 704cabdff1aSopenharmony_ci linesize = 16; 705cabdff1aSopenharmony_ci uvlinesize = 8; 706cabdff1aSopenharmony_ci 707cabdff1aSopenharmony_ci vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y, 708cabdff1aSopenharmony_ci linesize, ctx->m.linesize, 709cabdff1aSopenharmony_ci linesize, 16, 710cabdff1aSopenharmony_ci 0, 0, y_w, y_h); 711cabdff1aSopenharmony_ci vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u, 712cabdff1aSopenharmony_ci uvlinesize, ctx->m.uvlinesize, 713cabdff1aSopenharmony_ci uvlinesize, 16, 714cabdff1aSopenharmony_ci 0, 0, uv_w, uv_h); 715cabdff1aSopenharmony_ci vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v, 716cabdff1aSopenharmony_ci uvlinesize, ctx->m.uvlinesize, 717cabdff1aSopenharmony_ci uvlinesize, 16, 718cabdff1aSopenharmony_ci 0, 0, uv_w, uv_h); 719cabdff1aSopenharmony_ci 720cabdff1aSopenharmony_ci dct_y_offset = bw * linesize; 721cabdff1aSopenharmony_ci dct_uv_offset = bw * uvlinesize; 722cabdff1aSopenharmony_ci ptr_y = &ctx->edge_buf_y[0]; 723cabdff1aSopenharmony_ci ptr_u = &ctx->edge_buf_uv[0][0]; 724cabdff1aSopenharmony_ci ptr_v = &ctx->edge_buf_uv[1][0]; 725cabdff1aSopenharmony_ci } else if (ctx->bit_depth == 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 
16 > ctx->m.avctx->width || 726cabdff1aSopenharmony_ci (mb_y << 4) + 16 > ctx->m.avctx->height)) { 727cabdff1aSopenharmony_ci int y_w = ctx->m.avctx->width - (mb_x << 4); 728cabdff1aSopenharmony_ci int y_h = ctx->m.avctx->height - (mb_y << 4); 729cabdff1aSopenharmony_ci int uv_w = ctx->is_444 ? y_w : (y_w + 1) / 2; 730cabdff1aSopenharmony_ci int uv_h = y_h; 731cabdff1aSopenharmony_ci linesize = 32; 732cabdff1aSopenharmony_ci uvlinesize = 16 + 16 * ctx->is_444; 733cabdff1aSopenharmony_ci 734cabdff1aSopenharmony_ci vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y, 735cabdff1aSopenharmony_ci linesize, ctx->m.linesize, 736cabdff1aSopenharmony_ci linesize / 2, 16, 737cabdff1aSopenharmony_ci 0, 0, y_w, y_h); 738cabdff1aSopenharmony_ci vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u, 739cabdff1aSopenharmony_ci uvlinesize, ctx->m.uvlinesize, 740cabdff1aSopenharmony_ci uvlinesize / 2, 16, 741cabdff1aSopenharmony_ci 0, 0, uv_w, uv_h); 742cabdff1aSopenharmony_ci vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v, 743cabdff1aSopenharmony_ci uvlinesize, ctx->m.uvlinesize, 744cabdff1aSopenharmony_ci uvlinesize / 2, 16, 745cabdff1aSopenharmony_ci 0, 0, uv_w, uv_h); 746cabdff1aSopenharmony_ci 747cabdff1aSopenharmony_ci dct_y_offset = bw * linesize / 2; 748cabdff1aSopenharmony_ci dct_uv_offset = bw * uvlinesize / 2; 749cabdff1aSopenharmony_ci ptr_y = &ctx->edge_buf_y[0]; 750cabdff1aSopenharmony_ci ptr_u = &ctx->edge_buf_uv[0][0]; 751cabdff1aSopenharmony_ci ptr_v = &ctx->edge_buf_uv[1][0]; 752cabdff1aSopenharmony_ci } 753cabdff1aSopenharmony_ci 754cabdff1aSopenharmony_ci if (!ctx->is_444) { 755cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[0], ptr_y, linesize); 756cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize); 757cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[2], ptr_u, uvlinesize); 758cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[3], ptr_v, uvlinesize); 759cabdff1aSopenharmony_ci 760cabdff1aSopenharmony_ci if (mb_y + 1 
== ctx->m.mb_height && ctx->m.avctx->height == 1080) { 761cabdff1aSopenharmony_ci if (ctx->interlaced) { 762cabdff1aSopenharmony_ci ctx->get_pixels_8x4_sym(ctx->blocks[4], 763cabdff1aSopenharmony_ci ptr_y + dct_y_offset, 764cabdff1aSopenharmony_ci linesize); 765cabdff1aSopenharmony_ci ctx->get_pixels_8x4_sym(ctx->blocks[5], 766cabdff1aSopenharmony_ci ptr_y + dct_y_offset + bw, 767cabdff1aSopenharmony_ci linesize); 768cabdff1aSopenharmony_ci ctx->get_pixels_8x4_sym(ctx->blocks[6], 769cabdff1aSopenharmony_ci ptr_u + dct_uv_offset, 770cabdff1aSopenharmony_ci uvlinesize); 771cabdff1aSopenharmony_ci ctx->get_pixels_8x4_sym(ctx->blocks[7], 772cabdff1aSopenharmony_ci ptr_v + dct_uv_offset, 773cabdff1aSopenharmony_ci uvlinesize); 774cabdff1aSopenharmony_ci } else { 775cabdff1aSopenharmony_ci ctx->bdsp.clear_block(ctx->blocks[4]); 776cabdff1aSopenharmony_ci ctx->bdsp.clear_block(ctx->blocks[5]); 777cabdff1aSopenharmony_ci ctx->bdsp.clear_block(ctx->blocks[6]); 778cabdff1aSopenharmony_ci ctx->bdsp.clear_block(ctx->blocks[7]); 779cabdff1aSopenharmony_ci } 780cabdff1aSopenharmony_ci } else { 781cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[4], 782cabdff1aSopenharmony_ci ptr_y + dct_y_offset, linesize); 783cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[5], 784cabdff1aSopenharmony_ci ptr_y + dct_y_offset + bw, linesize); 785cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[6], 786cabdff1aSopenharmony_ci ptr_u + dct_uv_offset, uvlinesize); 787cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[7], 788cabdff1aSopenharmony_ci ptr_v + dct_uv_offset, uvlinesize); 789cabdff1aSopenharmony_ci } 790cabdff1aSopenharmony_ci } else { 791cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[0], ptr_y, linesize); 792cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize); 793cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[6], ptr_y + dct_y_offset, linesize); 794cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[7], ptr_y + dct_y_offset + bw, 
linesize); 795cabdff1aSopenharmony_ci 796cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[2], ptr_u, uvlinesize); 797cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[3], ptr_u + bw, uvlinesize); 798cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[8], ptr_u + dct_uv_offset, uvlinesize); 799cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[9], ptr_u + dct_uv_offset + bw, uvlinesize); 800cabdff1aSopenharmony_ci 801cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[4], ptr_v, uvlinesize); 802cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[5], ptr_v + bw, uvlinesize); 803cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[10], ptr_v + dct_uv_offset, uvlinesize); 804cabdff1aSopenharmony_ci pdsp->get_pixels(ctx->blocks[11], ptr_v + dct_uv_offset + bw, uvlinesize); 805cabdff1aSopenharmony_ci } 806cabdff1aSopenharmony_ci} 807cabdff1aSopenharmony_ci 808cabdff1aSopenharmony_cistatic av_always_inline 809cabdff1aSopenharmony_ciint dnxhd_switch_matrix(DNXHDEncContext *ctx, int i) 810cabdff1aSopenharmony_ci{ 811cabdff1aSopenharmony_ci int x; 812cabdff1aSopenharmony_ci 813cabdff1aSopenharmony_ci if (ctx->is_444) { 814cabdff1aSopenharmony_ci x = (i >> 1) % 3; 815cabdff1aSopenharmony_ci } else { 816cabdff1aSopenharmony_ci const static uint8_t component[8]={0,0,1,2,0,0,1,2}; 817cabdff1aSopenharmony_ci x = component[i]; 818cabdff1aSopenharmony_ci } 819cabdff1aSopenharmony_ci return x; 820cabdff1aSopenharmony_ci} 821cabdff1aSopenharmony_ci 822cabdff1aSopenharmony_cistatic int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, 823cabdff1aSopenharmony_ci int jobnr, int threadnr) 824cabdff1aSopenharmony_ci{ 825cabdff1aSopenharmony_ci DNXHDEncContext *ctx = avctx->priv_data; 826cabdff1aSopenharmony_ci int mb_y = jobnr, mb_x; 827cabdff1aSopenharmony_ci int qscale = ctx->qscale; 828cabdff1aSopenharmony_ci LOCAL_ALIGNED_16(int16_t, block, [64]); 829cabdff1aSopenharmony_ci ctx = ctx->thread[threadnr]; 830cabdff1aSopenharmony_ci 831cabdff1aSopenharmony_ci 
ctx->m.last_dc[0] = 832cabdff1aSopenharmony_ci ctx->m.last_dc[1] = 833cabdff1aSopenharmony_ci ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2); 834cabdff1aSopenharmony_ci 835cabdff1aSopenharmony_ci for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { 836cabdff1aSopenharmony_ci unsigned mb = mb_y * ctx->m.mb_width + mb_x; 837cabdff1aSopenharmony_ci int ssd = 0; 838cabdff1aSopenharmony_ci int ac_bits = 0; 839cabdff1aSopenharmony_ci int dc_bits = 0; 840cabdff1aSopenharmony_ci int i; 841cabdff1aSopenharmony_ci 842cabdff1aSopenharmony_ci dnxhd_get_blocks(ctx, mb_x, mb_y); 843cabdff1aSopenharmony_ci 844cabdff1aSopenharmony_ci for (i = 0; i < 8 + 4 * ctx->is_444; i++) { 845cabdff1aSopenharmony_ci int16_t *src_block = ctx->blocks[i]; 846cabdff1aSopenharmony_ci int overflow, nbits, diff, last_index; 847cabdff1aSopenharmony_ci int n = dnxhd_switch_matrix(ctx, i); 848cabdff1aSopenharmony_ci 849cabdff1aSopenharmony_ci memcpy(block, src_block, 64 * sizeof(*block)); 850cabdff1aSopenharmony_ci last_index = ctx->m.dct_quantize(&ctx->m, block, 851cabdff1aSopenharmony_ci ctx->is_444 ? 
4 * (n > 0): 4 & (2*i), 852cabdff1aSopenharmony_ci qscale, &overflow); 853cabdff1aSopenharmony_ci ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index); 854cabdff1aSopenharmony_ci 855cabdff1aSopenharmony_ci diff = block[0] - ctx->m.last_dc[n]; 856cabdff1aSopenharmony_ci if (diff < 0) 857cabdff1aSopenharmony_ci nbits = av_log2_16bit(-2 * diff); 858cabdff1aSopenharmony_ci else 859cabdff1aSopenharmony_ci nbits = av_log2_16bit(2 * diff); 860cabdff1aSopenharmony_ci 861cabdff1aSopenharmony_ci av_assert1(nbits < ctx->bit_depth + 4); 862cabdff1aSopenharmony_ci dc_bits += ctx->cid_table->dc_bits[nbits] + nbits; 863cabdff1aSopenharmony_ci 864cabdff1aSopenharmony_ci ctx->m.last_dc[n] = block[0]; 865cabdff1aSopenharmony_ci 866cabdff1aSopenharmony_ci if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) { 867cabdff1aSopenharmony_ci dnxhd_unquantize_c(ctx, block, i, qscale, last_index); 868cabdff1aSopenharmony_ci ctx->m.idsp.idct(block); 869cabdff1aSopenharmony_ci ssd += dnxhd_ssd_block(block, src_block); 870cabdff1aSopenharmony_ci } 871cabdff1aSopenharmony_ci } 872cabdff1aSopenharmony_ci ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].ssd = ssd; 873cabdff1aSopenharmony_ci ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].bits = ac_bits + dc_bits + 12 + 874cabdff1aSopenharmony_ci (1 + ctx->is_444) * 8 * ctx->vlc_bits[0]; 875cabdff1aSopenharmony_ci } 876cabdff1aSopenharmony_ci return 0; 877cabdff1aSopenharmony_ci} 878cabdff1aSopenharmony_ci 879cabdff1aSopenharmony_cistatic int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, 880cabdff1aSopenharmony_ci int jobnr, int threadnr) 881cabdff1aSopenharmony_ci{ 882cabdff1aSopenharmony_ci DNXHDEncContext *ctx = avctx->priv_data; 883cabdff1aSopenharmony_ci int mb_y = jobnr, mb_x; 884cabdff1aSopenharmony_ci ctx = ctx->thread[threadnr]; 885cabdff1aSopenharmony_ci init_put_bits(&ctx->m.pb, (uint8_t *)arg + ctx->data_offset + ctx->slice_offs[jobnr], 886cabdff1aSopenharmony_ci ctx->slice_size[jobnr]); 887cabdff1aSopenharmony_ci 
888cabdff1aSopenharmony_ci ctx->m.last_dc[0] = 889cabdff1aSopenharmony_ci ctx->m.last_dc[1] = 890cabdff1aSopenharmony_ci ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2); 891cabdff1aSopenharmony_ci for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { 892cabdff1aSopenharmony_ci unsigned mb = mb_y * ctx->m.mb_width + mb_x; 893cabdff1aSopenharmony_ci int qscale = ctx->mb_qscale[mb]; 894cabdff1aSopenharmony_ci int i; 895cabdff1aSopenharmony_ci 896cabdff1aSopenharmony_ci put_bits(&ctx->m.pb, 11, qscale); 897cabdff1aSopenharmony_ci put_bits(&ctx->m.pb, 1, avctx->pix_fmt == AV_PIX_FMT_YUV444P10); 898cabdff1aSopenharmony_ci 899cabdff1aSopenharmony_ci dnxhd_get_blocks(ctx, mb_x, mb_y); 900cabdff1aSopenharmony_ci 901cabdff1aSopenharmony_ci for (i = 0; i < 8 + 4 * ctx->is_444; i++) { 902cabdff1aSopenharmony_ci int16_t *block = ctx->blocks[i]; 903cabdff1aSopenharmony_ci int overflow, n = dnxhd_switch_matrix(ctx, i); 904cabdff1aSopenharmony_ci int last_index = ctx->m.dct_quantize(&ctx->m, block, 905cabdff1aSopenharmony_ci ctx->is_444 ? (((i >> 1) % 3) < 1 ? 
0 : 4): 4 & (2*i), 906cabdff1aSopenharmony_ci qscale, &overflow); 907cabdff1aSopenharmony_ci 908cabdff1aSopenharmony_ci dnxhd_encode_block(ctx, block, last_index, n); 909cabdff1aSopenharmony_ci } 910cabdff1aSopenharmony_ci } 911cabdff1aSopenharmony_ci if (put_bits_count(&ctx->m.pb) & 31) 912cabdff1aSopenharmony_ci put_bits(&ctx->m.pb, 32 - (put_bits_count(&ctx->m.pb) & 31), 0); 913cabdff1aSopenharmony_ci flush_put_bits(&ctx->m.pb); 914cabdff1aSopenharmony_ci memset(put_bits_ptr(&ctx->m.pb), 0, put_bytes_left(&ctx->m.pb, 0)); 915cabdff1aSopenharmony_ci return 0; 916cabdff1aSopenharmony_ci} 917cabdff1aSopenharmony_ci 918cabdff1aSopenharmony_cistatic void dnxhd_setup_threads_slices(DNXHDEncContext *ctx) 919cabdff1aSopenharmony_ci{ 920cabdff1aSopenharmony_ci int mb_y, mb_x; 921cabdff1aSopenharmony_ci int offset = 0; 922cabdff1aSopenharmony_ci for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) { 923cabdff1aSopenharmony_ci int thread_size; 924cabdff1aSopenharmony_ci ctx->slice_offs[mb_y] = offset; 925cabdff1aSopenharmony_ci ctx->slice_size[mb_y] = 0; 926cabdff1aSopenharmony_ci for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { 927cabdff1aSopenharmony_ci unsigned mb = mb_y * ctx->m.mb_width + mb_x; 928cabdff1aSopenharmony_ci ctx->slice_size[mb_y] += ctx->mb_bits[mb]; 929cabdff1aSopenharmony_ci } 930cabdff1aSopenharmony_ci ctx->slice_size[mb_y] = (ctx->slice_size[mb_y] + 31U) & ~31U; 931cabdff1aSopenharmony_ci ctx->slice_size[mb_y] >>= 3; 932cabdff1aSopenharmony_ci thread_size = ctx->slice_size[mb_y]; 933cabdff1aSopenharmony_ci offset += thread_size; 934cabdff1aSopenharmony_ci } 935cabdff1aSopenharmony_ci} 936cabdff1aSopenharmony_ci 937cabdff1aSopenharmony_cistatic int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, 938cabdff1aSopenharmony_ci int jobnr, int threadnr) 939cabdff1aSopenharmony_ci{ 940cabdff1aSopenharmony_ci DNXHDEncContext *ctx = avctx->priv_data; 941cabdff1aSopenharmony_ci int mb_y = jobnr, mb_x, x, y; 942cabdff1aSopenharmony_ci int partial_last_row = (mb_y 
== ctx->m.mb_height - 1) && 943cabdff1aSopenharmony_ci ((avctx->height >> ctx->interlaced) & 0xF); 944cabdff1aSopenharmony_ci 945cabdff1aSopenharmony_ci ctx = ctx->thread[threadnr]; 946cabdff1aSopenharmony_ci if (ctx->bit_depth == 8) { 947cabdff1aSopenharmony_ci uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize); 948cabdff1aSopenharmony_ci for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) { 949cabdff1aSopenharmony_ci unsigned mb = mb_y * ctx->m.mb_width + mb_x; 950cabdff1aSopenharmony_ci int sum; 951cabdff1aSopenharmony_ci int varc; 952cabdff1aSopenharmony_ci 953cabdff1aSopenharmony_ci if (!partial_last_row && mb_x * 16 <= avctx->width - 16 && (avctx->width % 16) == 0) { 954cabdff1aSopenharmony_ci sum = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize); 955cabdff1aSopenharmony_ci varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize); 956cabdff1aSopenharmony_ci } else { 957cabdff1aSopenharmony_ci int bw = FFMIN(avctx->width - 16 * mb_x, 16); 958cabdff1aSopenharmony_ci int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16); 959cabdff1aSopenharmony_ci sum = varc = 0; 960cabdff1aSopenharmony_ci for (y = 0; y < bh; y++) { 961cabdff1aSopenharmony_ci for (x = 0; x < bw; x++) { 962cabdff1aSopenharmony_ci uint8_t val = pix[x + y * ctx->m.linesize]; 963cabdff1aSopenharmony_ci sum += val; 964cabdff1aSopenharmony_ci varc += val * val; 965cabdff1aSopenharmony_ci } 966cabdff1aSopenharmony_ci } 967cabdff1aSopenharmony_ci } 968cabdff1aSopenharmony_ci varc = (varc - (((unsigned) sum * sum) >> 8) + 128) >> 8; 969cabdff1aSopenharmony_ci 970cabdff1aSopenharmony_ci ctx->mb_cmp[mb].value = varc; 971cabdff1aSopenharmony_ci ctx->mb_cmp[mb].mb = mb; 972cabdff1aSopenharmony_ci } 973cabdff1aSopenharmony_ci } else { // 10-bit 974cabdff1aSopenharmony_ci const int linesize = ctx->m.linesize >> 1; 975cabdff1aSopenharmony_ci for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) { 976cabdff1aSopenharmony_ci uint16_t *pix = (uint16_t *)ctx->thread[0]->src[0] + 
977cabdff1aSopenharmony_ci ((mb_y << 4) * linesize) + (mb_x << 4); 978cabdff1aSopenharmony_ci unsigned mb = mb_y * ctx->m.mb_width + mb_x; 979cabdff1aSopenharmony_ci int sum = 0; 980cabdff1aSopenharmony_ci int sqsum = 0; 981cabdff1aSopenharmony_ci int bw = FFMIN(avctx->width - 16 * mb_x, 16); 982cabdff1aSopenharmony_ci int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16); 983cabdff1aSopenharmony_ci int mean, sqmean; 984cabdff1aSopenharmony_ci int i, j; 985cabdff1aSopenharmony_ci // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8. 986cabdff1aSopenharmony_ci for (i = 0; i < bh; ++i) { 987cabdff1aSopenharmony_ci for (j = 0; j < bw; ++j) { 988cabdff1aSopenharmony_ci // Turn 16-bit pixels into 10-bit ones. 989cabdff1aSopenharmony_ci const int sample = (unsigned) pix[j] >> 6; 990cabdff1aSopenharmony_ci sum += sample; 991cabdff1aSopenharmony_ci sqsum += sample * sample; 992cabdff1aSopenharmony_ci // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX 993cabdff1aSopenharmony_ci } 994cabdff1aSopenharmony_ci pix += linesize; 995cabdff1aSopenharmony_ci } 996cabdff1aSopenharmony_ci mean = sum >> 8; // 16*16 == 2^8 997cabdff1aSopenharmony_ci sqmean = sqsum >> 8; 998cabdff1aSopenharmony_ci ctx->mb_cmp[mb].value = sqmean - mean * mean; 999cabdff1aSopenharmony_ci ctx->mb_cmp[mb].mb = mb; 1000cabdff1aSopenharmony_ci } 1001cabdff1aSopenharmony_ci } 1002cabdff1aSopenharmony_ci return 0; 1003cabdff1aSopenharmony_ci} 1004cabdff1aSopenharmony_ci 1005cabdff1aSopenharmony_cistatic int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx) 1006cabdff1aSopenharmony_ci{ 1007cabdff1aSopenharmony_ci int lambda, up_step, down_step; 1008cabdff1aSopenharmony_ci int last_lower = INT_MAX, last_higher = 0; 1009cabdff1aSopenharmony_ci int x, y, q; 1010cabdff1aSopenharmony_ci 1011cabdff1aSopenharmony_ci for (q = 1; q < avctx->qmax; q++) { 1012cabdff1aSopenharmony_ci ctx->qscale = q; 1013cabdff1aSopenharmony_ci avctx->execute2(avctx, dnxhd_calc_bits_thread, 
1014cabdff1aSopenharmony_ci NULL, NULL, ctx->m.mb_height); 1015cabdff1aSopenharmony_ci } 1016cabdff1aSopenharmony_ci up_step = down_step = 2 << LAMBDA_FRAC_BITS; 1017cabdff1aSopenharmony_ci lambda = ctx->lambda; 1018cabdff1aSopenharmony_ci 1019cabdff1aSopenharmony_ci for (;;) { 1020cabdff1aSopenharmony_ci int bits = 0; 1021cabdff1aSopenharmony_ci int end = 0; 1022cabdff1aSopenharmony_ci if (lambda == last_higher) { 1023cabdff1aSopenharmony_ci lambda++; 1024cabdff1aSopenharmony_ci end = 1; // need to set final qscales/bits 1025cabdff1aSopenharmony_ci } 1026cabdff1aSopenharmony_ci for (y = 0; y < ctx->m.mb_height; y++) { 1027cabdff1aSopenharmony_ci for (x = 0; x < ctx->m.mb_width; x++) { 1028cabdff1aSopenharmony_ci unsigned min = UINT_MAX; 1029cabdff1aSopenharmony_ci int qscale = 1; 1030cabdff1aSopenharmony_ci int mb = y * ctx->m.mb_width + x; 1031cabdff1aSopenharmony_ci int rc = 0; 1032cabdff1aSopenharmony_ci for (q = 1; q < avctx->qmax; q++) { 1033cabdff1aSopenharmony_ci int i = (q*ctx->m.mb_num) + mb; 1034cabdff1aSopenharmony_ci unsigned score = ctx->mb_rc[i].bits * lambda + 1035cabdff1aSopenharmony_ci ((unsigned) ctx->mb_rc[i].ssd << LAMBDA_FRAC_BITS); 1036cabdff1aSopenharmony_ci if (score < min) { 1037cabdff1aSopenharmony_ci min = score; 1038cabdff1aSopenharmony_ci qscale = q; 1039cabdff1aSopenharmony_ci rc = i; 1040cabdff1aSopenharmony_ci } 1041cabdff1aSopenharmony_ci } 1042cabdff1aSopenharmony_ci bits += ctx->mb_rc[rc].bits; 1043cabdff1aSopenharmony_ci ctx->mb_qscale[mb] = qscale; 1044cabdff1aSopenharmony_ci ctx->mb_bits[mb] = ctx->mb_rc[rc].bits; 1045cabdff1aSopenharmony_ci } 1046cabdff1aSopenharmony_ci bits = (bits + 31) & ~31; // padding 1047cabdff1aSopenharmony_ci if (bits > ctx->frame_bits) 1048cabdff1aSopenharmony_ci break; 1049cabdff1aSopenharmony_ci } 1050cabdff1aSopenharmony_ci if (end) { 1051cabdff1aSopenharmony_ci if (bits > ctx->frame_bits) 1052cabdff1aSopenharmony_ci return AVERROR(EINVAL); 1053cabdff1aSopenharmony_ci break; 
1054cabdff1aSopenharmony_ci } 1055cabdff1aSopenharmony_ci if (bits < ctx->frame_bits) { 1056cabdff1aSopenharmony_ci last_lower = FFMIN(lambda, last_lower); 1057cabdff1aSopenharmony_ci if (last_higher != 0) 1058cabdff1aSopenharmony_ci lambda = (lambda+last_higher)>>1; 1059cabdff1aSopenharmony_ci else 1060cabdff1aSopenharmony_ci lambda -= down_step; 1061cabdff1aSopenharmony_ci down_step = FFMIN((int64_t)down_step*5, INT_MAX); 1062cabdff1aSopenharmony_ci up_step = 1<<LAMBDA_FRAC_BITS; 1063cabdff1aSopenharmony_ci lambda = FFMAX(1, lambda); 1064cabdff1aSopenharmony_ci if (lambda == last_lower) 1065cabdff1aSopenharmony_ci break; 1066cabdff1aSopenharmony_ci } else { 1067cabdff1aSopenharmony_ci last_higher = FFMAX(lambda, last_higher); 1068cabdff1aSopenharmony_ci if (last_lower != INT_MAX) 1069cabdff1aSopenharmony_ci lambda = (lambda+last_lower)>>1; 1070cabdff1aSopenharmony_ci else if ((int64_t)lambda + up_step > INT_MAX) 1071cabdff1aSopenharmony_ci return AVERROR(EINVAL); 1072cabdff1aSopenharmony_ci else 1073cabdff1aSopenharmony_ci lambda += up_step; 1074cabdff1aSopenharmony_ci up_step = FFMIN((int64_t)up_step*5, INT_MAX); 1075cabdff1aSopenharmony_ci down_step = 1<<LAMBDA_FRAC_BITS; 1076cabdff1aSopenharmony_ci } 1077cabdff1aSopenharmony_ci } 1078cabdff1aSopenharmony_ci ctx->lambda = lambda; 1079cabdff1aSopenharmony_ci return 0; 1080cabdff1aSopenharmony_ci} 1081cabdff1aSopenharmony_ci 1082cabdff1aSopenharmony_cistatic int dnxhd_find_qscale(DNXHDEncContext *ctx) 1083cabdff1aSopenharmony_ci{ 1084cabdff1aSopenharmony_ci int bits = 0; 1085cabdff1aSopenharmony_ci int up_step = 1; 1086cabdff1aSopenharmony_ci int down_step = 1; 1087cabdff1aSopenharmony_ci int last_higher = 0; 1088cabdff1aSopenharmony_ci int last_lower = INT_MAX; 1089cabdff1aSopenharmony_ci int qscale; 1090cabdff1aSopenharmony_ci int x, y; 1091cabdff1aSopenharmony_ci 1092cabdff1aSopenharmony_ci qscale = ctx->qscale; 1093cabdff1aSopenharmony_ci for (;;) { 1094cabdff1aSopenharmony_ci bits = 0; 
1095cabdff1aSopenharmony_ci ctx->qscale = qscale; 1096cabdff1aSopenharmony_ci // XXX avoid recalculating bits 1097cabdff1aSopenharmony_ci ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread, 1098cabdff1aSopenharmony_ci NULL, NULL, ctx->m.mb_height); 1099cabdff1aSopenharmony_ci for (y = 0; y < ctx->m.mb_height; y++) { 1100cabdff1aSopenharmony_ci for (x = 0; x < ctx->m.mb_width; x++) 1101cabdff1aSopenharmony_ci bits += ctx->mb_rc[(qscale*ctx->m.mb_num) + (y*ctx->m.mb_width+x)].bits; 1102cabdff1aSopenharmony_ci bits = (bits+31)&~31; // padding 1103cabdff1aSopenharmony_ci if (bits > ctx->frame_bits) 1104cabdff1aSopenharmony_ci break; 1105cabdff1aSopenharmony_ci } 1106cabdff1aSopenharmony_ci if (bits < ctx->frame_bits) { 1107cabdff1aSopenharmony_ci if (qscale == 1) 1108cabdff1aSopenharmony_ci return 1; 1109cabdff1aSopenharmony_ci if (last_higher == qscale - 1) { 1110cabdff1aSopenharmony_ci qscale = last_higher; 1111cabdff1aSopenharmony_ci break; 1112cabdff1aSopenharmony_ci } 1113cabdff1aSopenharmony_ci last_lower = FFMIN(qscale, last_lower); 1114cabdff1aSopenharmony_ci if (last_higher != 0) 1115cabdff1aSopenharmony_ci qscale = (qscale + last_higher) >> 1; 1116cabdff1aSopenharmony_ci else 1117cabdff1aSopenharmony_ci qscale -= down_step++; 1118cabdff1aSopenharmony_ci if (qscale < 1) 1119cabdff1aSopenharmony_ci qscale = 1; 1120cabdff1aSopenharmony_ci up_step = 1; 1121cabdff1aSopenharmony_ci } else { 1122cabdff1aSopenharmony_ci if (last_lower == qscale + 1) 1123cabdff1aSopenharmony_ci break; 1124cabdff1aSopenharmony_ci last_higher = FFMAX(qscale, last_higher); 1125cabdff1aSopenharmony_ci if (last_lower != INT_MAX) 1126cabdff1aSopenharmony_ci qscale = (qscale + last_lower) >> 1; 1127cabdff1aSopenharmony_ci else 1128cabdff1aSopenharmony_ci qscale += up_step++; 1129cabdff1aSopenharmony_ci down_step = 1; 1130cabdff1aSopenharmony_ci if (qscale >= ctx->m.avctx->qmax) 1131cabdff1aSopenharmony_ci return AVERROR(EINVAL); 1132cabdff1aSopenharmony_ci } 
1133cabdff1aSopenharmony_ci } 1134cabdff1aSopenharmony_ci ctx->qscale = qscale; 1135cabdff1aSopenharmony_ci return 0; 1136cabdff1aSopenharmony_ci} 1137cabdff1aSopenharmony_ci 1138cabdff1aSopenharmony_ci#define BUCKET_BITS 8 1139cabdff1aSopenharmony_ci#define RADIX_PASSES 4 1140cabdff1aSopenharmony_ci#define NBUCKETS (1 << BUCKET_BITS) 1141cabdff1aSopenharmony_ci 1142cabdff1aSopenharmony_cistatic inline int get_bucket(int value, int shift) 1143cabdff1aSopenharmony_ci{ 1144cabdff1aSopenharmony_ci value >>= shift; 1145cabdff1aSopenharmony_ci value &= NBUCKETS - 1; 1146cabdff1aSopenharmony_ci return NBUCKETS - 1 - value; 1147cabdff1aSopenharmony_ci} 1148cabdff1aSopenharmony_ci 1149cabdff1aSopenharmony_cistatic void radix_count(const RCCMPEntry *data, int size, 1150cabdff1aSopenharmony_ci int buckets[RADIX_PASSES][NBUCKETS]) 1151cabdff1aSopenharmony_ci{ 1152cabdff1aSopenharmony_ci int i, j; 1153cabdff1aSopenharmony_ci memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS); 1154cabdff1aSopenharmony_ci for (i = 0; i < size; i++) { 1155cabdff1aSopenharmony_ci int v = data[i].value; 1156cabdff1aSopenharmony_ci for (j = 0; j < RADIX_PASSES; j++) { 1157cabdff1aSopenharmony_ci buckets[j][get_bucket(v, 0)]++; 1158cabdff1aSopenharmony_ci v >>= BUCKET_BITS; 1159cabdff1aSopenharmony_ci } 1160cabdff1aSopenharmony_ci av_assert1(!v); 1161cabdff1aSopenharmony_ci } 1162cabdff1aSopenharmony_ci for (j = 0; j < RADIX_PASSES; j++) { 1163cabdff1aSopenharmony_ci int offset = size; 1164cabdff1aSopenharmony_ci for (i = NBUCKETS - 1; i >= 0; i--) 1165cabdff1aSopenharmony_ci buckets[j][i] = offset -= buckets[j][i]; 1166cabdff1aSopenharmony_ci av_assert1(!buckets[j][0]); 1167cabdff1aSopenharmony_ci } 1168cabdff1aSopenharmony_ci} 1169cabdff1aSopenharmony_ci 1170cabdff1aSopenharmony_cistatic void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, 1171cabdff1aSopenharmony_ci int size, int buckets[NBUCKETS], int pass) 1172cabdff1aSopenharmony_ci{ 1173cabdff1aSopenharmony_ci int 
shift = pass * BUCKET_BITS; 1174cabdff1aSopenharmony_ci int i; 1175cabdff1aSopenharmony_ci for (i = 0; i < size; i++) { 1176cabdff1aSopenharmony_ci int v = get_bucket(data[i].value, shift); 1177cabdff1aSopenharmony_ci int pos = buckets[v]++; 1178cabdff1aSopenharmony_ci dst[pos] = data[i]; 1179cabdff1aSopenharmony_ci } 1180cabdff1aSopenharmony_ci} 1181cabdff1aSopenharmony_ci 1182cabdff1aSopenharmony_cistatic void radix_sort(RCCMPEntry *data, RCCMPEntry *tmp, int size) 1183cabdff1aSopenharmony_ci{ 1184cabdff1aSopenharmony_ci int buckets[RADIX_PASSES][NBUCKETS]; 1185cabdff1aSopenharmony_ci radix_count(data, size, buckets); 1186cabdff1aSopenharmony_ci radix_sort_pass(tmp, data, size, buckets[0], 0); 1187cabdff1aSopenharmony_ci radix_sort_pass(data, tmp, size, buckets[1], 1); 1188cabdff1aSopenharmony_ci if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) { 1189cabdff1aSopenharmony_ci radix_sort_pass(tmp, data, size, buckets[2], 2); 1190cabdff1aSopenharmony_ci radix_sort_pass(data, tmp, size, buckets[3], 3); 1191cabdff1aSopenharmony_ci } 1192cabdff1aSopenharmony_ci} 1193cabdff1aSopenharmony_ci 1194cabdff1aSopenharmony_cistatic int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx) 1195cabdff1aSopenharmony_ci{ 1196cabdff1aSopenharmony_ci int max_bits = 0; 1197cabdff1aSopenharmony_ci int ret, x, y; 1198cabdff1aSopenharmony_ci if ((ret = dnxhd_find_qscale(ctx)) < 0) 1199cabdff1aSopenharmony_ci return ret; 1200cabdff1aSopenharmony_ci for (y = 0; y < ctx->m.mb_height; y++) { 1201cabdff1aSopenharmony_ci for (x = 0; x < ctx->m.mb_width; x++) { 1202cabdff1aSopenharmony_ci int mb = y * ctx->m.mb_width + x; 1203cabdff1aSopenharmony_ci int rc = (ctx->qscale * ctx->m.mb_num ) + mb; 1204cabdff1aSopenharmony_ci int delta_bits; 1205cabdff1aSopenharmony_ci ctx->mb_qscale[mb] = ctx->qscale; 1206cabdff1aSopenharmony_ci ctx->mb_bits[mb] = ctx->mb_rc[rc].bits; 1207cabdff1aSopenharmony_ci max_bits += ctx->mb_rc[rc].bits; 1208cabdff1aSopenharmony_ci if (!RC_VARIANCE) { 
1209cabdff1aSopenharmony_ci delta_bits = ctx->mb_rc[rc].bits - 1210cabdff1aSopenharmony_ci ctx->mb_rc[rc + ctx->m.mb_num].bits; 1211cabdff1aSopenharmony_ci ctx->mb_cmp[mb].mb = mb; 1212cabdff1aSopenharmony_ci ctx->mb_cmp[mb].value = 1213cabdff1aSopenharmony_ci delta_bits ? ((ctx->mb_rc[rc].ssd - 1214cabdff1aSopenharmony_ci ctx->mb_rc[rc + ctx->m.mb_num].ssd) * 100) / 1215cabdff1aSopenharmony_ci delta_bits 1216cabdff1aSopenharmony_ci : INT_MIN; // avoid increasing qscale 1217cabdff1aSopenharmony_ci } 1218cabdff1aSopenharmony_ci } 1219cabdff1aSopenharmony_ci max_bits += 31; // worst padding 1220cabdff1aSopenharmony_ci } 1221cabdff1aSopenharmony_ci if (!ret) { 1222cabdff1aSopenharmony_ci if (RC_VARIANCE) 1223cabdff1aSopenharmony_ci avctx->execute2(avctx, dnxhd_mb_var_thread, 1224cabdff1aSopenharmony_ci NULL, NULL, ctx->m.mb_height); 1225cabdff1aSopenharmony_ci radix_sort(ctx->mb_cmp, ctx->mb_cmp_tmp, ctx->m.mb_num); 1226cabdff1aSopenharmony_ciretry: 1227cabdff1aSopenharmony_ci for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) { 1228cabdff1aSopenharmony_ci int mb = ctx->mb_cmp[x].mb; 1229cabdff1aSopenharmony_ci int rc = (ctx->qscale * ctx->m.mb_num ) + mb; 1230cabdff1aSopenharmony_ci max_bits -= ctx->mb_rc[rc].bits - 1231cabdff1aSopenharmony_ci ctx->mb_rc[rc + ctx->m.mb_num].bits; 1232cabdff1aSopenharmony_ci if (ctx->mb_qscale[mb] < 255) 1233cabdff1aSopenharmony_ci ctx->mb_qscale[mb]++; 1234cabdff1aSopenharmony_ci ctx->mb_bits[mb] = ctx->mb_rc[rc + ctx->m.mb_num].bits; 1235cabdff1aSopenharmony_ci } 1236cabdff1aSopenharmony_ci 1237cabdff1aSopenharmony_ci if (max_bits > ctx->frame_bits) 1238cabdff1aSopenharmony_ci goto retry; 1239cabdff1aSopenharmony_ci } 1240cabdff1aSopenharmony_ci return 0; 1241cabdff1aSopenharmony_ci} 1242cabdff1aSopenharmony_ci 1243cabdff1aSopenharmony_cistatic void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame) 1244cabdff1aSopenharmony_ci{ 1245cabdff1aSopenharmony_ci int i; 1246cabdff1aSopenharmony_ci 
1247cabdff1aSopenharmony_ci for (i = 0; i < ctx->m.avctx->thread_count; i++) { 1248cabdff1aSopenharmony_ci ctx->thread[i]->m.linesize = frame->linesize[0] << ctx->interlaced; 1249cabdff1aSopenharmony_ci ctx->thread[i]->m.uvlinesize = frame->linesize[1] << ctx->interlaced; 1250cabdff1aSopenharmony_ci ctx->thread[i]->dct_y_offset = ctx->m.linesize *8; 1251cabdff1aSopenharmony_ci ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8; 1252cabdff1aSopenharmony_ci } 1253cabdff1aSopenharmony_ci 1254cabdff1aSopenharmony_ci ctx->cur_field = frame->interlaced_frame && !frame->top_field_first; 1255cabdff1aSopenharmony_ci} 1256cabdff1aSopenharmony_ci 1257cabdff1aSopenharmony_cistatic int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt, 1258cabdff1aSopenharmony_ci const AVFrame *frame, int *got_packet) 1259cabdff1aSopenharmony_ci{ 1260cabdff1aSopenharmony_ci DNXHDEncContext *ctx = avctx->priv_data; 1261cabdff1aSopenharmony_ci int first_field = 1; 1262cabdff1aSopenharmony_ci int offset, i, ret; 1263cabdff1aSopenharmony_ci uint8_t *buf; 1264cabdff1aSopenharmony_ci 1265cabdff1aSopenharmony_ci if ((ret = ff_get_encode_buffer(avctx, pkt, ctx->frame_size, 0)) < 0) 1266cabdff1aSopenharmony_ci return ret; 1267cabdff1aSopenharmony_ci buf = pkt->data; 1268cabdff1aSopenharmony_ci 1269cabdff1aSopenharmony_ci dnxhd_load_picture(ctx, frame); 1270cabdff1aSopenharmony_ci 1271cabdff1aSopenharmony_ciencode_coding_unit: 1272cabdff1aSopenharmony_ci for (i = 0; i < 3; i++) { 1273cabdff1aSopenharmony_ci ctx->src[i] = frame->data[i]; 1274cabdff1aSopenharmony_ci if (ctx->interlaced && ctx->cur_field) 1275cabdff1aSopenharmony_ci ctx->src[i] += frame->linesize[i]; 1276cabdff1aSopenharmony_ci } 1277cabdff1aSopenharmony_ci 1278cabdff1aSopenharmony_ci dnxhd_write_header(avctx, buf); 1279cabdff1aSopenharmony_ci 1280cabdff1aSopenharmony_ci if (avctx->mb_decision == FF_MB_DECISION_RD) 1281cabdff1aSopenharmony_ci ret = dnxhd_encode_rdo(avctx, ctx); 1282cabdff1aSopenharmony_ci else 
1283cabdff1aSopenharmony_ci ret = dnxhd_encode_fast(avctx, ctx); 1284cabdff1aSopenharmony_ci if (ret < 0) { 1285cabdff1aSopenharmony_ci av_log(avctx, AV_LOG_ERROR, 1286cabdff1aSopenharmony_ci "picture could not fit ratecontrol constraints, increase qmax\n"); 1287cabdff1aSopenharmony_ci return ret; 1288cabdff1aSopenharmony_ci } 1289cabdff1aSopenharmony_ci 1290cabdff1aSopenharmony_ci dnxhd_setup_threads_slices(ctx); 1291cabdff1aSopenharmony_ci 1292cabdff1aSopenharmony_ci offset = 0; 1293cabdff1aSopenharmony_ci for (i = 0; i < ctx->m.mb_height; i++) { 1294cabdff1aSopenharmony_ci AV_WB32(ctx->msip + i * 4, offset); 1295cabdff1aSopenharmony_ci offset += ctx->slice_size[i]; 1296cabdff1aSopenharmony_ci av_assert1(!(ctx->slice_size[i] & 3)); 1297cabdff1aSopenharmony_ci } 1298cabdff1aSopenharmony_ci 1299cabdff1aSopenharmony_ci avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height); 1300cabdff1aSopenharmony_ci 1301cabdff1aSopenharmony_ci av_assert1(ctx->data_offset + offset + 4 <= ctx->coding_unit_size); 1302cabdff1aSopenharmony_ci memset(buf + ctx->data_offset + offset, 0, 1303cabdff1aSopenharmony_ci ctx->coding_unit_size - 4 - offset - ctx->data_offset); 1304cabdff1aSopenharmony_ci 1305cabdff1aSopenharmony_ci AV_WB32(buf + ctx->coding_unit_size - 4, 0x600DC0DE); // EOF 1306cabdff1aSopenharmony_ci 1307cabdff1aSopenharmony_ci if (ctx->interlaced && first_field) { 1308cabdff1aSopenharmony_ci first_field = 0; 1309cabdff1aSopenharmony_ci ctx->cur_field ^= 1; 1310cabdff1aSopenharmony_ci buf += ctx->coding_unit_size; 1311cabdff1aSopenharmony_ci goto encode_coding_unit; 1312cabdff1aSopenharmony_ci } 1313cabdff1aSopenharmony_ci 1314cabdff1aSopenharmony_ci ff_side_data_set_encoder_stats(pkt, ctx->qscale * FF_QP2LAMBDA, NULL, 0, AV_PICTURE_TYPE_I); 1315cabdff1aSopenharmony_ci 1316cabdff1aSopenharmony_ci *got_packet = 1; 1317cabdff1aSopenharmony_ci return 0; 1318cabdff1aSopenharmony_ci} 1319cabdff1aSopenharmony_ci 1320cabdff1aSopenharmony_cistatic av_cold int 
dnxhd_encode_end(AVCodecContext *avctx) 1321cabdff1aSopenharmony_ci{ 1322cabdff1aSopenharmony_ci DNXHDEncContext *ctx = avctx->priv_data; 1323cabdff1aSopenharmony_ci int i; 1324cabdff1aSopenharmony_ci 1325cabdff1aSopenharmony_ci av_freep(&ctx->orig_vlc_codes); 1326cabdff1aSopenharmony_ci av_freep(&ctx->orig_vlc_bits); 1327cabdff1aSopenharmony_ci av_freep(&ctx->run_codes); 1328cabdff1aSopenharmony_ci av_freep(&ctx->run_bits); 1329cabdff1aSopenharmony_ci 1330cabdff1aSopenharmony_ci av_freep(&ctx->mb_bits); 1331cabdff1aSopenharmony_ci av_freep(&ctx->mb_qscale); 1332cabdff1aSopenharmony_ci av_freep(&ctx->mb_rc); 1333cabdff1aSopenharmony_ci av_freep(&ctx->mb_cmp); 1334cabdff1aSopenharmony_ci av_freep(&ctx->mb_cmp_tmp); 1335cabdff1aSopenharmony_ci av_freep(&ctx->slice_size); 1336cabdff1aSopenharmony_ci av_freep(&ctx->slice_offs); 1337cabdff1aSopenharmony_ci 1338cabdff1aSopenharmony_ci av_freep(&ctx->qmatrix_c); 1339cabdff1aSopenharmony_ci av_freep(&ctx->qmatrix_l); 1340cabdff1aSopenharmony_ci av_freep(&ctx->qmatrix_c16); 1341cabdff1aSopenharmony_ci av_freep(&ctx->qmatrix_l16); 1342cabdff1aSopenharmony_ci 1343cabdff1aSopenharmony_ci if (ctx->thread[1]) { 1344cabdff1aSopenharmony_ci for (i = 1; i < avctx->thread_count; i++) 1345cabdff1aSopenharmony_ci av_freep(&ctx->thread[i]); 1346cabdff1aSopenharmony_ci } 1347cabdff1aSopenharmony_ci 1348cabdff1aSopenharmony_ci return 0; 1349cabdff1aSopenharmony_ci} 1350cabdff1aSopenharmony_ci 1351cabdff1aSopenharmony_cistatic const FFCodecDefault dnxhd_defaults[] = { 1352cabdff1aSopenharmony_ci { "qmax", "1024" }, /* Maximum quantization scale factor allowed for VC-3 */ 1353cabdff1aSopenharmony_ci { NULL }, 1354cabdff1aSopenharmony_ci}; 1355cabdff1aSopenharmony_ci 1356cabdff1aSopenharmony_ciconst FFCodec ff_dnxhd_encoder = { 1357cabdff1aSopenharmony_ci .p.name = "dnxhd", 1358cabdff1aSopenharmony_ci .p.long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"), 1359cabdff1aSopenharmony_ci .p.type = AVMEDIA_TYPE_VIDEO, 1360cabdff1aSopenharmony_ci .p.id 
= AV_CODEC_ID_DNXHD, 1361cabdff1aSopenharmony_ci .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | 1362cabdff1aSopenharmony_ci AV_CODEC_CAP_SLICE_THREADS, 1363cabdff1aSopenharmony_ci .priv_data_size = sizeof(DNXHDEncContext), 1364cabdff1aSopenharmony_ci .init = dnxhd_encode_init, 1365cabdff1aSopenharmony_ci FF_CODEC_ENCODE_CB(dnxhd_encode_picture), 1366cabdff1aSopenharmony_ci .close = dnxhd_encode_end, 1367cabdff1aSopenharmony_ci .p.pix_fmts = (const enum AVPixelFormat[]) { 1368cabdff1aSopenharmony_ci AV_PIX_FMT_YUV422P, 1369cabdff1aSopenharmony_ci AV_PIX_FMT_YUV422P10, 1370cabdff1aSopenharmony_ci AV_PIX_FMT_YUV444P10, 1371cabdff1aSopenharmony_ci AV_PIX_FMT_GBRP10, 1372cabdff1aSopenharmony_ci AV_PIX_FMT_NONE 1373cabdff1aSopenharmony_ci }, 1374cabdff1aSopenharmony_ci .p.priv_class = &dnxhd_class, 1375cabdff1aSopenharmony_ci .defaults = dnxhd_defaults, 1376cabdff1aSopenharmony_ci .p.profiles = NULL_IF_CONFIG_SMALL(ff_dnxhd_profiles), 1377cabdff1aSopenharmony_ci .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP, 1378cabdff1aSopenharmony_ci}; 1379