Lines Matching refs:ctx

119 static int dnxhd_10bit_dct_quantize_444(MpegEncContext *ctx, int16_t *block,
124 const uint8_t *scantable= ctx->intra_scantable.scantable;
129 ctx->fdsp.fdct(block);
134 qmat = n < 4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale];
135 bias= ctx->intra_quant_bias * (1 << (16 - 8));
168 *overflow = ctx->max_qcoeff < max; //overflow might have happened
171 if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE)
172 ff_block_permute(block, ctx->idsp.idct_permutation,
178 static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
181 const uint8_t *scantable= ctx->intra_scantable.scantable;
182 const int *qmat = n<4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale];
186 ctx->fdsp.fdct(block);
202 if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE)
203 ff_block_permute(block, ctx->idsp.idct_permutation,
209 static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
212 int max_level = 1 << (ctx->bit_depth + 2);
214 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_codes, max_level * 4) ||
215 !FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_bits, max_level * 4) ||
216 !(ctx->run_codes = av_mallocz(63 * 2)) ||
217 !(ctx->run_bits = av_mallocz(63)))
219 ctx->vlc_codes = ctx->orig_vlc_codes + max_level * 2;
220 ctx->vlc_bits = ctx->orig_vlc_bits + max_level * 2;
232 if (ctx->cid_table->ac_info[2*j+0] >> 1 == alevel &&
233 (!offset || (ctx->cid_table->ac_info[2*j+1] & 1) && offset) &&
234 (!run || (ctx->cid_table->ac_info[2*j+1] & 2) && run)) {
235 av_assert1(!ctx->vlc_codes[index]);
237 ctx->vlc_codes[index] =
238 (ctx->cid_table->ac_codes[j] << 1) | (sign & 1);
239 ctx->vlc_bits[index] = ctx->cid_table->ac_bits[j] + 1;
241 ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
242 ctx->vlc_bits[index] = ctx->cid_table->ac_bits[j];
249 ctx->vlc_codes[index] =
250 (ctx->vlc_codes[index] << ctx->cid_table->index_bits) | offset;
251 ctx->vlc_bits[index] += ctx->cid_table->index_bits;
256 int run = ctx->cid_table->run[i];
258 ctx->run_codes[run] = ctx->cid_table->run_codes[i];
259 ctx->run_bits[run] = ctx->cid_table->run_bits[i];
264 static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
269 const uint8_t *luma_weight_table = ctx->cid_table->luma_weight;
270 const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
272 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l, ctx->m.avctx->qmax + 1) ||
273 !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c, ctx->m.avctx->qmax + 1) ||
274 !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l16, ctx->m.avctx->qmax + 1) ||
275 !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c16, ctx->m.avctx->qmax + 1))
278 if (ctx->bit_depth == 8) {
280 int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
281 weight_matrix[j] = ctx->cid_table->luma_weight[i];
283 ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16,
284 weight_matrix, ctx->intra_quant_bias, 1,
285 ctx->m.avctx->qmax, 1);
287 int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
288 weight_matrix[j] = ctx->cid_table->chroma_weight[i];
290 ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16,
291 weight_matrix, ctx->intra_quant_bias, 1,
292 ctx->m.avctx->qmax, 1);
294 for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
296 ctx->qmatrix_l[qscale][i] <<= 2;
297 ctx->qmatrix_c[qscale][i] <<= 2;
298 ctx->qmatrix_l16[qscale][0][i] <<= 2;
299 ctx->qmatrix_l16[qscale][1][i] <<= 2;
300 ctx->qmatrix_c16[qscale][0][i] <<= 2;
301 ctx->qmatrix_c16[qscale][1][i] <<= 2;
306 for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
317 * We want values of ctx->qmatrix_l and ctx->qmatrix_c to be:
321 ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) /
323 ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) /
329 ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16;
330 ctx->m.q_chroma_intra_matrix = ctx->qmatrix_c;
331 ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
332 ctx->m.q_intra_matrix = ctx->qmatrix_l;
337 static av_cold int dnxhd_init_rc(DNXHDEncContext *ctx)
339 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_rc, (ctx->m.avctx->qmax + 1) * ctx->m.mb_num))
342 if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD) {
343 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp, ctx->m.mb_num) ||
344 !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp_tmp, ctx->m.mb_num))
347 ctx->frame_bits = (ctx->coding_unit_size -
348 ctx->data_offset - 4 - ctx->min_padding) * 8;
349 ctx->qscale = 1;
350 ctx->lambda = 2 << LAMBDA_FRAC_BITS; // qscale 2
356 DNXHDEncContext *ctx = avctx->priv_data;
361 ctx->bit_depth = 8;
366 ctx->bit_depth = 10;
370 if ((ctx->profile == FF_PROFILE_DNXHR_444 && (avctx->pix_fmt != AV_PIX_FMT_YUV444P10 &&
372 (ctx->profile != FF_PROFILE_DNXHR_444 && (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 ||
379 if (ctx->profile == FF_PROFILE_DNXHR_HQX && avctx->pix_fmt != AV_PIX_FMT_YUV422P10) {
385 if ((ctx->profile == FF_PROFILE_DNXHR_LB ||
386 ctx->profile == FF_PROFILE_DNXHR_SQ ||
387 ctx->profile == FF_PROFILE_DNXHR_HQ) && avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
393 ctx->is_444 = ctx->profile == FF_PROFILE_DNXHR_444;
394 avctx->profile = ctx->profile;
395 ctx->cid = ff_dnxhd_find_cid(avctx, ctx->bit_depth);
396 if (!ctx->cid) {
402 av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
404 if (ctx->cid >= 1270 && ctx->cid <= 1274)
413 ctx->cid_table = ff_dnxhd_get_cid_table(ctx->cid);
414 av_assert0(ctx->cid_table);
416 ctx->m.avctx = avctx;
417 ctx->m.mb_intra = 1;
418 ctx->m.h263_aic = 1;
420 avctx->bits_per_raw_sample = ctx->bit_depth;
422 ff_blockdsp_init(&ctx->bdsp, avctx);
423 ff_fdctdsp_init(&ctx->m.fdsp, avctx);
424 ff_mpv_idct_init(&ctx->m);
425 ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
426 ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
427 ff_dct_encode_init(&ctx->m);
429 if (ctx->profile != FF_PROFILE_DNXHD)
430 ff_videodsp_init(&ctx->m.vdsp, ctx->bit_depth);
432 if (!ctx->m.dct_quantize)
433 ctx->m.dct_quantize = ff_dct_quantize_c;
435 if (ctx->is_444 || ctx->profile == FF_PROFILE_DNXHR_HQX) {
436 ctx->m.dct_quantize = dnxhd_10bit_dct_quantize_444;
437 ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
438 ctx->block_width_l2 = 4;
439 } else if (ctx->bit_depth == 10) {
440 ctx->m.dct_quantize = dnxhd_10bit_dct_quantize;
441 ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
442 ctx->block_width_l2 = 4;
444 ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
445 ctx->block_width_l2 = 3;
449 ff_dnxhdenc_init_x86(ctx);
452 ctx->m.mb_height = (avctx->height + 15) / 16;
453 ctx->m.mb_width = (avctx->width + 15) / 16;
456 ctx->interlaced = 1;
457 ctx->m.mb_height /= 2;
460 if (ctx->interlaced && ctx->profile != FF_PROFILE_DNXHD) {
466 ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
468 if (ctx->cid_table->frame_size == DNXHD_VARIABLE) {
469 ctx->frame_size = ff_dnxhd_get_hr_frame_size(ctx->cid,
471 av_assert0(ctx->frame_size >= 0);
472 ctx->coding_unit_size = ctx->frame_size;
474 ctx->frame_size = ctx->cid_table->frame_size;
475 ctx->coding_unit_size = ctx->cid_table->coding_unit_size;
478 if (ctx->m.mb_height > 68)
479 ctx->data_offset = 0x170 + (ctx->m.mb_height << 2);
481 ctx->data_offset = 0x280;
484 if ((ret = dnxhd_init_qmat(ctx, ctx->intra_quant_bias, 0)) < 0)
489 if (ctx->nitris_compat)
490 ctx->min_padding = 1600;
492 if ((ret = dnxhd_init_vlc(ctx)) < 0)
494 if ((ret = dnxhd_init_rc(ctx)) < 0)
497 if (!FF_ALLOCZ_TYPED_ARRAY(ctx->slice_size, ctx->m.mb_height) ||
498 !FF_ALLOCZ_TYPED_ARRAY(ctx->slice_offs, ctx->m.mb_height) ||
499 !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_bits, ctx->m.mb_num) ||
500 !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_qscale, ctx->m.mb_num))
515 ctx->thread[0] = ctx;
518 ctx->thread[i] = av_memdup(ctx, sizeof(DNXHDEncContext));
519 if (!ctx->thread[i])
529 DNXHDEncContext *ctx = avctx->priv_data;
531 memset(buf, 0, ctx->data_offset);
534 AV_WB16(buf + 0x02, ctx->data_offset);
535 if (ctx->cid >= 1270 && ctx->cid <= 1274)
540 buf[5] = ctx->interlaced ? ctx->cur_field + 2 : 0x01;
543 AV_WB16(buf + 0x18, avctx->height >> ctx->interlaced); // ALPF
545 AV_WB16(buf + 0x1d, avctx->height >> ctx->interlaced); // NAL
547 buf[0x21] = ctx->bit_depth == 10 ? 0x58 : 0x38;
548 buf[0x22] = 0x88 + (ctx->interlaced << 2);
549 AV_WB32(buf + 0x28, ctx->cid); // CID
550 buf[0x2c] = (!ctx->interlaced << 7) | (ctx->is_444 << 6) | (avctx->pix_fmt == AV_PIX_FMT_YUV444P10);
555 AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
556 AV_WB16(buf + 0x16c, ctx->m.mb_height); // Ns
559 ctx->msip = buf + 0x170;
563 static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
572 put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
573 (ctx->cid_table->dc_codes[nbits] << nbits) +
578 void dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block,
584 dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
585 ctx->m.last_dc[n] = block[0];
588 j = ctx->m.intra_scantable.permutated[i];
593 put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
595 put_bits(&ctx->m.pb, ctx->run_bits[run_level],
596 ctx->run_codes[run_level]);
600 put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB
604 void dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n,
611 if (ctx->is_444) {
612 weight_matrix = ((n % 6) < 2) ? ctx->cid_table->luma_weight
613 : ctx->cid_table->chroma_weight;
615 weight_matrix = (n & 2) ? ctx->cid_table->chroma_weight
616 : ctx->cid_table->luma_weight;
620 int j = ctx->m.intra_scantable.permutated[i];
625 if (ctx->bit_depth == 10) {
637 if (ctx->bit_depth == 10) {
662 int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index)
668 j = ctx->m.intra_scantable.permutated[i];
672 bits += ctx->vlc_bits[level * (1 << 1) |
673 !!run_level] + ctx->run_bits[run_level];
681 void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
683 const int bs = ctx->block_width_l2;
685 int dct_y_offset = ctx->dct_y_offset;
686 int dct_uv_offset = ctx->dct_uv_offset;
687 int linesize = ctx->m.linesize;
688 int uvlinesize = ctx->m.uvlinesize;
689 const uint8_t *ptr_y = ctx->thread[0]->src[0] +
690 ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs + 1);
691 const uint8_t *ptr_u = ctx->thread[0]->src[1] +
692 ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444);
693 const uint8_t *ptr_v = ctx->thread[0]->src[2] +
694 ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444);
695 PixblockDSPContext *pdsp = &ctx->m.pdsp;
696 VideoDSPContext *vdsp = &ctx->m.vdsp;
698 if (ctx->bit_depth != 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width ||
699 (mb_y << 4) + 16 > ctx->m.avctx->height)) {
700 int y_w = ctx->m.avctx->width - (mb_x << 4);
701 int y_h = ctx->m.avctx->height - (mb_y << 4);
707 vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y,
708 linesize, ctx->m.linesize,
711 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u,
712 uvlinesize, ctx->m.uvlinesize,
715 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v,
716 uvlinesize, ctx->m.uvlinesize,
722 ptr_y = &ctx->edge_buf_y[0];
723 ptr_u = &ctx->edge_buf_uv[0][0];
724 ptr_v = &ctx->edge_buf_uv[1][0];
725 } else if (ctx->bit_depth == 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width ||
726 (mb_y << 4) + 16 > ctx->m.avctx->height)) {
727 int y_w = ctx->m.avctx->width - (mb_x << 4);
728 int y_h = ctx->m.avctx->height - (mb_y << 4);
729 int uv_w = ctx->is_444 ? y_w : (y_w + 1) / 2;
732 uvlinesize = 16 + 16 * ctx->is_444;
734 vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y,
735 linesize, ctx->m.linesize,
738 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u,
739 uvlinesize, ctx->m.uvlinesize,
742 vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v,
743 uvlinesize, ctx->m.uvlinesize,
749 ptr_y = &ctx->edge_buf_y[0];
750 ptr_u = &ctx->edge_buf_uv[0][0];
751 ptr_v = &ctx->edge_buf_uv[1][0];
754 if (!ctx->is_444) {
755 pdsp->get_pixels(ctx->blocks[0], ptr_y, linesize);
756 pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize);
757 pdsp->get_pixels(ctx->blocks[2], ptr_u, uvlinesize);
758 pdsp->get_pixels(ctx->blocks[3], ptr_v, uvlinesize);
760 if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
761 if (ctx->interlaced) {
762 ctx->get_pixels_8x4_sym(ctx->blocks[4],
765 ctx->get_pixels_8x4_sym(ctx->blocks[5],
768 ctx->get_pixels_8x4_sym(ctx->blocks[6],
771 ctx->get_pixels_8x4_sym(ctx->blocks[7],
775 ctx->bdsp.clear_block(ctx->blocks[4]);
776 ctx->bdsp.clear_block(ctx->blocks[5]);
777 ctx->bdsp.clear_block(ctx->blocks[6]);
778 ctx->bdsp.clear_block(ctx->blocks[7]);
781 pdsp->get_pixels(ctx->blocks[4],
783 pdsp->get_pixels(ctx->blocks[5],
785 pdsp->get_pixels(ctx->blocks[6],
787 pdsp->get_pixels(ctx->blocks[7],
791 pdsp->get_pixels(ctx->blocks[0], ptr_y, linesize);
792 pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize);
793 pdsp->get_pixels(ctx->blocks[6], ptr_y + dct_y_offset, linesize);
794 pdsp->get_pixels(ctx->blocks[7], ptr_y + dct_y_offset + bw, linesize);
796 pdsp->get_pixels(ctx->blocks[2], ptr_u, uvlinesize);
797 pdsp->get_pixels(ctx->blocks[3], ptr_u + bw, uvlinesize);
798 pdsp->get_pixels(ctx->blocks[8], ptr_u + dct_uv_offset, uvlinesize);
799 pdsp->get_pixels(ctx->blocks[9], ptr_u + dct_uv_offset + bw, uvlinesize);
801 pdsp->get_pixels(ctx->blocks[4], ptr_v, uvlinesize);
802 pdsp->get_pixels(ctx->blocks[5], ptr_v + bw, uvlinesize);
803 pdsp->get_pixels(ctx->blocks[10], ptr_v + dct_uv_offset, uvlinesize);
804 pdsp->get_pixels(ctx->blocks[11], ptr_v + dct_uv_offset + bw, uvlinesize);
809 int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
813 if (ctx->is_444) {
825 DNXHDEncContext *ctx = avctx->priv_data;
827 int qscale = ctx->qscale;
829 ctx = ctx->thread[threadnr];
831 ctx->m.last_dc[0] =
832 ctx->m.last_dc[1] =
833 ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2);
835 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
836 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
842 dnxhd_get_blocks(ctx, mb_x, mb_y);
844 for (i = 0; i < 8 + 4 * ctx->is_444; i++) {
845 int16_t *src_block = ctx->blocks[i];
847 int n = dnxhd_switch_matrix(ctx, i);
850 last_index = ctx->m.dct_quantize(&ctx->m, block,
851 ctx->is_444 ? 4 * (n > 0): 4 & (2*i),
853 ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);
855 diff = block[0] - ctx->m.last_dc[n];
861 av_assert1(nbits < ctx->bit_depth + 4);
862 dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
864 ctx->m.last_dc[n] = block[0];
867 dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
868 ctx->m.idsp.idct(block);
872 ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].ssd = ssd;
873 ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].bits = ac_bits + dc_bits + 12 +
874 (1 + ctx->is_444) * 8 * ctx->vlc_bits[0];
882 DNXHDEncContext *ctx = avctx->priv_data;
884 ctx = ctx->thread[threadnr];
885 init_put_bits(&ctx->m.pb, (uint8_t *)arg + ctx->data_offset + ctx->slice_offs[jobnr],
886 ctx->slice_size[jobnr]);
888 ctx->m.last_dc[0] =
889 ctx->m.last_dc[1] =
890 ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2);
891 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
892 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
893 int qscale = ctx->mb_qscale[mb];
896 put_bits(&ctx->m.pb, 11, qscale);
897 put_bits(&ctx->m.pb, 1, avctx->pix_fmt == AV_PIX_FMT_YUV444P10);
899 dnxhd_get_blocks(ctx, mb_x, mb_y);
901 for (i = 0; i < 8 + 4 * ctx->is_444; i++) {
902 int16_t *block = ctx->blocks[i];
903 int overflow, n = dnxhd_switch_matrix(ctx, i);
904 int last_index = ctx->m.dct_quantize(&ctx->m, block,
905 ctx->is_444 ? (((i >> 1) % 3) < 1 ? 0 : 4): 4 & (2*i),
908 dnxhd_encode_block(ctx, block, last_index, n);
911 if (put_bits_count(&ctx->m.pb) & 31)
912 put_bits(&ctx->m.pb, 32 - (put_bits_count(&ctx->m.pb) & 31), 0);
913 flush_put_bits(&ctx->m.pb);
914 memset(put_bits_ptr(&ctx->m.pb), 0, put_bytes_left(&ctx->m.pb, 0));
918 static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx)
922 for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) {
924 ctx->slice_offs[mb_y] = offset;
925 ctx->slice_size[mb_y] = 0;
926 for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
927 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
928 ctx->slice_size[mb_y] += ctx->mb_bits[mb];
930 ctx->slice_size[mb_y] = (ctx->slice_size[mb_y] + 31U) & ~31U;
931 ctx->slice_size[mb_y] >>= 3;
932 thread_size = ctx->slice_size[mb_y];
940 DNXHDEncContext *ctx = avctx->priv_data;
942 int partial_last_row = (mb_y == ctx->m.mb_height - 1) &&
943 ((avctx->height >> ctx->interlaced) & 0xF);
945 ctx = ctx->thread[threadnr];
946 if (ctx->bit_depth == 8) {
947 uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize);
948 for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
949 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
954 sum = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize);
955 varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize);
958 int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
962 uint8_t val = pix[x + y * ctx->m.linesize];
970 ctx->mb_cmp[mb].value = varc;
971 ctx->mb_cmp[mb].mb = mb;
974 const int linesize = ctx->m.linesize >> 1;
975 for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
976 uint16_t *pix = (uint16_t *)ctx->thread[0]->src[0] +
978 unsigned mb = mb_y * ctx->m.mb_width + mb_x;
982 int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
998 ctx->mb_cmp[mb].value = sqmean - mean * mean;
999 ctx->mb_cmp[mb].mb = mb;
1005 static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
1012 ctx->qscale = q;
1014 NULL, NULL, ctx->m.mb_height);
1017 lambda = ctx->lambda;
1026 for (y = 0; y < ctx->m.mb_height; y++) {
1027 for (x = 0; x < ctx->m.mb_width; x++) {
1030 int mb = y * ctx->m.mb_width + x;
1033 int i = (q*ctx->m.mb_num) + mb;
1034 unsigned score = ctx->mb_rc[i].bits * lambda +
1035 ((unsigned) ctx->mb_rc[i].ssd << LAMBDA_FRAC_BITS);
1042 bits += ctx->mb_rc[rc].bits;
1043 ctx->mb_qscale[mb] = qscale;
1044 ctx->mb_bits[mb] = ctx->mb_rc[rc].bits;
1047 if (bits > ctx->frame_bits)
1051 if (bits > ctx->frame_bits)
1055 if (bits < ctx->frame_bits) {
1078 ctx->lambda = lambda;
1082 static int dnxhd_find_qscale(DNXHDEncContext *ctx)
1092 qscale = ctx->qscale;
1095 ctx->qscale = qscale;
1097 ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread,
1098 NULL, NULL, ctx->m.mb_height);
1099 for (y = 0; y < ctx->m.mb_height; y++) {
1100 for (x = 0; x < ctx->m.mb_width; x++)
1101 bits += ctx->mb_rc[(qscale*ctx->m.mb_num) + (y*ctx->m.mb_width+x)].bits;
1103 if (bits > ctx->frame_bits)
1106 if (bits < ctx->frame_bits) {
1130 if (qscale >= ctx->m.avctx->qmax)
1134 ctx->qscale = qscale;
1194 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
1198 if ((ret = dnxhd_find_qscale(ctx)) < 0)
1200 for (y = 0; y < ctx->m.mb_height; y++) {
1201 for (x = 0; x < ctx->m.mb_width; x++) {
1202 int mb = y * ctx->m.mb_width + x;
1203 int rc = (ctx->qscale * ctx->m.mb_num ) + mb;
1205 ctx->mb_qscale[mb] = ctx->qscale;
1206 ctx->mb_bits[mb] = ctx->mb_rc[rc].bits;
1207 max_bits += ctx->mb_rc[rc].bits;
1209 delta_bits = ctx->mb_rc[rc].bits -
1210 ctx->mb_rc[rc + ctx->m.mb_num].bits;
1211 ctx->mb_cmp[mb].mb = mb;
1212 ctx->mb_cmp[mb].value =
1213 delta_bits ? ((ctx->mb_rc[rc].ssd -
1214 ctx->mb_rc[rc + ctx->m.mb_num].ssd) * 100) /
1224 NULL, NULL, ctx->m.mb_height);
1225 radix_sort(ctx->mb_cmp, ctx->mb_cmp_tmp, ctx->m.mb_num);
1227 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
1228 int mb = ctx->mb_cmp[x].mb;
1229 int rc = (ctx->qscale * ctx->m.mb_num ) + mb;
1230 max_bits -= ctx->mb_rc[rc].bits -
1231 ctx->mb_rc[rc + ctx->m.mb_num].bits;
1232 if (ctx->mb_qscale[mb] < 255)
1233 ctx->mb_qscale[mb]++;
1234 ctx->mb_bits[mb] = ctx->mb_rc[rc + ctx->m.mb_num].bits;
1237 if (max_bits > ctx->frame_bits)
1243 static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
1247 for (i = 0; i < ctx->m.avctx->thread_count; i++) {
1248 ctx->thread[i]->m.linesize = frame->linesize[0] << ctx->interlaced;
1249 ctx->thread[i]->m.uvlinesize = frame->linesize[1] << ctx->interlaced;
1250 ctx->thread[i]->dct_y_offset = ctx->m.linesize *8;
1251 ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
1254 ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
1260 DNXHDEncContext *ctx = avctx->priv_data;
1265 if ((ret = ff_get_encode_buffer(avctx, pkt, ctx->frame_size, 0)) < 0)
1269 dnxhd_load_picture(ctx, frame);
1273 ctx->src[i] = frame->data[i];
1274 if (ctx->interlaced && ctx->cur_field)
1275 ctx->src[i] += frame->linesize[i];
1281 ret = dnxhd_encode_rdo(avctx, ctx);
1283 ret = dnxhd_encode_fast(avctx, ctx);
1290 dnxhd_setup_threads_slices(ctx);
1293 for (i = 0; i < ctx->m.mb_height; i++) {
1294 AV_WB32(ctx->msip + i * 4, offset);
1295 offset += ctx->slice_size[i];
1296 av_assert1(!(ctx->slice_size[i] & 3));
1299 avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height);
1301 av_assert1(ctx->data_offset + offset + 4 <= ctx->coding_unit_size);
1302 memset(buf + ctx->data_offset + offset, 0,
1303 ctx->coding_unit_size - 4 - offset - ctx->data_offset);
1305 AV_WB32(buf + ctx->coding_unit_size - 4, 0x600DC0DE); // EOF
1307 if (ctx->interlaced && first_field) {
1309 ctx->cur_field ^= 1;
1310 buf += ctx->coding_unit_size;
1314 ff_side_data_set_encoder_stats(pkt, ctx->qscale * FF_QP2LAMBDA, NULL, 0, AV_PICTURE_TYPE_I);
1322 DNXHDEncContext *ctx = avctx->priv_data;
1325 av_freep(&ctx->orig_vlc_codes);
1326 av_freep(&ctx->orig_vlc_bits);
1327 av_freep(&ctx->run_codes);
1328 av_freep(&ctx->run_bits);
1330 av_freep(&ctx->mb_bits);
1331 av_freep(&ctx->mb_qscale);
1332 av_freep(&ctx->mb_rc);
1333 av_freep(&ctx->mb_cmp);
1334 av_freep(&ctx->mb_cmp_tmp);
1335 av_freep(&ctx->slice_size);
1336 av_freep(&ctx->slice_offs);
1338 av_freep(&ctx->qmatrix_c);
1339 av_freep(&ctx->qmatrix_l);
1340 av_freep(&ctx->qmatrix_c16);
1341 av_freep(&ctx->qmatrix_l16);
1343 if (ctx->thread[1]) {
1345 av_freep(&ctx->thread[i]);