dnxhdenc.c - OpenGrok cross reference for /third_party/ffmpeg/libavcodec/dnxhdenc.c

Lines Matching refs:ctx
119 static int dnxhd_10bit_dct_quantize_444(MpegEncContext *ctx, int16_t *block,
124     const uint8_t *scantable= ctx->intra_scantable.scantable;
129     ctx->fdsp.fdct(block);
134     qmat = n < 4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale];
135     bias= ctx->intra_quant_bias * (1 << (16 - 8));
168     *overflow = ctx->max_qcoeff < max; //overflow might have happened
171     if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE)
172         ff_block_permute(block, ctx->idsp.idct_permutation,
178 static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
181     const uint8_t *scantable= ctx->intra_scantable.scantable;
182     const int *qmat = n<4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale];
186     ctx->fdsp.fdct(block);
202     if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE)
203         ff_block_permute(block, ctx->idsp.idct_permutation,
209 static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
212     int max_level = 1 << (ctx->bit_depth + 2);
214     if (!FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_codes, max_level * 4) ||
215         !FF_ALLOCZ_TYPED_ARRAY(ctx->orig_vlc_bits,  max_level * 4) ||
216         !(ctx->run_codes = av_mallocz(63 * 2))                     ||
217         !(ctx->run_bits  = av_mallocz(63)))
219     ctx->vlc_codes = ctx->orig_vlc_codes + max_level * 2;
220     ctx->vlc_bits  = ctx->orig_vlc_bits + max_level * 2;
232                 if (ctx->cid_table->ac_info[2*j+0] >> 1 == alevel &&
233                     (!offset || (ctx->cid_table->ac_info[2*j+1] & 1) && offset) &&
234                     (!run    || (ctx->cid_table->ac_info[2*j+1] & 2) && run)) {
235                     av_assert1(!ctx->vlc_codes[index]);
237                         ctx->vlc_codes[index] =
238                             (ctx->cid_table->ac_codes[j] << 1) | (sign & 1);
239                         ctx->vlc_bits[index] = ctx->cid_table->ac_bits[j] + 1;
241                         ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
242                         ctx->vlc_bits[index]  = ctx->cid_table->ac_bits[j];
249                 ctx->vlc_codes[index] =
250                     (ctx->vlc_codes[index] << ctx->cid_table->index_bits) | offset;
251                 ctx->vlc_bits[index] += ctx->cid_table->index_bits;
256         int run = ctx->cid_table->run[i];
258         ctx->run_codes[run] = ctx->cid_table->run_codes[i];
259         ctx->run_bits[run]  = ctx->cid_table->run_bits[i];
264 static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
269     const uint8_t *luma_weight_table   = ctx->cid_table->luma_weight;
270     const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
272     if (!FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l,   ctx->m.avctx->qmax + 1) ||
273         !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c,   ctx->m.avctx->qmax + 1) ||
274         !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_l16, ctx->m.avctx->qmax + 1) ||
275         !FF_ALLOCZ_TYPED_ARRAY(ctx->qmatrix_c16, ctx->m.avctx->qmax + 1))
278     if (ctx->bit_depth == 8) {
280             int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
281             weight_matrix[j] = ctx->cid_table->luma_weight[i];
283         ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16,
284                           weight_matrix, ctx->intra_quant_bias, 1,
285                           ctx->m.avctx->qmax, 1);
287             int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
288             weight_matrix[j] = ctx->cid_table->chroma_weight[i];
290         ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16,
291                           weight_matrix, ctx->intra_quant_bias, 1,
292                           ctx->m.avctx->qmax, 1);
294         for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
296                 ctx->qmatrix_l[qscale][i]      <<= 2;
297                 ctx->qmatrix_c[qscale][i]      <<= 2;
298                 ctx->qmatrix_l16[qscale][0][i] <<= 2;
299                 ctx->qmatrix_l16[qscale][1][i] <<= 2;
300                 ctx->qmatrix_c16[qscale][0][i] <<= 2;
301                 ctx->qmatrix_c16[qscale][1][i] <<= 2;
306         for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
317                  * We want values of ctx->qmatrix_l and ctx->qmatrix_c to be:
321                 ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) /
323                 ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) /
329     ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16;
330     ctx->m.q_chroma_intra_matrix   = ctx->qmatrix_c;
331     ctx->m.q_intra_matrix16        = ctx->qmatrix_l16;
332     ctx->m.q_intra_matrix          = ctx->qmatrix_l;
337 static av_cold int dnxhd_init_rc(DNXHDEncContext *ctx)
339     if (!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_rc, (ctx->m.avctx->qmax + 1) * ctx->m.mb_num))
342     if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD) {
343         if (!FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp,     ctx->m.mb_num) ||
344             !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_cmp_tmp, ctx->m.mb_num))
347     ctx->frame_bits = (ctx->coding_unit_size -
348                        ctx->data_offset - 4 - ctx->min_padding) * 8;
349     ctx->qscale = 1;
350     ctx->lambda = 2 << LAMBDA_FRAC_BITS; // qscale 2
356     DNXHDEncContext *ctx = avctx->priv_data;
361         ctx->bit_depth = 8;
366         ctx->bit_depth = 10;
370     if ((ctx->profile == FF_PROFILE_DNXHR_444 && (avctx->pix_fmt != AV_PIX_FMT_YUV444P10 &&
372         (ctx->profile != FF_PROFILE_DNXHR_444 && (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 ||
379     if (ctx->profile == FF_PROFILE_DNXHR_HQX && avctx->pix_fmt != AV_PIX_FMT_YUV422P10) {
385     if ((ctx->profile == FF_PROFILE_DNXHR_LB ||
386          ctx->profile == FF_PROFILE_DNXHR_SQ ||
387          ctx->profile == FF_PROFILE_DNXHR_HQ) && avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
393     ctx->is_444 = ctx->profile == FF_PROFILE_DNXHR_444;
394     avctx->profile = ctx->profile;
395     ctx->cid = ff_dnxhd_find_cid(avctx, ctx->bit_depth);
396     if (!ctx->cid) {
402     av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
404     if (ctx->cid >= 1270 && ctx->cid <= 1274)
413     ctx->cid_table = ff_dnxhd_get_cid_table(ctx->cid);
414     av_assert0(ctx->cid_table);
416     ctx->m.avctx    = avctx;
417     ctx->m.mb_intra = 1;
418     ctx->m.h263_aic = 1;
420     avctx->bits_per_raw_sample = ctx->bit_depth;
422     ff_blockdsp_init(&ctx->bdsp, avctx);
423     ff_fdctdsp_init(&ctx->m.fdsp, avctx);
424     ff_mpv_idct_init(&ctx->m);
425     ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
426     ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
427     ff_dct_encode_init(&ctx->m);
429     if (ctx->profile != FF_PROFILE_DNXHD)
430         ff_videodsp_init(&ctx->m.vdsp, ctx->bit_depth);
432     if (!ctx->m.dct_quantize)
433         ctx->m.dct_quantize = ff_dct_quantize_c;
435     if (ctx->is_444 || ctx->profile == FF_PROFILE_DNXHR_HQX) {
436         ctx->m.dct_quantize     = dnxhd_10bit_dct_quantize_444;
437         ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
438         ctx->block_width_l2     = 4;
439     } else if (ctx->bit_depth == 10) {
440         ctx->m.dct_quantize     = dnxhd_10bit_dct_quantize;
441         ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
442         ctx->block_width_l2     = 4;
444         ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
445         ctx->block_width_l2     = 3;
449     ff_dnxhdenc_init_x86(ctx);
452     ctx->m.mb_height = (avctx->height + 15) / 16;
453     ctx->m.mb_width  = (avctx->width  + 15) / 16;
456         ctx->interlaced   = 1;
457         ctx->m.mb_height /= 2;
460     if (ctx->interlaced && ctx->profile != FF_PROFILE_DNXHD) {
466     ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
468     if (ctx->cid_table->frame_size == DNXHD_VARIABLE) {
469         ctx->frame_size = ff_dnxhd_get_hr_frame_size(ctx->cid,
471         av_assert0(ctx->frame_size >= 0);
472         ctx->coding_unit_size = ctx->frame_size;
474         ctx->frame_size = ctx->cid_table->frame_size;
475         ctx->coding_unit_size = ctx->cid_table->coding_unit_size;
478     if (ctx->m.mb_height > 68)
479         ctx->data_offset = 0x170 + (ctx->m.mb_height << 2);
481         ctx->data_offset = 0x280;
484     if ((ret = dnxhd_init_qmat(ctx, ctx->intra_quant_bias, 0)) < 0)
489     if (ctx->nitris_compat)
490         ctx->min_padding = 1600;
492     if ((ret = dnxhd_init_vlc(ctx)) < 0)
494     if ((ret = dnxhd_init_rc(ctx)) < 0)
497     if (!FF_ALLOCZ_TYPED_ARRAY(ctx->slice_size, ctx->m.mb_height) ||
498         !FF_ALLOCZ_TYPED_ARRAY(ctx->slice_offs, ctx->m.mb_height) ||
499         !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_bits,    ctx->m.mb_num)    ||
500         !FF_ALLOCZ_TYPED_ARRAY(ctx->mb_qscale,  ctx->m.mb_num))
515     ctx->thread[0] = ctx;
518             ctx->thread[i] = av_memdup(ctx, sizeof(DNXHDEncContext));
519             if (!ctx->thread[i])
529     DNXHDEncContext *ctx = avctx->priv_data;
531     memset(buf, 0, ctx->data_offset);
534     AV_WB16(buf + 0x02, ctx->data_offset);
535     if (ctx->cid >= 1270 && ctx->cid <= 1274)
540     buf[5] = ctx->interlaced ? ctx->cur_field + 2 : 0x01;
543     AV_WB16(buf + 0x18, avctx->height >> ctx->interlaced); // ALPF
545     AV_WB16(buf + 0x1d, avctx->height >> ctx->interlaced); // NAL
547     buf[0x21] = ctx->bit_depth == 10 ? 0x58 : 0x38;
548     buf[0x22] = 0x88 + (ctx->interlaced << 2);
549     AV_WB32(buf + 0x28, ctx->cid); // CID
550     buf[0x2c] = (!ctx->interlaced << 7) | (ctx->is_444 << 6) | (avctx->pix_fmt == AV_PIX_FMT_YUV444P10);
555     AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
556     AV_WB16(buf + 0x16c, ctx->m.mb_height); // Ns
559     ctx->msip = buf + 0x170;
563 static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
572     put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
573              (ctx->cid_table->dc_codes[nbits] << nbits) +
578 void dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block,
584     dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
585     ctx->m.last_dc[n] = block[0];
588         j = ctx->m.intra_scantable.permutated[i];
593             put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
595                 put_bits(&ctx->m.pb, ctx->run_bits[run_level],
596                          ctx->run_codes[run_level]);
600     put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB
604 void dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n,
611     if (ctx->is_444) {
612         weight_matrix = ((n % 6) < 2) ? ctx->cid_table->luma_weight
613                                       : ctx->cid_table->chroma_weight;
615         weight_matrix = (n & 2) ? ctx->cid_table->chroma_weight
616                                 : ctx->cid_table->luma_weight;
620         int j = ctx->m.intra_scantable.permutated[i];
625                 if (ctx->bit_depth == 10) {
637                 if (ctx->bit_depth == 10) {
662 int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index)
668         j = ctx->m.intra_scantable.permutated[i];
672             bits += ctx->vlc_bits[level * (1 << 1) |
673                     !!run_level] + ctx->run_bits[run_level];
681 void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
683     const int bs = ctx->block_width_l2;
685     int dct_y_offset = ctx->dct_y_offset;
686     int dct_uv_offset = ctx->dct_uv_offset;
687     int linesize = ctx->m.linesize;
688     int uvlinesize = ctx->m.uvlinesize;
689     const uint8_t *ptr_y = ctx->thread[0]->src[0] +
690                            ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs + 1);
691     const uint8_t *ptr_u = ctx->thread[0]->src[1] +
692                            ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444);
693     const uint8_t *ptr_v = ctx->thread[0]->src[2] +
694                            ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444);
695     PixblockDSPContext *pdsp = &ctx->m.pdsp;
696     VideoDSPContext *vdsp = &ctx->m.vdsp;
698     if (ctx->bit_depth != 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width ||
699                                                            (mb_y << 4) + 16 > ctx->m.avctx->height)) {
700         int y_w = ctx->m.avctx->width  - (mb_x << 4);
701         int y_h = ctx->m.avctx->height - (mb_y << 4);
707         vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y,
708                                linesize, ctx->m.linesize,
711         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u,
712                                uvlinesize, ctx->m.uvlinesize,
715         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v,
716                                uvlinesize, ctx->m.uvlinesize,
722         ptr_y = &ctx->edge_buf_y[0];
723         ptr_u = &ctx->edge_buf_uv[0][0];
724         ptr_v = &ctx->edge_buf_uv[1][0];
725     } else if (ctx->bit_depth == 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width ||
726                                                                   (mb_y << 4) + 16 > ctx->m.avctx->height)) {
727         int y_w = ctx->m.avctx->width  - (mb_x << 4);
728         int y_h = ctx->m.avctx->height - (mb_y << 4);
729         int uv_w = ctx->is_444 ? y_w : (y_w + 1) / 2;
732         uvlinesize = 16 + 16 * ctx->is_444;
734         vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y,
735                                linesize, ctx->m.linesize,
738         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u,
739                                uvlinesize, ctx->m.uvlinesize,
742         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v,
743                                uvlinesize, ctx->m.uvlinesize,
749         ptr_y = &ctx->edge_buf_y[0];
750         ptr_u = &ctx->edge_buf_uv[0][0];
751         ptr_v = &ctx->edge_buf_uv[1][0];
754     if (!ctx->is_444) {
755         pdsp->get_pixels(ctx->blocks[0], ptr_y,      linesize);
756         pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize);
757         pdsp->get_pixels(ctx->blocks[2], ptr_u,      uvlinesize);
758         pdsp->get_pixels(ctx->blocks[3], ptr_v,      uvlinesize);
760         if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
761             if (ctx->interlaced) {
762                 ctx->get_pixels_8x4_sym(ctx->blocks[4],
765                 ctx->get_pixels_8x4_sym(ctx->blocks[5],
768                 ctx->get_pixels_8x4_sym(ctx->blocks[6],
771                 ctx->get_pixels_8x4_sym(ctx->blocks[7],
775                 ctx->bdsp.clear_block(ctx->blocks[4]);
776                 ctx->bdsp.clear_block(ctx->blocks[5]);
777                 ctx->bdsp.clear_block(ctx->blocks[6]);
778                 ctx->bdsp.clear_block(ctx->blocks[7]);
781             pdsp->get_pixels(ctx->blocks[4],
783             pdsp->get_pixels(ctx->blocks[5],
785             pdsp->get_pixels(ctx->blocks[6],
787             pdsp->get_pixels(ctx->blocks[7],
791         pdsp->get_pixels(ctx->blocks[0], ptr_y,      linesize);
792         pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize);
793         pdsp->get_pixels(ctx->blocks[6], ptr_y + dct_y_offset, linesize);
794         pdsp->get_pixels(ctx->blocks[7], ptr_y + dct_y_offset + bw, linesize);
796         pdsp->get_pixels(ctx->blocks[2], ptr_u,      uvlinesize);
797         pdsp->get_pixels(ctx->blocks[3], ptr_u + bw, uvlinesize);
798         pdsp->get_pixels(ctx->blocks[8], ptr_u + dct_uv_offset, uvlinesize);
799         pdsp->get_pixels(ctx->blocks[9], ptr_u + dct_uv_offset + bw, uvlinesize);
801         pdsp->get_pixels(ctx->blocks[4], ptr_v,      uvlinesize);
802         pdsp->get_pixels(ctx->blocks[5], ptr_v + bw, uvlinesize);
803         pdsp->get_pixels(ctx->blocks[10], ptr_v + dct_uv_offset, uvlinesize);
804         pdsp->get_pixels(ctx->blocks[11], ptr_v + dct_uv_offset + bw, uvlinesize);
809 int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
813     if (ctx->is_444) {
825     DNXHDEncContext *ctx = avctx->priv_data;
827     int qscale = ctx->qscale;
829     ctx = ctx->thread[threadnr];
831     ctx->m.last_dc[0] =
832     ctx->m.last_dc[1] =
833     ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2);
835     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
836         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
842         dnxhd_get_blocks(ctx, mb_x, mb_y);
844         for (i = 0; i < 8 + 4 * ctx->is_444; i++) {
845             int16_t *src_block = ctx->blocks[i];
847             int n = dnxhd_switch_matrix(ctx, i);
850             last_index = ctx->m.dct_quantize(&ctx->m, block,
851                                              ctx->is_444 ? 4 * (n > 0): 4 & (2*i),
853             ac_bits   += dnxhd_calc_ac_bits(ctx, block, last_index);
855             diff = block[0] - ctx->m.last_dc[n];
861             av_assert1(nbits < ctx->bit_depth + 4);
862             dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
864             ctx->m.last_dc[n] = block[0];
867                 dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
868                 ctx->m.idsp.idct(block);
872         ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].ssd  = ssd;
873         ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].bits = ac_bits + dc_bits + 12 +
874                                      (1 + ctx->is_444) * 8 * ctx->vlc_bits[0];
882     DNXHDEncContext *ctx = avctx->priv_data;
884     ctx = ctx->thread[threadnr];
885     init_put_bits(&ctx->m.pb, (uint8_t *)arg + ctx->data_offset + ctx->slice_offs[jobnr],
886                   ctx->slice_size[jobnr]);
888     ctx->m.last_dc[0] =
889     ctx->m.last_dc[1] =
890     ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2);
891     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
892         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
893         int qscale = ctx->mb_qscale[mb];
896         put_bits(&ctx->m.pb, 11, qscale);
897         put_bits(&ctx->m.pb, 1, avctx->pix_fmt == AV_PIX_FMT_YUV444P10);
899         dnxhd_get_blocks(ctx, mb_x, mb_y);
901         for (i = 0; i < 8 + 4 * ctx->is_444; i++) {
902             int16_t *block = ctx->blocks[i];
903             int overflow, n = dnxhd_switch_matrix(ctx, i);
904             int last_index = ctx->m.dct_quantize(&ctx->m, block,
905                                                  ctx->is_444 ? (((i >> 1) % 3) < 1 ? 0 : 4): 4 & (2*i),
908             dnxhd_encode_block(ctx, block, last_index, n);
911     if (put_bits_count(&ctx->m.pb) & 31)
912         put_bits(&ctx->m.pb, 32 - (put_bits_count(&ctx->m.pb) & 31), 0);
913     flush_put_bits(&ctx->m.pb);
914     memset(put_bits_ptr(&ctx->m.pb), 0, put_bytes_left(&ctx->m.pb, 0));
918 static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx)
922     for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) {
924         ctx->slice_offs[mb_y] = offset;
925         ctx->slice_size[mb_y] = 0;
926         for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
927             unsigned mb = mb_y * ctx->m.mb_width + mb_x;
928             ctx->slice_size[mb_y] += ctx->mb_bits[mb];
930         ctx->slice_size[mb_y]   = (ctx->slice_size[mb_y] + 31U) & ~31U;
931         ctx->slice_size[mb_y] >>= 3;
932         thread_size = ctx->slice_size[mb_y];
940     DNXHDEncContext *ctx = avctx->priv_data;
942     int partial_last_row = (mb_y == ctx->m.mb_height - 1) &&
943                            ((avctx->height >> ctx->interlaced) & 0xF);
945     ctx = ctx->thread[threadnr];
946     if (ctx->bit_depth == 8) {
947         uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize);
948         for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
949             unsigned mb = mb_y * ctx->m.mb_width + mb_x;
954                 sum  = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize);
955                 varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize);
958                 int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
962                         uint8_t val = pix[x + y * ctx->m.linesize];
970             ctx->mb_cmp[mb].value = varc;
971             ctx->mb_cmp[mb].mb    = mb;
974         const int linesize = ctx->m.linesize >> 1;
975         for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
976             uint16_t *pix = (uint16_t *)ctx->thread[0]->src[0] +
978             unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
982             int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
998             ctx->mb_cmp[mb].value = sqmean - mean * mean;
999             ctx->mb_cmp[mb].mb    = mb;
1005 static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
1012         ctx->qscale = q;
1014                         NULL, NULL, ctx->m.mb_height);
1017     lambda  = ctx->lambda;
1026         for (y = 0; y < ctx->m.mb_height; y++) {
1027             for (x = 0; x < ctx->m.mb_width; x++) {
1030                 int mb     = y * ctx->m.mb_width + x;
1033                     int i = (q*ctx->m.mb_num) + mb;
1034                     unsigned score = ctx->mb_rc[i].bits * lambda +
1035                                      ((unsigned) ctx->mb_rc[i].ssd << LAMBDA_FRAC_BITS);
1042                 bits += ctx->mb_rc[rc].bits;
1043                 ctx->mb_qscale[mb] = qscale;
1044                 ctx->mb_bits[mb]   = ctx->mb_rc[rc].bits;
1047             if (bits > ctx->frame_bits)
1051             if (bits > ctx->frame_bits)
1055         if (bits < ctx->frame_bits) {
1078     ctx->lambda = lambda;
1082 static int dnxhd_find_qscale(DNXHDEncContext *ctx)
1092     qscale = ctx->qscale;
1095         ctx->qscale = qscale;
1097         ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread,
1098                                NULL, NULL, ctx->m.mb_height);
1099         for (y = 0; y < ctx->m.mb_height; y++) {
1100             for (x = 0; x < ctx->m.mb_width; x++)
1101                 bits += ctx->mb_rc[(qscale*ctx->m.mb_num) + (y*ctx->m.mb_width+x)].bits;
1103             if (bits > ctx->frame_bits)
1106         if (bits < ctx->frame_bits) {
1130             if (qscale >= ctx->m.avctx->qmax)
1134     ctx->qscale = qscale;
1194 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
1198     if ((ret = dnxhd_find_qscale(ctx)) < 0)
1200     for (y = 0; y < ctx->m.mb_height; y++) {
1201         for (x = 0; x < ctx->m.mb_width; x++) {
1202             int mb = y * ctx->m.mb_width + x;
1203             int rc = (ctx->qscale * ctx->m.mb_num ) + mb;
1205             ctx->mb_qscale[mb] = ctx->qscale;
1206             ctx->mb_bits[mb] = ctx->mb_rc[rc].bits;
1207             max_bits += ctx->mb_rc[rc].bits;
1209                 delta_bits = ctx->mb_rc[rc].bits -
1210                              ctx->mb_rc[rc + ctx->m.mb_num].bits;
1211                 ctx->mb_cmp[mb].mb = mb;
1212                 ctx->mb_cmp[mb].value =
1213                     delta_bits ? ((ctx->mb_rc[rc].ssd -
1214                                    ctx->mb_rc[rc + ctx->m.mb_num].ssd) * 100) /
1224                             NULL, NULL, ctx->m.mb_height);
1225         radix_sort(ctx->mb_cmp, ctx->mb_cmp_tmp, ctx->m.mb_num);
1227         for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
1228             int mb = ctx->mb_cmp[x].mb;
1229             int rc = (ctx->qscale * ctx->m.mb_num ) + mb;
1230             max_bits -= ctx->mb_rc[rc].bits -
1231                         ctx->mb_rc[rc + ctx->m.mb_num].bits;
1232             if (ctx->mb_qscale[mb] < 255)
1233                 ctx->mb_qscale[mb]++;
1234             ctx->mb_bits[mb]   = ctx->mb_rc[rc + ctx->m.mb_num].bits;
1237         if (max_bits > ctx->frame_bits)
1243 static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
1247     for (i = 0; i < ctx->m.avctx->thread_count; i++) {
1248         ctx->thread[i]->m.linesize    = frame->linesize[0] << ctx->interlaced;
1249         ctx->thread[i]->m.uvlinesize  = frame->linesize[1] << ctx->interlaced;
1250         ctx->thread[i]->dct_y_offset  = ctx->m.linesize  *8;
1251         ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
1254     ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
1260     DNXHDEncContext *ctx = avctx->priv_data;
1265     if ((ret = ff_get_encode_buffer(avctx, pkt, ctx->frame_size, 0)) < 0)
1269     dnxhd_load_picture(ctx, frame);
1273         ctx->src[i] = frame->data[i];
1274         if (ctx->interlaced && ctx->cur_field)
1275             ctx->src[i] += frame->linesize[i];
1281         ret = dnxhd_encode_rdo(avctx, ctx);
1283         ret = dnxhd_encode_fast(avctx, ctx);
1290     dnxhd_setup_threads_slices(ctx);
1293     for (i = 0; i < ctx->m.mb_height; i++) {
1294         AV_WB32(ctx->msip + i * 4, offset);
1295         offset += ctx->slice_size[i];
1296         av_assert1(!(ctx->slice_size[i] & 3));
1299     avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height);
1301     av_assert1(ctx->data_offset + offset + 4 <= ctx->coding_unit_size);
1302     memset(buf + ctx->data_offset + offset, 0,
1303            ctx->coding_unit_size - 4 - offset - ctx->data_offset);
1305     AV_WB32(buf + ctx->coding_unit_size - 4, 0x600DC0DE); // EOF
1307     if (ctx->interlaced && first_field) {
1309         ctx->cur_field ^= 1;
1310         buf            += ctx->coding_unit_size;
1314     ff_side_data_set_encoder_stats(pkt, ctx->qscale * FF_QP2LAMBDA, NULL, 0, AV_PICTURE_TYPE_I);
1322     DNXHDEncContext *ctx = avctx->priv_data;
1325     av_freep(&ctx->orig_vlc_codes);
1326     av_freep(&ctx->orig_vlc_bits);
1327     av_freep(&ctx->run_codes);
1328     av_freep(&ctx->run_bits);
1330     av_freep(&ctx->mb_bits);
1331     av_freep(&ctx->mb_qscale);
1332     av_freep(&ctx->mb_rc);
1333     av_freep(&ctx->mb_cmp);
1334     av_freep(&ctx->mb_cmp_tmp);
1335     av_freep(&ctx->slice_size);
1336     av_freep(&ctx->slice_offs);
1338     av_freep(&ctx->qmatrix_c);
1339     av_freep(&ctx->qmatrix_l);
1340     av_freep(&ctx->qmatrix_c16);
1341     av_freep(&ctx->qmatrix_l16);
1343     if (ctx->thread[1]) {
1345             av_freep(&ctx->thread[i]);