1/*
2 * Apple ProRes encoder
3 *
4 * Copyright (c) 2012 Konstantin Shishkov
5 *
 * This encoder appears to be based on Anatoliy Wasserman's encoder,
 * considering the similarities in the bugs.
8 *
9 * This file is part of FFmpeg.
10 *
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26#include "libavutil/mem_internal.h"
27#include "libavutil/opt.h"
28#include "libavutil/pixdesc.h"
29#include "avcodec.h"
30#include "codec_internal.h"
31#include "encode.h"
32#include "fdctdsp.h"
33#include "put_bits.h"
34#include "profiles.h"
35#include "bytestream.h"
36#include "proresdata.h"
37
/*
 * Chroma sampling identifiers kept in ProresContext.chroma_factor.  The slice
 * code compares against CFACTOR_Y444 to decide whether a chroma plane is
 * processed at full width (4 blocks per macroblock) or at half width
 * (2 blocks per macroblock).
 */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound on macroblocks per slice; sizes the coefficient buffers. */
#define MAX_MBS_PER_SLICE 8

/* Number of plane slots in the coefficient buffers; the slice code treats
 * plane index 3 as alpha. */
#define MAX_PLANES 4
44
/*
 * Encoder profiles.  The non-negative values follow the order of the entries
 * in prores_profile_info[].
 * NOTE(review): PRORES_PROFILE_AUTO presumably lets the encoder choose the
 * profile itself; the selection code is outside this section - confirm.
 */
enum {
    PRORES_PROFILE_AUTO  = -1,
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
    PRORES_PROFILE_4444,
    PRORES_PROFILE_4444XQ,
};
54
/*
 * Identifiers of the base quantisation matrices.  The values follow the order
 * of the rows of prores_quant_matrices[] and are stored in the quant and
 * quant_chroma fields of prores_profile_info[].
 */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_PROXY_CHROMA,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_XQ_LUMA,
    QUANT_MAT_DEFAULT,
};
64
/*
 * Base 8x8 quantisation matrices, 64 entries per row, listed in the same
 * order as the QUANT_MAT_* values.
 * NOTE(review): presumably these are what ProresContext.quant_mat and
 * quant_chroma_mat point at; the slice code multiplies such a matrix by the
 * slice quantiser to obtain per-coefficient divisors - the assignment itself
 * is outside this section.
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // proxy chromas
        4,  7,  9, 11, 13, 14, 63, 63,
        7,  7, 11, 12, 14, 63, 63, 63,
        9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // XQ luma
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  3,
        2,  2,  2,  2,  2,  2,  3,  3,
        2,  2,  2,  2,  2,  3,  3,  3,
        2,  2,  2,  2,  3,  3,  3,  4,
        2,  2,  2,  2,  3,  3,  4,  4,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
137
/* Number of picture-size classes; also the length of prores_profile.br_tab. */
#define NUM_MB_LIMITS 4
/*
 * Macroblock-count thresholds delimiting the picture-size classes, ascending.
 * NOTE(review): presumably compared against the picture's macroblock count to
 * pick a column of prores_profile.br_tab - the lookup is outside this section.
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
145
/*
 * Per-profile encoder parameters.  prores_profile_info[] lists the profiles in
 * the same order as the non-negative PRORES_PROFILE_* values.
 *
 *  - full_name:    human-readable profile name
 *  - tag:          FourCC of the profile, built with MKTAG()
 *  - min_quant,
 *    max_quant:    regular quantiser range explored by find_slice_quant()
 *  - br_tab:       one entry per prores_mb_limits[] class
 *                  NOTE(review): presumably a bitrate scale; its use is
 *                  outside this section - confirm units there
 *  - quant:        QUANT_MAT_* value selecting the luma matrix
 *  - quant_chroma: QUANT_MAT_* value selecting the chroma matrix
 */
static const struct prores_profile {
    const char *full_name;
    uint32_t    tag;
    int         min_quant;
    int         max_quant;
    int         br_tab[NUM_MB_LIMITS];
    int         quant;
    int         quant_chroma;
} prores_profile_info[6] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
        .quant_chroma = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
        .quant_chroma = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444",
        .tag       = MKTAG('a', 'p', '4', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 2350, 1828, 1600, 1425 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444XQ",
        .tag       = MKTAG('a', 'p', '4', 'x'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 3525, 2742, 2400, 2137 },
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
        .quant_chroma = QUANT_MAT_HQ,
    }
};
210
/* Distance between the node groups of consecutive slices in the trellis:
 * find_slice_quant() reaches the previous slice's node for quantiser q at
 * index trellis_node - TRELLIS_WIDTH + q. */
#define TRELLIS_WIDTH 16
/* Score that marks a trellis path as unusable.  Parenthesized so that the
 * macro always expands as a single operand (e.g. in a / SCORE_LIMIT). */
#define SCORE_LIMIT   (INT_MAX / 2)
213
/*
 * One state of the quantiser-selection trellis built by find_slice_quant():
 * a (slice, quantiser) pair together with the best path found to reach it.
 */
struct TrellisNode {
    int prev_node; // index of the predecessor node, -1 while none is chosen
    int quant;     // quantiser this node stands for
    int bits;      // predecessor's bits plus the estimated bits of this slice
    int score;     // accumulated error of the path, SCORE_LIMIT if unusable
};
220
/* Quantisers below this value use the stored matrices in
 * ProresContext.quants[] / quants_chroma[]; larger ones get a matrix computed
 * on the fly into a custom_q scratch buffer. */
#define MAX_STORED_Q 16

/*
 * Scratch state used by find_slice_quant() while estimating one slice.
 * NOTE(review): the name suggests one instance per worker thread; the
 * allocation is outside this section - confirm.
 */
typedef struct ProresThreadData {
    /* transformed coefficients (or alpha samples for plane 3) of each plane */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 staging area for macroblocks that cross the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
    /* luma / chroma matrices built for quantisers >= MAX_STORED_Q */
    int16_t custom_q[64];
    int16_t custom_chroma_q[64];
    /* trellis nodes read and written by find_slice_quant() */
    struct TrellisNode *nodes;
} ProresThreadData;
230
/*
 * Private encoder state (reached through AVCodecContext.priv_data).
 * Only fields whose role is visible in the slice code are annotated.
 */
typedef struct ProresContext {
    AVClass *class;
    /* coefficient scratch used by encode_slice(); only slot 0 is used there */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 edge-emulation scratch used by encode_slice() */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    /* luma / chroma matrices indexed by quantiser (< MAX_STORED_Q); row 0 is
     * the one used when force_quant is set */
    int16_t quants[MAX_STORED_Q][64];
    int16_t quants_chroma[MAX_STORED_Q][64];
    /* matrices built by encode_slice() for quantisers >= MAX_STORED_Q */
    int16_t custom_q[64];
    int16_t custom_chroma_q[64];
    /* base luma / chroma matrices that get multiplied by the quantiser */
    const uint8_t *quant_mat;
    const uint8_t *quant_chroma_mat;
    /* coefficient scan order handed to the AC coding / estimation routines */
    const uint8_t *scantable;

    /* forward DCT callback invoked once per 8x8 block by get_slice_data() */
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
                 ptrdiff_t linesize, int16_t *block);
    FDCTDSPContext fdsp;

    const AVFrame *pic;     // frame read by find_slice_quant()
    int mb_width, mb_height;
    int mbs_per_slice;
    int num_chroma_blocks, chroma_factor; // chroma_factor is a CFACTOR_* value
    int slices_width;
    int slices_per_picture;
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    int cur_picture_idx;    // with two pictures per frame, selects the field lines
    int num_planes;         // planes processed per slice
    int bits_per_mb;        // bit budget per macroblock used by find_slice_quant()
    int force_quant;        // non-zero: encode_slice() always uses quants[0]
    int alpha_bits;         // 0: no alpha estimate; 8 selects the short alpha codes
    int warn;

    char *vendor;
    int quant_sel;

    int frame_size_upper_bound;

    int profile;
    const struct prores_profile *profile_info; // supplies min_quant / max_quant

    int *slice_q;

    ProresThreadData *tdata;
} ProresContext;
273
274static void get_slice_data(ProresContext *ctx, const uint16_t *src,
275                           ptrdiff_t linesize, int x, int y, int w, int h,
276                           int16_t *blocks, uint16_t *emu_buf,
277                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
278{
279    const uint16_t *esrc;
280    const int mb_width = 4 * blocks_per_mb;
281    ptrdiff_t elinesize;
282    int i, j, k;
283
284    for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
285        if (x >= w) {
286            memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
287                              * sizeof(*blocks));
288            return;
289        }
290        if (x + mb_width <= w && y + 16 <= h) {
291            esrc      = src;
292            elinesize = linesize;
293        } else {
294            int bw, bh, pix;
295
296            esrc      = emu_buf;
297            elinesize = 16 * sizeof(*emu_buf);
298
299            bw = FFMIN(w - x, mb_width);
300            bh = FFMIN(h - y, 16);
301
302            for (j = 0; j < bh; j++) {
303                memcpy(emu_buf + j * 16,
304                       (const uint8_t*)src + j * linesize,
305                       bw * sizeof(*src));
306                pix = emu_buf[j * 16 + bw - 1];
307                for (k = bw; k < mb_width; k++)
308                    emu_buf[j * 16 + k] = pix;
309            }
310            for (; j < 16; j++)
311                memcpy(emu_buf + j * 16,
312                       emu_buf + (bh - 1) * 16,
313                       mb_width * sizeof(*emu_buf));
314        }
315        if (!is_chroma) {
316            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
317            blocks += 64;
318            if (blocks_per_mb > 2) {
319                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
320                blocks += 64;
321            }
322            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
323            blocks += 64;
324            if (blocks_per_mb > 2) {
325                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
326                blocks += 64;
327            }
328        } else {
329            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
330            blocks += 64;
331            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
332            blocks += 64;
333            if (blocks_per_mb > 2) {
334                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
335                blocks += 64;
336                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
337                blocks += 64;
338            }
339        }
340
341        x += mb_width;
342    }
343}
344
345static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
346                           ptrdiff_t linesize, int x, int y, int w, int h,
347                           int16_t *blocks, int mbs_per_slice, int abits)
348{
349    const int slice_width = 16 * mbs_per_slice;
350    int i, j, copy_w, copy_h;
351
352    copy_w = FFMIN(w - x, slice_width);
353    copy_h = FFMIN(h - y, 16);
354    for (i = 0; i < copy_h; i++) {
355        memcpy(blocks, src, copy_w * sizeof(*src));
356        if (abits == 8)
357            for (j = 0; j < copy_w; j++)
358                blocks[j] >>= 2;
359        else
360            for (j = 0; j < copy_w; j++)
361                blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
362        for (j = copy_w; j < slice_width; j++)
363            blocks[j] = blocks[copy_w - 1];
364        blocks += slice_width;
365        src    += linesize >> 1;
366    }
367    for (; i < 16; i++) {
368        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
369        blocks += slice_width;
370    }
371}
372
373/**
374 * Write an unsigned rice/exp golomb codeword.
375 */
376static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
377{
378    unsigned int rice_order, exp_order, switch_bits, switch_val;
379    int exponent;
380
381    /* number of prefix bits to switch between Rice and expGolomb */
382    switch_bits = (codebook & 3) + 1;
383    rice_order  =  codebook >> 5;       /* rice code order */
384    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
385
386    switch_val  = switch_bits << rice_order;
387
388    if (val >= switch_val) {
389        val -= switch_val - (1 << exp_order);
390        exponent = av_log2(val);
391
392        put_bits(pb, exponent - exp_order + switch_bits, 0);
393        put_bits(pb, exponent + 1, val);
394    } else {
395        exponent = val >> rice_order;
396
397        if (exponent)
398            put_bits(pb, exponent, 0);
399        put_bits(pb, 1, 1);
400        if (rice_order)
401            put_sbits(pb, rice_order, val);
402    }
403}
404
/* All-ones (-1) when the int operand is negative, 0 otherwise. */
#define GET_SIGN(x)  (-((x) < 0))
/* Map a signed int to an unsigned code: 0, -1, 1, -2, 2, ... become
 * 0, 1, 2, 3, 4, ...  The operand is evaluated twice. */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
407
408static void encode_dcs(PutBitContext *pb, int16_t *blocks,
409                       int blocks_per_slice, int scale)
410{
411    int i;
412    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
413
414    prev_dc = (blocks[0] - 0x4000) / scale;
415    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
416    sign     = 0;
417    codebook = 3;
418    blocks  += 64;
419
420    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
421        dc       = (blocks[0] - 0x4000) / scale;
422        delta    = dc - prev_dc;
423        new_sign = GET_SIGN(delta);
424        delta    = (delta ^ sign) - sign;
425        code     = MAKE_CODE(delta);
426        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
427        codebook = (code + (code & 1)) >> 1;
428        codebook = FFMIN(codebook, 3);
429        sign     = new_sign;
430        prev_dc  = dc;
431    }
432}
433
434static void encode_acs(PutBitContext *pb, int16_t *blocks,
435                       int blocks_per_slice,
436                       int plane_size_factor,
437                       const uint8_t *scan, const int16_t *qmat)
438{
439    int idx, i;
440    int run, level, run_cb, lev_cb;
441    int max_coeffs, abs_level;
442
443    max_coeffs = blocks_per_slice << 6;
444    run_cb     = ff_prores_run_to_cb_index[4];
445    lev_cb     = ff_prores_lev_to_cb_index[2];
446    run        = 0;
447
448    for (i = 1; i < 64; i++) {
449        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
450            level = blocks[idx] / qmat[scan[i]];
451            if (level) {
452                abs_level = FFABS(level);
453                encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
454                encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
455                                    abs_level - 1);
456                put_sbits(pb, 1, GET_SIGN(level));
457
458                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
459                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
460                run    = 0;
461            } else {
462                run++;
463            }
464        }
465    }
466}
467
468static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
469                              const uint16_t *src, ptrdiff_t linesize,
470                              int mbs_per_slice, int16_t *blocks,
471                              int blocks_per_mb, int plane_size_factor,
472                              const int16_t *qmat)
473{
474    int blocks_per_slice = mbs_per_slice * blocks_per_mb;
475
476    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
477    encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
478               ctx->scantable, qmat);
479}
480
481static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
482{
483    const int dbits = (abits == 8) ? 4 : 7;
484    const int dsize = 1 << dbits - 1;
485    int diff = cur - prev;
486
487    diff = av_mod_uintp2(diff, abits);
488    if (diff >= (1 << abits) - dsize)
489        diff -= 1 << abits;
490    if (diff < -dsize || diff > dsize || !diff) {
491        put_bits(pb, 1, 1);
492        put_bits(pb, abits, diff);
493    } else {
494        put_bits(pb, 1, 0);
495        put_bits(pb, dbits - 1, FFABS(diff) - 1);
496        put_bits(pb, 1, diff < 0);
497    }
498}
499
500static void put_alpha_run(PutBitContext *pb, int run)
501{
502    if (run) {
503        put_bits(pb, 1, 0);
504        if (run < 0x10)
505            put_bits(pb, 4, run);
506        else
507            put_bits(pb, 15, run);
508    } else {
509        put_bits(pb, 1, 1);
510    }
511}
512
513// todo alpha quantisation for high quants
514static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
515                              int mbs_per_slice, uint16_t *blocks,
516                              int quant)
517{
518    const int abits = ctx->alpha_bits;
519    const int mask  = (1 << abits) - 1;
520    const int num_coeffs = mbs_per_slice * 256;
521    int prev = mask, cur;
522    int idx = 0;
523    int run = 0;
524
525    cur = blocks[idx++];
526    put_alpha_diff(pb, cur, prev, abits);
527    prev = cur;
528    do {
529        cur = blocks[idx++];
530        if (cur != prev) {
531            put_alpha_run (pb, run);
532            put_alpha_diff(pb, cur, prev, abits);
533            prev = cur;
534            run  = 0;
535        } else {
536            run++;
537        }
538    } while (idx < num_coeffs);
539    if (run)
540        put_alpha_run(pb, run);
541}
542
543static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
544                        PutBitContext *pb,
545                        int sizes[4], int x, int y, int quant,
546                        int mbs_per_slice)
547{
548    ProresContext *ctx = avctx->priv_data;
549    int i, xp, yp;
550    int total_size = 0;
551    const uint16_t *src;
552    int slice_width_factor = av_log2(mbs_per_slice);
553    int num_cblocks, pwidth, line_add;
554    ptrdiff_t linesize;
555    int plane_factor, is_chroma;
556    uint16_t *qmat;
557    uint16_t *qmat_chroma;
558
559    if (ctx->pictures_per_frame == 1)
560        line_add = 0;
561    else
562        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
563
564    if (ctx->force_quant) {
565        qmat = ctx->quants[0];
566        qmat_chroma = ctx->quants_chroma[0];
567    } else if (quant < MAX_STORED_Q) {
568        qmat = ctx->quants[quant];
569        qmat_chroma = ctx->quants_chroma[quant];
570    } else {
571        qmat = ctx->custom_q;
572        qmat_chroma = ctx->custom_chroma_q;
573        for (i = 0; i < 64; i++) {
574            qmat[i] = ctx->quant_mat[i] * quant;
575            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
576        }
577    }
578
579    for (i = 0; i < ctx->num_planes; i++) {
580        is_chroma    = (i == 1 || i == 2);
581        plane_factor = slice_width_factor + 2;
582        if (is_chroma)
583            plane_factor += ctx->chroma_factor - 3;
584        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
585            xp          = x << 4;
586            yp          = y << 4;
587            num_cblocks = 4;
588            pwidth      = avctx->width;
589        } else {
590            xp          = x << 3;
591            yp          = y << 4;
592            num_cblocks = 2;
593            pwidth      = avctx->width >> 1;
594        }
595
596        linesize = pic->linesize[i] * ctx->pictures_per_frame;
597        src = (const uint16_t*)(pic->data[i] + yp * linesize +
598                                line_add * pic->linesize[i]) + xp;
599
600        if (i < 3) {
601            get_slice_data(ctx, src, linesize, xp, yp,
602                           pwidth, avctx->height / ctx->pictures_per_frame,
603                           ctx->blocks[0], ctx->emu_buf,
604                           mbs_per_slice, num_cblocks, is_chroma);
605            if (!is_chroma) {/* luma quant */
606                encode_slice_plane(ctx, pb, src, linesize,
607                                   mbs_per_slice, ctx->blocks[0],
608                                   num_cblocks, plane_factor, qmat);
609            } else { /* chroma plane */
610                encode_slice_plane(ctx, pb, src, linesize,
611                                   mbs_per_slice, ctx->blocks[0],
612                                   num_cblocks, plane_factor, qmat_chroma);
613            }
614        } else {
615            get_alpha_data(ctx, src, linesize, xp, yp,
616                           pwidth, avctx->height / ctx->pictures_per_frame,
617                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
618            encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
619        }
620        flush_put_bits(pb);
621        sizes[i]   = put_bytes_output(pb) - total_size;
622        total_size = put_bytes_output(pb);
623    }
624    return total_size;
625}
626
/**
 * Return the number of bits encode_vlc_codeword() writes for a value.
 *
 * @param codebook  packed codebook descriptor (bits 0-1: switch prefix length
 *                  minus one, bits 2-4: exp-Golomb order, bits 5 and up:
 *                  Rice order)
 * @param val       value to be coded
 * @return codeword length in bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int rice_order  =  codebook >> 5;
    /* first value that is coded with exp-Golomb instead of Rice */
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice: unary quotient, terminating bit, rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb: zero prefix plus exponent + 1 value bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
648
649static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
650                        int scale)
651{
652    int i;
653    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
654    int bits;
655
656    prev_dc  = (blocks[0] - 0x4000) / scale;
657    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
658    sign     = 0;
659    codebook = 3;
660    blocks  += 64;
661    *error  += FFABS(blocks[0] - 0x4000) % scale;
662
663    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
664        dc       = (blocks[0] - 0x4000) / scale;
665        *error  += FFABS(blocks[0] - 0x4000) % scale;
666        delta    = dc - prev_dc;
667        new_sign = GET_SIGN(delta);
668        delta    = (delta ^ sign) - sign;
669        code     = MAKE_CODE(delta);
670        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
671        codebook = (code + (code & 1)) >> 1;
672        codebook = FFMIN(codebook, 3);
673        sign     = new_sign;
674        prev_dc  = dc;
675    }
676
677    return bits;
678}
679
680static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
681                        int plane_size_factor,
682                        const uint8_t *scan, const int16_t *qmat)
683{
684    int idx, i;
685    int run, level, run_cb, lev_cb;
686    int max_coeffs, abs_level;
687    int bits = 0;
688
689    max_coeffs = blocks_per_slice << 6;
690    run_cb     = ff_prores_run_to_cb_index[4];
691    lev_cb     = ff_prores_lev_to_cb_index[2];
692    run        = 0;
693
694    for (i = 1; i < 64; i++) {
695        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
696            level   = blocks[idx] / qmat[scan[i]];
697            *error += FFABS(blocks[idx]) % qmat[scan[i]];
698            if (level) {
699                abs_level = FFABS(level);
700                bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
701                bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
702                                     abs_level - 1) + 1;
703
704                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
705                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
706                run    = 0;
707            } else {
708                run++;
709            }
710        }
711    }
712
713    return bits;
714}
715
716static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
717                                const uint16_t *src, ptrdiff_t linesize,
718                                int mbs_per_slice,
719                                int blocks_per_mb, int plane_size_factor,
720                                const int16_t *qmat, ProresThreadData *td)
721{
722    int blocks_per_slice;
723    int bits;
724
725    blocks_per_slice = mbs_per_slice * blocks_per_mb;
726
727    bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
728    bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
729                         plane_size_factor, ctx->scantable, qmat);
730
731    return FFALIGN(bits, 8);
732}
733
/**
 * Return the number of bits put_alpha_diff() writes for one value change.
 *
 * @param cur    current alpha value
 * @param prev   previous alpha value
 * @param abits  alpha bit depth; 8 selects 4-bit short codes, any other
 *               value 7-bit ones
 * @return dbits + 1 for a short-form difference, abits + 1 otherwise
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = abits == 8 ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    /* fold the upper end of the range onto small negative differences */
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize)
        return dbits + 1;

    return abits + 1;
}
748
749static int estimate_alpha_plane(ProresContext *ctx,
750                                const uint16_t *src, ptrdiff_t linesize,
751                                int mbs_per_slice, int16_t *blocks)
752{
753    const int abits = ctx->alpha_bits;
754    const int mask  = (1 << abits) - 1;
755    const int num_coeffs = mbs_per_slice * 256;
756    int prev = mask, cur;
757    int idx = 0;
758    int run = 0;
759    int bits;
760
761    cur = blocks[idx++];
762    bits = est_alpha_diff(cur, prev, abits);
763    prev = cur;
764    do {
765        cur = blocks[idx++];
766        if (cur != prev) {
767            if (!run)
768                bits++;
769            else if (run < 0x10)
770                bits += 4;
771            else
772                bits += 15;
773            bits += est_alpha_diff(cur, prev, abits);
774            prev = cur;
775            run  = 0;
776        } else {
777            run++;
778        }
779    } while (idx < num_coeffs);
780
781    if (run) {
782        if (run < 0x10)
783            bits += 4;
784        else
785            bits += 15;
786    }
787
788    return bits;
789}
790
791static int find_slice_quant(AVCodecContext *avctx,
792                            int trellis_node, int x, int y, int mbs_per_slice,
793                            ProresThreadData *td)
794{
795    ProresContext *ctx = avctx->priv_data;
796    int i, q, pq, xp, yp;
797    const uint16_t *src;
798    int slice_width_factor = av_log2(mbs_per_slice);
799    int num_cblocks[MAX_PLANES], pwidth;
800    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
801    const int min_quant = ctx->profile_info->min_quant;
802    const int max_quant = ctx->profile_info->max_quant;
803    int error, bits, bits_limit;
804    int mbs, prev, cur, new_score;
805    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
806    int overquant;
807    uint16_t *qmat;
808    uint16_t *qmat_chroma;
809    int linesize[4], line_add;
810    int alpha_bits = 0;
811
812    if (ctx->pictures_per_frame == 1)
813        line_add = 0;
814    else
815        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
816    mbs = x + mbs_per_slice;
817
818    for (i = 0; i < ctx->num_planes; i++) {
819        is_chroma[i]    = (i == 1 || i == 2);
820        plane_factor[i] = slice_width_factor + 2;
821        if (is_chroma[i])
822            plane_factor[i] += ctx->chroma_factor - 3;
823        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
824            xp             = x << 4;
825            yp             = y << 4;
826            num_cblocks[i] = 4;
827            pwidth         = avctx->width;
828        } else {
829            xp             = x << 3;
830            yp             = y << 4;
831            num_cblocks[i] = 2;
832            pwidth         = avctx->width >> 1;
833        }
834
835        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
836        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
837                                 line_add * ctx->pic->linesize[i]) + xp;
838
839        if (i < 3) {
840            get_slice_data(ctx, src, linesize[i], xp, yp,
841                           pwidth, avctx->height / ctx->pictures_per_frame,
842                           td->blocks[i], td->emu_buf,
843                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
844        } else {
845            get_alpha_data(ctx, src, linesize[i], xp, yp,
846                           pwidth, avctx->height / ctx->pictures_per_frame,
847                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
848        }
849    }
850
851    for (q = min_quant; q < max_quant + 2; q++) {
852        td->nodes[trellis_node + q].prev_node = -1;
853        td->nodes[trellis_node + q].quant     = q;
854    }
855
856    if (ctx->alpha_bits)
857        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
858                                          mbs_per_slice, td->blocks[3]);
859    // todo: maybe perform coarser quantising to fit into frame size when needed
860    for (q = min_quant; q <= max_quant; q++) {
861        bits  = alpha_bits;
862        error = 0;
863        bits += estimate_slice_plane(ctx, &error, 0,
864                                     src, linesize[0],
865                                     mbs_per_slice,
866                                     num_cblocks[0], plane_factor[0],
867                                     ctx->quants[q], td); /* estimate luma plane */
868        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
869            bits += estimate_slice_plane(ctx, &error, i,
870                                         src, linesize[i],
871                                         mbs_per_slice,
872                                         num_cblocks[i], plane_factor[i],
873                                         ctx->quants_chroma[q], td);
874        }
875        if (bits > 65000 * 8)
876            error = SCORE_LIMIT;
877
878        slice_bits[q]  = bits;
879        slice_score[q] = error;
880    }
881    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
882        slice_bits[max_quant + 1]  = slice_bits[max_quant];
883        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
884        overquant = max_quant;
885    } else {
886        for (q = max_quant + 1; q < 128; q++) {
887            bits  = alpha_bits;
888            error = 0;
889            if (q < MAX_STORED_Q) {
890                qmat = ctx->quants[q];
891                qmat_chroma = ctx->quants_chroma[q];
892            } else {
893                qmat = td->custom_q;
894                qmat_chroma = td->custom_chroma_q;
895                for (i = 0; i < 64; i++) {
896                    qmat[i] = ctx->quant_mat[i] * q;
897                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
898                }
899            }
900            bits += estimate_slice_plane(ctx, &error, 0,
901                                         src, linesize[0],
902                                         mbs_per_slice,
903                                         num_cblocks[0], plane_factor[0],
904                                         qmat, td);/* estimate luma plane */
905            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
906                bits += estimate_slice_plane(ctx, &error, i,
907                                             src, linesize[i],
908                                             mbs_per_slice,
909                                             num_cblocks[i], plane_factor[i],
910                                             qmat_chroma, td);
911            }
912            if (bits <= ctx->bits_per_mb * mbs_per_slice)
913                break;
914        }
915
916        slice_bits[max_quant + 1]  = bits;
917        slice_score[max_quant + 1] = error;
918        overquant = q;
919    }
920    td->nodes[trellis_node + max_quant + 1].quant = overquant;
921
922    bits_limit = mbs * ctx->bits_per_mb;
923    for (pq = min_quant; pq < max_quant + 2; pq++) {
924        prev = trellis_node - TRELLIS_WIDTH + pq;
925
926        for (q = min_quant; q < max_quant + 2; q++) {
927            cur = trellis_node + q;
928
929            bits  = td->nodes[prev].bits + slice_bits[q];
930            error = slice_score[q];
931            if (bits > bits_limit)
932                error = SCORE_LIMIT;
933
934            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
935                new_score = td->nodes[prev].score + error;
936            else
937                new_score = SCORE_LIMIT;
938            if (td->nodes[cur].prev_node == -1 ||
939                td->nodes[cur].score >= new_score) {
940
941                td->nodes[cur].bits      = bits;
942                td->nodes[cur].score     = new_score;
943                td->nodes[cur].prev_node = prev;
944            }
945        }
946    }
947
948    error = td->nodes[trellis_node + min_quant].score;
949    pq    = trellis_node + min_quant;
950    for (q = min_quant + 1; q < max_quant + 2; q++) {
951        if (td->nodes[trellis_node + q].score <= error) {
952            error = td->nodes[trellis_node + q].score;
953            pq    = trellis_node + q;
954        }
955    }
956
957    return pq;
958}
959
960static int find_quant_thread(AVCodecContext *avctx, void *arg,
961                             int jobnr, int threadnr)
962{
963    ProresContext *ctx = avctx->priv_data;
964    ProresThreadData *td = ctx->tdata + threadnr;
965    int mbs_per_slice = ctx->mbs_per_slice;
966    int x, y = jobnr, mb, q = 0;
967
968    for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
969        while (ctx->mb_width - x < mbs_per_slice)
970            mbs_per_slice >>= 1;
971        q = find_slice_quant(avctx,
972                             (mb + 1) * TRELLIS_WIDTH, x, y,
973                             mbs_per_slice, td);
974    }
975
976    for (x = ctx->slices_width - 1; x >= 0; x--) {
977        ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
978        q = td->nodes[q].prev_node;
979    }
980
981    return 0;
982}
983
984static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
985                        const AVFrame *pic, int *got_packet)
986{
987    ProresContext *ctx = avctx->priv_data;
988    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
989    uint8_t *picture_size_pos;
990    PutBitContext pb;
991    int x, y, i, mb, q = 0;
992    int sizes[4] = { 0 };
993    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
994    int frame_size, picture_size, slice_size;
995    int pkt_size, ret;
996    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
997    uint8_t frame_flags;
998
999    ctx->pic = pic;
1000    pkt_size = ctx->frame_size_upper_bound;
1001
1002    if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
1003        return ret;
1004
1005    orig_buf = pkt->data;
1006
1007    // frame atom
1008    orig_buf += 4;                              // frame size
1009    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
1010    buf = orig_buf;
1011
1012    // frame header
1013    tmp = buf;
1014    buf += 2;                                   // frame header size will be stored here
1015    bytestream_put_be16  (&buf, 0);             // version 1
1016    bytestream_put_buffer(&buf, ctx->vendor, 4);
1017    bytestream_put_be16  (&buf, avctx->width);
1018    bytestream_put_be16  (&buf, avctx->height);
1019
1020    frame_flags = ctx->chroma_factor << 6;
1021    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1022        frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1023    bytestream_put_byte  (&buf, frame_flags);
1024
1025    bytestream_put_byte  (&buf, 0);             // reserved
1026    bytestream_put_byte  (&buf, pic->color_primaries);
1027    bytestream_put_byte  (&buf, pic->color_trc);
1028    bytestream_put_byte  (&buf, pic->colorspace);
1029    bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
1030    bytestream_put_byte  (&buf, 0);             // reserved
1031    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1032        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
1033        // luma quantisation matrix
1034        for (i = 0; i < 64; i++)
1035            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1036        // chroma quantisation matrix
1037        for (i = 0; i < 64; i++)
1038            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1039    } else {
1040        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
1041    }
1042    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
1043
1044    for (ctx->cur_picture_idx = 0;
1045         ctx->cur_picture_idx < ctx->pictures_per_frame;
1046         ctx->cur_picture_idx++) {
1047        // picture header
1048        picture_size_pos = buf + 1;
1049        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1050        buf += 4;                                   // picture data size will be stored here
1051        bytestream_put_be16  (&buf, ctx->slices_per_picture);
1052        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1053
1054        // seek table - will be filled during slice encoding
1055        slice_sizes = buf;
1056        buf += ctx->slices_per_picture * 2;
1057
1058        // slices
1059        if (!ctx->force_quant) {
1060            ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1061                                  ctx->mb_height);
1062            if (ret)
1063                return ret;
1064        }
1065
1066        for (y = 0; y < ctx->mb_height; y++) {
1067            int mbs_per_slice = ctx->mbs_per_slice;
1068            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1069                q = ctx->force_quant ? ctx->force_quant
1070                                     : ctx->slice_q[mb + y * ctx->slices_width];
1071
1072                while (ctx->mb_width - x < mbs_per_slice)
1073                    mbs_per_slice >>= 1;
1074
1075                bytestream_put_byte(&buf, slice_hdr_size << 3);
1076                slice_hdr = buf;
1077                buf += slice_hdr_size - 1;
1078                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1079                    uint8_t *start = pkt->data;
1080                    // Recompute new size according to max_slice_size
1081                    // and deduce delta
1082                    int delta = 200 + (ctx->pictures_per_frame *
1083                                ctx->slices_per_picture + 1) *
1084                                max_slice_size - pkt_size;
1085
1086                    delta = FFMAX(delta, 2 * max_slice_size);
1087                    ctx->frame_size_upper_bound += delta;
1088
1089                    if (!ctx->warn) {
1090                        avpriv_request_sample(avctx,
1091                                              "Packet too small: is %i,"
1092                                              " needs %i (slice: %i). "
1093                                              "Correct allocation",
1094                                              pkt_size, delta, max_slice_size);
1095                        ctx->warn = 1;
1096                    }
1097
1098                    ret = av_grow_packet(pkt, delta);
1099                    if (ret < 0)
1100                        return ret;
1101
1102                    pkt_size += delta;
1103                    // restore pointers
1104                    orig_buf         = pkt->data + (orig_buf         - start);
1105                    buf              = pkt->data + (buf              - start);
1106                    picture_size_pos = pkt->data + (picture_size_pos - start);
1107                    slice_sizes      = pkt->data + (slice_sizes      - start);
1108                    slice_hdr        = pkt->data + (slice_hdr        - start);
1109                    tmp              = pkt->data + (tmp              - start);
1110                }
1111                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1112                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1113                                   mbs_per_slice);
1114                if (ret < 0)
1115                    return ret;
1116
1117                bytestream_put_byte(&slice_hdr, q);
1118                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1119                for (i = 0; i < ctx->num_planes - 1; i++) {
1120                    bytestream_put_be16(&slice_hdr, sizes[i]);
1121                    slice_size += sizes[i];
1122                }
1123                bytestream_put_be16(&slice_sizes, slice_size);
1124                buf += slice_size - slice_hdr_size;
1125                if (max_slice_size < slice_size)
1126                    max_slice_size = slice_size;
1127            }
1128        }
1129
1130        picture_size = buf - (picture_size_pos - 1);
1131        bytestream_put_be32(&picture_size_pos, picture_size);
1132    }
1133
1134    orig_buf -= 8;
1135    frame_size = buf - orig_buf;
1136    bytestream_put_be32(&orig_buf, frame_size);
1137
1138    pkt->size   = frame_size;
1139    *got_packet = 1;
1140
1141    return 0;
1142}
1143
1144static av_cold int encode_close(AVCodecContext *avctx)
1145{
1146    ProresContext *ctx = avctx->priv_data;
1147    int i;
1148
1149    if (ctx->tdata) {
1150        for (i = 0; i < avctx->thread_count; i++)
1151            av_freep(&ctx->tdata[i].nodes);
1152    }
1153    av_freep(&ctx->tdata);
1154    av_freep(&ctx->slice_q);
1155
1156    return 0;
1157}
1158
1159static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1160                        ptrdiff_t linesize, int16_t *block)
1161{
1162    int x, y;
1163    const uint16_t *tsrc = src;
1164
1165    for (y = 0; y < 8; y++) {
1166        for (x = 0; x < 8; x++)
1167            block[y * 8 + x] = tsrc[x];
1168        tsrc += linesize >> 1;
1169    }
1170    fdsp->fdct(block);
1171}
1172
1173static av_cold int encode_init(AVCodecContext *avctx)
1174{
1175    ProresContext *ctx = avctx->priv_data;
1176    int mps;
1177    int i, j;
1178    int min_quant, max_quant;
1179    int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1180
1181    avctx->bits_per_raw_sample = 10;
1182
1183    ctx->fdct      = prores_fdct;
1184    ctx->scantable = interlaced ? ff_prores_interlaced_scan
1185                                : ff_prores_progressive_scan;
1186    ff_fdctdsp_init(&ctx->fdsp, avctx);
1187
1188    mps = ctx->mbs_per_slice;
1189    if (mps & (mps - 1)) {
1190        av_log(avctx, AV_LOG_ERROR,
1191               "there should be an integer power of two MBs per slice\n");
1192        return AVERROR(EINVAL);
1193    }
1194    if (ctx->profile == PRORES_PROFILE_AUTO) {
1195        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1196        ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1197                        !(desc->log2_chroma_w + desc->log2_chroma_h))
1198                     ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1199        av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1200               "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1201               ? "4:4:4:4 profile because of the used input colorspace"
1202               : "HQ profile to keep best quality");
1203    }
1204    if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1205        if (ctx->profile != PRORES_PROFILE_4444 &&
1206            ctx->profile != PRORES_PROFILE_4444XQ) {
1207            // force alpha and warn
1208            av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1209                   "encode alpha. Override with -profile if needed.\n");
1210            ctx->alpha_bits = 0;
1211        }
1212        if (ctx->alpha_bits & 7) {
1213            av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1214            return AVERROR(EINVAL);
1215        }
1216        avctx->bits_per_coded_sample = 32;
1217    } else {
1218        ctx->alpha_bits = 0;
1219    }
1220
1221    ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1222                         ? CFACTOR_Y422
1223                         : CFACTOR_Y444;
1224    ctx->profile_info  = prores_profile_info + ctx->profile;
1225    ctx->num_planes    = 3 + !!ctx->alpha_bits;
1226
1227    ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1228
1229    if (interlaced)
1230        ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1231    else
1232        ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1233
1234    ctx->slices_width  = ctx->mb_width / mps;
1235    ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1236    ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1237    ctx->pictures_per_frame = 1 + interlaced;
1238
1239    if (ctx->quant_sel == -1) {
1240        ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1241        ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1242    } else {
1243        ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1244        ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1245    }
1246
1247    if (strlen(ctx->vendor) != 4) {
1248        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1249        return AVERROR_INVALIDDATA;
1250    }
1251
1252    ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1253    if (!ctx->force_quant) {
1254        if (!ctx->bits_per_mb) {
1255            for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1256                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1257                                           ctx->pictures_per_frame)
1258                    break;
1259            ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1260            if (ctx->alpha_bits)
1261                ctx->bits_per_mb *= 20;
1262        } else if (ctx->bits_per_mb < 128) {
1263            av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1264            return AVERROR_INVALIDDATA;
1265        }
1266
1267        min_quant = ctx->profile_info->min_quant;
1268        max_quant = ctx->profile_info->max_quant;
1269        for (i = min_quant; i < MAX_STORED_Q; i++) {
1270            for (j = 0; j < 64; j++) {
1271                ctx->quants[i][j] = ctx->quant_mat[j] * i;
1272                ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1273            }
1274        }
1275
1276        ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
1277        if (!ctx->slice_q)
1278            return AVERROR(ENOMEM);
1279
1280        ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
1281        if (!ctx->tdata)
1282            return AVERROR(ENOMEM);
1283
1284        for (j = 0; j < avctx->thread_count; j++) {
1285            ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
1286                                                  TRELLIS_WIDTH
1287                                                  * sizeof(*ctx->tdata->nodes));
1288            if (!ctx->tdata[j].nodes)
1289                return AVERROR(ENOMEM);
1290            for (i = min_quant; i < max_quant + 2; i++) {
1291                ctx->tdata[j].nodes[i].prev_node = -1;
1292                ctx->tdata[j].nodes[i].bits      = 0;
1293                ctx->tdata[j].nodes[i].score     = 0;
1294            }
1295        }
1296    } else {
1297        int ls = 0;
1298        int ls_chroma = 0;
1299
1300        if (ctx->force_quant > 64) {
1301            av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1302            return AVERROR_INVALIDDATA;
1303        }
1304
1305        for (j = 0; j < 64; j++) {
1306            ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1307            ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1308            ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1309            ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
1310        }
1311
1312        ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1313        if (ctx->chroma_factor == CFACTOR_Y444)
1314            ctx->bits_per_mb += ls_chroma * 4;
1315    }
1316
1317    ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1318                                   ctx->slices_per_picture + 1) *
1319                                  (2 + 2 * ctx->num_planes +
1320                                   (mps * ctx->bits_per_mb) / 8)
1321                                  + 200;
1322
1323    if (ctx->alpha_bits) {
1324         // The alpha plane is run-coded and might exceed the bit budget.
1325         ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1326                                         ctx->slices_per_picture + 1) *
1327         /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1328         /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1329    }
1330
1331    avctx->codec_tag   = ctx->profile_info->tag;
1332
1333    av_log(avctx, AV_LOG_DEBUG,
1334           "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1335           ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1336           interlaced ? "yes" : "no", ctx->bits_per_mb);
1337    av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1338           ctx->frame_size_upper_bound);
1339
1340    return 0;
1341}
1342
1343#define OFFSET(x) offsetof(ProresContext, x)
1344#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1345
1346static const AVOption options[] = {
1347    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1348        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1349    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1350        { .i64 = PRORES_PROFILE_AUTO },
1351        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1352    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1353        0, 0, VE, "profile" },
1354    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1355        0, 0, VE, "profile" },
1356    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1357        0, 0, VE, "profile" },
1358    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1359        0, 0, VE, "profile" },
1360    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1361        0, 0, VE, "profile" },
1362    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1363        0, 0, VE, "profile" },
1364    { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1365        0, 0, VE, "profile" },
1366    { "vendor", "vendor ID", OFFSET(vendor),
1367        AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1368    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1369        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1370    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1371        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1372    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1373        0, 0, VE, "quant_mat" },
1374    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1375        0, 0, VE, "quant_mat" },
1376    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1377        0, 0, VE, "quant_mat" },
1378    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1379        0, 0, VE, "quant_mat" },
1380    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1381        0, 0, VE, "quant_mat" },
1382    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1383        0, 0, VE, "quant_mat" },
1384    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1385        { .i64 = 16 }, 0, 16, VE },
1386    { NULL }
1387};
1388
1389static const AVClass proresenc_class = {
1390    .class_name = "ProRes encoder",
1391    .item_name  = av_default_item_name,
1392    .option     = options,
1393    .version    = LIBAVUTIL_VERSION_INT,
1394};
1395
1396const FFCodec ff_prores_ks_encoder = {
1397    .p.name         = "prores_ks",
1398    .p.long_name    = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1399    .p.type         = AVMEDIA_TYPE_VIDEO,
1400    .p.id           = AV_CODEC_ID_PRORES,
1401    .priv_data_size = sizeof(ProresContext),
1402    .init           = encode_init,
1403    .close          = encode_close,
1404    FF_CODEC_ENCODE_CB(encode_frame),
1405    .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
1406    .p.pix_fmts     = (const enum AVPixelFormat[]) {
1407                          AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1408                          AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1409                      },
1410    .p.priv_class   = &proresenc_class,
1411    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1412    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1413};
1414