1 /*
2  * Apple ProRes encoder
3  *
4  * Copyright (c) 2012 Konstantin Shishkov
5  *
 * This encoder appears to be based on Anatoliy Wasserman's encoder,
 * considering the similarities in the bugs.
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "avcodec.h"
30 #include "codec_internal.h"
31 #include "encode.h"
32 #include "fdctdsp.h"
33 #include "put_bits.h"
34 #include "profiles.h"
35 #include "bytestream.h"
36 #include "proresdata.h"
37 
/* Values compared against ProresContext.chroma_factor to tell 4:2:2 from 4:4:4. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Largest slice, in macroblocks; sizes the per-plane coefficient buffers. */
#define MAX_MBS_PER_SLICE 8

/* Number of per-plane coefficient buffers kept in the contexts. */
#define MAX_PLANES 4
44 
/*
 * Encoder profile identifiers. The non-negative values follow the order of
 * the entries in prores_profile_info[]; PRORES_PROFILE_AUTO is the only
 * negative value.
 */
enum {
    PRORES_PROFILE_AUTO     = -1,
    PRORES_PROFILE_PROXY    =  0,
    PRORES_PROFILE_LT       =  1,
    PRORES_PROFILE_STANDARD =  2,
    PRORES_PROFILE_HQ       =  3,
    PRORES_PROFILE_4444     =  4,
    PRORES_PROFILE_4444XQ   =  5,
};
54 
/*
 * Row indices into prores_quant_matrices[]; the values follow the order in
 * which the matrices are listed in that table.
 */
enum {
    QUANT_MAT_PROXY        = 0,
    QUANT_MAT_PROXY_CHROMA = 1,
    QUANT_MAT_LT           = 2,
    QUANT_MAT_STANDARD     = 3,
    QUANT_MAT_HQ           = 4,
    QUANT_MAT_XQ_LUMA      = 5,
    QUANT_MAT_DEFAULT      = 6,
};
64 
/*
 * Base 8x8 quantisation matrices, one row of 64 entries per QUANT_MAT_*
 * index (the rows appear in the same order as the enum values).
 * encode_slice() multiplies the selected matrix by the slice quantiser when
 * it has to build a matrix on the fly.
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // proxy chromas
        4,  7,  9, 11, 13, 14, 63, 63,
        7,  7, 11, 12, 14, 63, 63, 63,
        9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // XQ luma
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  3,
        2,  2,  2,  2,  2,  2,  3,  3,
        2,  2,  2,  2,  2,  3,  3,  3,
        2,  2,  2,  2,  3,  3,  3,  4,
        2,  2,  2,  2,  3,  3,  4,  4,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
137 
/*
 * Frame-size classes expressed as a count of 16x16 macroblocks
 * (e.g. 720x576 -> 45 * 36 = 1620).
 * NOTE(review): presumably used to pick the matching br_tab[] column of a
 * profile -- the lookup is outside this part of the file, confirm there.
 */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
145 
/*
 * Static description of each encoder profile. The six entries are listed in
 * the order of the non-negative PRORES_PROFILE_* values.
 */
static const struct prores_profile {
    const char *full_name;            /* human-readable profile name */
    uint32_t    tag;                  /* four-character code built with MKTAG() */
    int         min_quant;            /* lowest quantiser tried by find_slice_quant() */
    int         max_quant;            /* highest regular quantiser tried by find_slice_quant() */
    int         br_tab[NUM_MB_LIMITS]; /* one value per prores_mb_limits[] class;
                                        * NOTE(review): presumably a bit budget per macroblock -- confirm */
    int         quant;                /* QUANT_MAT_* index of the luma matrix */
    int         quant_chroma;         /* QUANT_MAT_* index of the chroma matrix */
} prores_profile_info[6] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
        .quant_chroma = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
        .quant_chroma = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444",
        .tag       = MKTAG('a', 'p', '4', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 2350, 1828, 1600, 1425 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444XQ",
        .tag       = MKTAG('a', 'p', '4', 'x'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 3525, 2742, 2400, 2137 },
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
        .quant_chroma = QUANT_MAT_HQ,
    }
};
210 
/* Number of per-quantiser entries kept for one slice in find_slice_quant(). */
#define TRELLIS_WIDTH 16
/*
 * Score assigned to candidates that must not be chosen (e.g. a slice whose
 * estimate exceeds the size cap). Parenthesised so that the macro always
 * expands as a single operand, whatever expression it is used in.
 */
#define SCORE_LIMIT   (INT_MAX / 2)
213 
/* One node of the quantiser-selection trellis filled in by find_slice_quant(). */
struct TrellisNode {
    int prev_node; /* reset to -1 when the node is initialised;
                    * NOTE(review): presumably the predecessor index otherwise -- confirm */
    int quant;     /* quantiser value this node stands for */
    int bits;      /* NOTE(review): presumably the accumulated bit cost -- confirm */
    int score;     /* NOTE(review): presumably the accumulated error score -- confirm */
};
220 
/*
 * Number of quantisers with a precomputed matrix in ProresContext.quants[];
 * encode_slice() builds the matrix on the fly for quantisers at or above it.
 */
#define MAX_STORED_Q 16

/*
 * Scratch state handed to find_slice_quant().
 * NOTE(review): the name suggests one instance per worker thread -- the
 * allocation is outside this part of the file, confirm there.
 */
typedef struct ProresThreadData {
    /* coefficients (planes 0-2) or alpha samples (plane 3) of the current slice */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 staging area for macroblocks crossing the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
    int16_t custom_q[64];
    int16_t custom_chroma_q[64];
    /* trellis nodes, indexed as trellis_node + quantiser */
    struct TrellisNode *nodes;
} ProresThreadData;
230 
/* Encoder private context (reached through avctx->priv_data). */
typedef struct ProresContext {
    AVClass *class;
    /* scratch storage used while writing a slice; encode_slice() only uses blocks[0] */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    /* 16x16 staging area for macroblocks crossing the picture edge */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    /* luma / chroma quantisation matrices indexed by quantiser (below MAX_STORED_Q) */
    int16_t quants[MAX_STORED_Q][64];
    int16_t quants_chroma[MAX_STORED_Q][64];
    /* filled by encode_slice() with base matrix * quant for larger quantisers */
    int16_t custom_q[64];
    int16_t custom_chroma_q[64];
    /* base (unscaled) luma / chroma matrices */
    const uint8_t *quant_mat;
    const uint8_t *quant_chroma_mat;
    /* coefficient scan order handed to the AC coder and estimator */
    const uint8_t *scantable;

    /* forward transform used by get_slice_data(); linesize is in bytes */
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
                 ptrdiff_t linesize, int16_t *block);
    FDCTDSPContext fdsp;

    /* frame read by find_slice_quant() */
    const AVFrame *pic;
    int mb_width, mb_height;
    int mbs_per_slice;
    /* chroma_factor is compared against CFACTOR_Y444 to pick the chroma geometry */
    int num_chroma_blocks, chroma_factor;
    int slices_width;
    int slices_per_picture;
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    /* index of the picture (field) being coded; selects the starting line together with top_field_first */
    int cur_picture_idx;
    /* number of planes coded per slice; plane 3, when present, is alpha */
    int num_planes;
    /* per-macroblock bit budget compared against slice estimates */
    int bits_per_mb;
    /* when non-zero, encode_slice() always uses quants[0] / quants_chroma[0] */
    int force_quant;
    /* alpha sample width handed to the alpha coder; 0 skips alpha estimation */
    int alpha_bits;
    int warn;

    char *vendor;
    int quant_sel;

    int frame_size_upper_bound;

    int profile;
    /* supplies min_quant / max_quant to find_slice_quant() */
    const struct prores_profile *profile_info;

    int *slice_q;

    ProresThreadData *tdata;
} ProresContext;
273 
/**
 * Load the samples of one slice from a single plane and transform them.
 *
 * For each of the mbs_per_slice macroblocks, starting at sample position
 * (x, y), the source samples are run through ctx->fdct() in 8x8 units and
 * the coefficients are appended to blocks (64 values per transformed block).
 * Macroblocks crossing the right or bottom plane edge are first copied to
 * emu_buf with the last valid column and row replicated; once a macroblock
 * starts at or beyond the right edge, the remainder of the slice is written
 * as zero coefficients.
 *
 * Block order per macroblock: luma is top-left, top-right, bottom-left,
 * bottom-right; chroma is top-left, bottom-left, top-right, bottom-right
 * (the right-hand blocks only exist when blocks_per_mb > 2).
 *
 * @param ctx           encoder context supplying fdct and fdsp
 * @param src           first sample of the slice inside the plane
 * @param linesize      distance between two rows of src, in bytes
 * @param x             horizontal position of src, in samples
 * @param y             vertical position of src, in rows
 * @param w             plane width in samples
 * @param h             plane height in rows
 * @param blocks        output coefficient buffer
 * @param emu_buf       16x16 scratch area used for edge macroblocks
 * @param mbs_per_slice number of macroblocks to process
 * @param blocks_per_mb 8x8 blocks per macroblock (a macroblock is
 *                      4 * blocks_per_mb samples wide)
 * @param is_chroma     selects the chroma block order when non-zero
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const int mb_width = 4 * blocks_per_mb;
    int mb;

    for (mb = 0; mb < mbs_per_slice; mb++, src += mb_width, x += mb_width) {
        const uint16_t *in     = src;
        ptrdiff_t       stride = linesize;
        ptrdiff_t       offs[4], lower;
        int             nb_offs = 0, b;

        /* nothing left inside the plane: the rest of the slice is empty */
        if (x >= w) {
            memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }

        if (x + mb_width > w || y + 16 > h) {
            /* partial macroblock: rebuild it in emu_buf with edge padding */
            const int avail_w = FFMIN(w - x, mb_width);
            const int avail_h = FFMIN(h - y, 16);
            int row, col;

            for (row = 0; row < avail_h; row++) {
                uint16_t *dst = emu_buf + row * 16;
                int edge;

                memcpy(dst, (const uint8_t*)src + row * linesize,
                       avail_w * sizeof(*src));
                edge = dst[avail_w - 1];
                for (col = avail_w; col < mb_width; col++)
                    dst[col] = edge;
            }
            for (; row < 16; row++)
                memcpy(emu_buf + row * 16, emu_buf + (avail_h - 1) * 16,
                       mb_width * sizeof(*emu_buf));

            in     = emu_buf;
            stride = 16 * sizeof(*emu_buf);
        }

        /* stride is in bytes, so stride * 4 samples is eight rows down */
        lower = stride * 4;

        offs[nb_offs++] = 0;
        if (is_chroma) {
            offs[nb_offs++] = lower;
            if (blocks_per_mb > 2) {
                offs[nb_offs++] = 8;
                offs[nb_offs++] = lower + 8;
            }
        } else {
            if (blocks_per_mb > 2)
                offs[nb_offs++] = 8;
            offs[nb_offs++] = lower;
            if (blocks_per_mb > 2)
                offs[nb_offs++] = lower + 8;
        }

        for (b = 0; b < nb_offs; b++, blocks += 64)
            ctx->fdct(&ctx->fdsp, in + offs[b], stride, blocks);
    }
}
344 
/**
 * Copy the alpha samples of one slice into blocks and rescale them.
 *
 * The output is always 16 rows of 16 * mbs_per_slice samples. Columns and
 * rows lying outside the plane are filled by repeating the last valid
 * column / row.
 *
 * With abits == 8 each sample is shifted right by two; otherwise it becomes
 * (v << 6) | (v >> 4).
 * NOTE(review): this looks like a 10-bit source being reduced to 8 bits or
 * expanded to 16 bits -- the input depth is not visible here, confirm.
 *
 * @param ctx           unused
 * @param src           first alpha sample of the slice
 * @param linesize      distance between two rows of src, in bytes
 * @param x             horizontal position of src, in samples
 * @param y             vertical position of src, in rows
 * @param w             plane width in samples
 * @param h             plane height in rows
 * @param blocks        output buffer
 * @param mbs_per_slice number of macroblocks in the slice
 * @param abits         target alpha bit depth
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    const int copy_w      = FFMIN(w - x, slice_width);
    const int copy_h      = FFMIN(h - y, 16);
    int row, col;

    for (row = 0; row < copy_h; row++) {
        memcpy(blocks, src, copy_w * sizeof(*src));

        for (col = 0; col < copy_w; col++) {
            if (abits == 8)
                blocks[col] >>= 2;
            else
                blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
        }

        /* pad to the full slice width with the last valid sample */
        for (col = copy_w; col < slice_width; col++)
            blocks[col] = blocks[copy_w - 1];

        src    += linesize >> 1;
        blocks += slice_width;
    }

    /* missing rows repeat the row just above them */
    for (; row < 16; row++) {
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
        blocks += slice_width;
    }
}
372 
373 /**
374  * Write an unsigned rice/exp golomb codeword.
375  */
encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)376 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
377 {
378     unsigned int rice_order, exp_order, switch_bits, switch_val;
379     int exponent;
380 
381     /* number of prefix bits to switch between Rice and expGolomb */
382     switch_bits = (codebook & 3) + 1;
383     rice_order  =  codebook >> 5;       /* rice code order */
384     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
385 
386     switch_val  = switch_bits << rice_order;
387 
388     if (val >= switch_val) {
389         val -= switch_val - (1 << exp_order);
390         exponent = av_log2(val);
391 
392         put_bits(pb, exponent - exp_order + switch_bits, 0);
393         put_bits(pb, exponent + 1, val);
394     } else {
395         exponent = val >> rice_order;
396 
397         if (exponent)
398             put_bits(pb, exponent, 0);
399         put_bits(pb, 1, 1);
400         if (rice_order)
401             put_sbits(pb, rice_order, val);
402     }
403 }
404 
/*
 * GET_SIGN(x) is x >> 31: with a 32-bit int and an arithmetic right shift
 * this yields 0 for x >= 0 and -1 (all bits set) for x < 0.
 * MAKE_CODE(x) is (x * 2) ^ GET_SIGN(x), which under the same assumption
 * maps signed values to non-negative codes: 0 -> 0, -1 -> 1, 1 -> 2,
 * -2 -> 3, ...  Note that MAKE_CODE evaluates its argument twice.
 */
#define GET_SIGN(x)  ((x) >> 31)
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
407 
/**
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * The first DC is coded directly with FIRST_DC_CB. Every following DC is
 * coded as a difference to the previous one; the difference is negated when
 * the previous difference was negative, and the codebook is chosen from the
 * size of the previous code.
 *
 * @param pb               bit writer
 * @param blocks           coefficients, 64 per block, DC at offset 0
 * @param blocks_per_slice number of blocks to code
 * @param scale            DC quantiser (callers pass qmat[0])
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int prev_dc   = (blocks[0] - 0x4000) / scale;
    int prev_sign = 0;
    int cb_idx    = 3;
    int n;

    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));

    for (n = 1; n < blocks_per_slice; n++) {
        const int dc    = (blocks[n * 64] - 0x4000) / scale;
        const int diff  = dc - prev_dc;
        /* flips the sign of diff when the previous difference was negative */
        const int delta = (diff ^ prev_sign) - prev_sign;
        const int code  = MAKE_CODE(delta);

        encode_vlc_codeword(pb, ff_prores_dc_codebook[cb_idx], code);

        cb_idx    = FFMIN((code + (code & 1)) >> 1, 3);
        prev_sign = GET_SIGN(diff);
        prev_dc   = dc;
    }
}
433 
/**
 * Write the AC coefficients of all blocks of a slice plane.
 *
 * Coefficients are visited scan position by scan position, and for each
 * position across all blocks. Every non-zero quantised level is written as
 * (run of zeros before it, |level| - 1, sign bit); the codebooks for the
 * next run and level depend on the run and level just written.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_slice  number of blocks to code
 * @param plane_size_factor unused
 * @param scan              scan order; entry 0 (the DC) is skipped
 * @param qmat              quantisation matrix indexed by coefficient position
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       int plane_size_factor,
                       const uint8_t *scan, const int16_t *qmat)
{
    const int max_coeffs = blocks_per_slice << 6;
    int run_cb = ff_prores_run_to_cb_index[4];
    int lev_cb = ff_prores_lev_to_cb_index[2];
    int run    = 0;
    int pos, idx;

    for (pos = 1; pos < 64; pos++) {
        const int coef  = scan[pos];
        const int quant = qmat[coef];

        for (idx = coef; idx < max_coeffs; idx += 64) {
            const int level = blocks[idx] / quant;
            int abs_level;

            if (!level) {
                run++;
                continue;
            }

            abs_level = FFABS(level);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                abs_level - 1);
            put_sbits(pb, 1, GET_SIGN(level));

            run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
            lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
            run    = 0;
        }
    }
}
467 
/**
 * Write one already-transformed plane of a slice: all DC coefficients
 * first, then all AC coefficients.
 *
 * @param ctx               encoder context (supplies the scan table)
 * @param pb                bit writer
 * @param src               unused
 * @param linesize          unused
 * @param mbs_per_slice     number of macroblocks in the slice
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_mb     blocks per macroblock for this plane
 * @param plane_size_factor forwarded to encode_acs()
 * @param qmat              quantisation matrix; qmat[0] scales the DCs
 */
static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                              const uint16_t *src, ptrdiff_t linesize,
                              int mbs_per_slice, int16_t *blocks,
                              int blocks_per_mb, int plane_size_factor,
                              const int16_t *qmat)
{
    const int nb_blocks = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, nb_blocks, qmat[0]);
    encode_acs(pb, blocks, nb_blocks, plane_size_factor,
               ctx->scantable, qmat);
}
480 
/**
 * Write the difference between two alpha samples.
 *
 * The difference is taken modulo 2^abits and folded so that values in the
 * top dsize of the range become small negative numbers. A non-zero
 * difference within [-dsize, dsize] uses the short form (flag 0, dbits - 1
 * magnitude bits, sign bit); everything else, including zero, uses the long
 * form (flag 1, abits raw bits).
 *
 * @param pb    bit writer
 * @param cur   current sample
 * @param prev  previous sample
 * @param abits alpha bit depth
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize) {
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
    } else {
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
    }
}
499 
/**
 * Write the length of a run of repeated alpha samples.
 *
 * A zero run is a single 1 bit. A non-zero run is a 0 bit followed by the
 * run length in 4 bits when it is below 16, in 15 bits otherwise.
 *
 * @param pb  bit writer
 * @param run number of repeated samples
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (!run) {
        put_bits(pb, 1, 1);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, run < 0x10 ? 4 : 15, run);
}
512 
513 // todo alpha quantisation for high quants
encode_alpha_plane(ProresContext *ctx, PutBitContext *pb, int mbs_per_slice, uint16_t *blocks, int quant)514 static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
515                               int mbs_per_slice, uint16_t *blocks,
516                               int quant)
517 {
518     const int abits = ctx->alpha_bits;
519     const int mask  = (1 << abits) - 1;
520     const int num_coeffs = mbs_per_slice * 256;
521     int prev = mask, cur;
522     int idx = 0;
523     int run = 0;
524 
525     cur = blocks[idx++];
526     put_alpha_diff(pb, cur, prev, abits);
527     prev = cur;
528     do {
529         cur = blocks[idx++];
530         if (cur != prev) {
531             put_alpha_run (pb, run);
532             put_alpha_diff(pb, cur, prev, abits);
533             prev = cur;
534             run  = 0;
535         } else {
536             run++;
537         }
538     } while (idx < num_coeffs);
539     if (run)
540         put_alpha_run(pb, run);
541 }
542 
/**
 * Encode every plane of one slice into pb.
 *
 * The quantisation matrices are quants[0] / quants_chroma[0] when
 * force_quant is set, the precomputed entry for quant when quant is below
 * MAX_STORED_Q, and otherwise the base matrices multiplied by quant (stored
 * in ctx->custom_q / ctx->custom_chroma_q). Planes 0-2 are transformed and
 * entropy coded, plane 3 is coded as alpha. The writer is flushed after each
 * plane so that every plane starts on a byte boundary.
 *
 * @param avctx         codec context; priv_data is the ProresContext
 * @param pic           frame being encoded
 * @param pb            bit writer receiving the slice payload
 * @param sizes         receives the size in bytes of each coded plane
 * @param x             horizontal slice position in macroblocks
 * @param y             vertical slice position in macroblocks
 * @param quant         quantiser selected for this slice
 * @param mbs_per_slice number of macroblocks in this slice
 * @return put_bytes_output(pb) after the last plane has been written
 */
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
                        PutBitContext *pb,
                        int sizes[4], int x, int y, int quant,
                        int mbs_per_slice)
{
    ProresContext *ctx = avctx->priv_data;
    int i, xp, yp;
    int total_size = 0;
    const uint16_t *src;
    int slice_width_factor = av_log2(mbs_per_slice);
    int num_cblocks, pwidth, line_add;
    ptrdiff_t linesize;
    int plane_factor, is_chroma;
    /* same element type as ctx->quants[] and as the qmat parameter of
     * encode_slice_plane(), so no incompatible pointer conversion occurs */
    int16_t *qmat;
    int16_t *qmat_chroma;

    /* for interlaced content pick the first line of the field being coded */
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;

    if (ctx->force_quant) {
        qmat = ctx->quants[0];
        qmat_chroma = ctx->quants_chroma[0];
    } else if (quant < MAX_STORED_Q) {
        qmat = ctx->quants[quant];
        qmat_chroma = ctx->quants_chroma[quant];
    } else {
        qmat = ctx->custom_q;
        qmat_chroma = ctx->custom_chroma_q;
        for (i = 0; i < 64; i++) {
            qmat[i] = ctx->quant_mat[i] * quant;
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
        }
    }

    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma    = (i == 1 || i == 2);
        plane_factor = slice_width_factor + 2;
        if (is_chroma)
            plane_factor += ctx->chroma_factor - 3;
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
            xp          = x << 4;
            yp          = y << 4;
            num_cblocks = 4;
            pwidth      = avctx->width;
        } else {
            /* horizontally subsampled chroma: half width, two blocks per MB */
            xp          = x << 3;
            yp          = y << 4;
            num_cblocks = 2;
            pwidth      = avctx->width >> 1;
        }

        /* with two pictures per frame each picture uses every other line */
        linesize = pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
                                line_add * pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], ctx->emu_buf,
                           mbs_per_slice, num_cblocks, is_chroma);
            if (!is_chroma) {/* luma quant */
                encode_slice_plane(ctx, pb, src, linesize,
                                   mbs_per_slice, ctx->blocks[0],
                                   num_cblocks, plane_factor, qmat);
            } else { /* chroma plane */
                encode_slice_plane(ctx, pb, src, linesize,
                                   mbs_per_slice, ctx->blocks[0],
                                   num_cblocks, plane_factor, qmat_chroma);
            }
        } else {
            get_alpha_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
            /* the alpha coder reads the same storage as unsigned samples */
            encode_alpha_plane(ctx, pb, mbs_per_slice,
                               (uint16_t *)ctx->blocks[0], quant);
        }
        flush_put_bits(pb);
        sizes[i]   = put_bytes_output(pb) - total_size;
        total_size = put_bytes_output(pb);
    }
    return total_size;
}
626 
/**
 * Return the number of bits encode_vlc_codeword() writes for val with the
 * given codebook descriptor, without writing anything.
 *
 * @param codebook packed codebook descriptor
 * @param val      value to code; callers pass non-negative values
 * @return codeword length in bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    /* number of prefix bits to switch between Rice and expGolomb */
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;       /* rice code order */
    const unsigned int exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice: unary quotient, stop bit and rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb: zero prefix plus exponent + 1 value bits */
    val     -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
648 
estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice, int scale)649 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
650                         int scale)
651 {
652     int i;
653     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
654     int bits;
655 
656     prev_dc  = (blocks[0] - 0x4000) / scale;
657     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
658     sign     = 0;
659     codebook = 3;
660     blocks  += 64;
661     *error  += FFABS(blocks[0] - 0x4000) % scale;
662 
663     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
664         dc       = (blocks[0] - 0x4000) / scale;
665         *error  += FFABS(blocks[0] - 0x4000) % scale;
666         delta    = dc - prev_dc;
667         new_sign = GET_SIGN(delta);
668         delta    = (delta ^ sign) - sign;
669         code     = MAKE_CODE(delta);
670         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
671         codebook = (code + (code & 1)) >> 1;
672         codebook = FFMIN(codebook, 3);
673         sign     = new_sign;
674         prev_dc  = dc;
675     }
676 
677     return bits;
678 }
679 
estimate_acs(int *error, int16_t *blocks, int blocks_per_slice, int plane_size_factor, const uint8_t *scan, const int16_t *qmat)680 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
681                         int plane_size_factor,
682                         const uint8_t *scan, const int16_t *qmat)
683 {
684     int idx, i;
685     int run, level, run_cb, lev_cb;
686     int max_coeffs, abs_level;
687     int bits = 0;
688 
689     max_coeffs = blocks_per_slice << 6;
690     run_cb     = ff_prores_run_to_cb_index[4];
691     lev_cb     = ff_prores_lev_to_cb_index[2];
692     run        = 0;
693 
694     for (i = 1; i < 64; i++) {
695         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
696             level   = blocks[idx] / qmat[scan[i]];
697             *error += FFABS(blocks[idx]) % qmat[scan[i]];
698             if (level) {
699                 abs_level = FFABS(level);
700                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
701                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
702                                      abs_level - 1) + 1;
703 
704                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
705                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
706                 run    = 0;
707             } else {
708                 run++;
709             }
710         }
711     }
712 
713     return bits;
714 }
715 
estimate_slice_plane(ProresContext *ctx, int *error, int plane, const uint16_t *src, ptrdiff_t linesize, int mbs_per_slice, int blocks_per_mb, int plane_size_factor, const int16_t *qmat, ProresThreadData *td)716 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
717                                 const uint16_t *src, ptrdiff_t linesize,
718                                 int mbs_per_slice,
719                                 int blocks_per_mb, int plane_size_factor,
720                                 const int16_t *qmat, ProresThreadData *td)
721 {
722     int blocks_per_slice;
723     int bits;
724 
725     blocks_per_slice = mbs_per_slice * blocks_per_mb;
726 
727     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
728     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
729                          plane_size_factor, ctx->scantable, qmat);
730 
731     return FFALIGN(bits, 8);
732 }
733 
/**
 * Return the number of bits put_alpha_diff() writes for the given pair of
 * alpha samples: dbits + 1 for the short form, abits + 1 for the long form.
 *
 * @param cur   current sample
 * @param prev  previous sample
 * @param abits alpha bit depth
 * @return size of the coded difference in bits
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    /* short form: non-zero difference within [-dsize, dsize] */
    if (diff && diff >= -dsize && diff <= dsize)
        return dbits + 1;

    return abits + 1;
}
748 
/**
 * Estimate the number of bits encode_alpha_plane() writes for one slice.
 *
 * The samples are walked exactly like in encode_alpha_plane(). Run lengths
 * are costed the way put_alpha_run() writes them: one flag bit, followed by
 * 4 bits for a non-zero run below 16 or 15 bits for a longer one.
 *
 * @param ctx           encoder context (supplies alpha_bits)
 * @param src           unused
 * @param linesize      unused
 * @param mbs_per_slice number of macroblocks; 256 samples are counted per MB
 * @param blocks        alpha samples of the slice
 * @return estimated size of the alpha data in bits
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    cur = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            /* run flag bit, always present */
            bits++;
            /* run length field, only present for a non-zero run */
            if (run)
                bits += run < 0x10 ? 4 : 15;
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    /* a trailing run is written with its flag bit as well */
    if (run)
        bits += 1 + (run < 0x10 ? 4 : 15);

    return bits;
}
790 
find_slice_quant(AVCodecContext *avctx, int trellis_node, int x, int y, int mbs_per_slice, ProresThreadData *td)791 static int find_slice_quant(AVCodecContext *avctx,
792                             int trellis_node, int x, int y, int mbs_per_slice,
793                             ProresThreadData *td)
794 {
795     ProresContext *ctx = avctx->priv_data;
796     int i, q, pq, xp, yp;
797     const uint16_t *src;
798     int slice_width_factor = av_log2(mbs_per_slice);
799     int num_cblocks[MAX_PLANES], pwidth;
800     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
801     const int min_quant = ctx->profile_info->min_quant;
802     const int max_quant = ctx->profile_info->max_quant;
803     int error, bits, bits_limit;
804     int mbs, prev, cur, new_score;
805     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
806     int overquant;
807     uint16_t *qmat;
808     uint16_t *qmat_chroma;
809     int linesize[4], line_add;
810     int alpha_bits = 0;
811 
812     if (ctx->pictures_per_frame == 1)
813         line_add = 0;
814     else
815         line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
816     mbs = x + mbs_per_slice;
817 
818     for (i = 0; i < ctx->num_planes; i++) {
819         is_chroma[i]    = (i == 1 || i == 2);
820         plane_factor[i] = slice_width_factor + 2;
821         if (is_chroma[i])
822             plane_factor[i] += ctx->chroma_factor - 3;
823         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
824             xp             = x << 4;
825             yp             = y << 4;
826             num_cblocks[i] = 4;
827             pwidth         = avctx->width;
828         } else {
829             xp             = x << 3;
830             yp             = y << 4;
831             num_cblocks[i] = 2;
832             pwidth         = avctx->width >> 1;
833         }
834 
835         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
836         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
837                                  line_add * ctx->pic->linesize[i]) + xp;
838 
839         if (i < 3) {
840             get_slice_data(ctx, src, linesize[i], xp, yp,
841                            pwidth, avctx->height / ctx->pictures_per_frame,
842                            td->blocks[i], td->emu_buf,
843                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
844         } else {
845             get_alpha_data(ctx, src, linesize[i], xp, yp,
846                            pwidth, avctx->height / ctx->pictures_per_frame,
847                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
848         }
849     }
850 
851     for (q = min_quant; q < max_quant + 2; q++) {
852         td->nodes[trellis_node + q].prev_node = -1;
853         td->nodes[trellis_node + q].quant     = q;
854     }
855 
856     if (ctx->alpha_bits)
857         alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
858                                           mbs_per_slice, td->blocks[3]);
859     // todo: maybe perform coarser quantising to fit into frame size when needed
860     for (q = min_quant; q <= max_quant; q++) {
861         bits  = alpha_bits;
862         error = 0;
863         bits += estimate_slice_plane(ctx, &error, 0,
864                                      src, linesize[0],
865                                      mbs_per_slice,
866                                      num_cblocks[0], plane_factor[0],
867                                      ctx->quants[q], td); /* estimate luma plane */
868         for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
869             bits += estimate_slice_plane(ctx, &error, i,
870                                          src, linesize[i],
871                                          mbs_per_slice,
872                                          num_cblocks[i], plane_factor[i],
873                                          ctx->quants_chroma[q], td);
874         }
875         if (bits > 65000 * 8)
876             error = SCORE_LIMIT;
877 
878         slice_bits[q]  = bits;
879         slice_score[q] = error;
880     }
881     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
882         slice_bits[max_quant + 1]  = slice_bits[max_quant];
883         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
884         overquant = max_quant;
885     } else {
886         for (q = max_quant + 1; q < 128; q++) {
887             bits  = alpha_bits;
888             error = 0;
889             if (q < MAX_STORED_Q) {
890                 qmat = ctx->quants[q];
891                 qmat_chroma = ctx->quants_chroma[q];
892             } else {
893                 qmat = td->custom_q;
894                 qmat_chroma = td->custom_chroma_q;
895                 for (i = 0; i < 64; i++) {
896                     qmat[i] = ctx->quant_mat[i] * q;
897                     qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
898                 }
899             }
900             bits += estimate_slice_plane(ctx, &error, 0,
901                                          src, linesize[0],
902                                          mbs_per_slice,
903                                          num_cblocks[0], plane_factor[0],
904                                          qmat, td);/* estimate luma plane */
905             for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
906                 bits += estimate_slice_plane(ctx, &error, i,
907                                              src, linesize[i],
908                                              mbs_per_slice,
909                                              num_cblocks[i], plane_factor[i],
910                                              qmat_chroma, td);
911             }
912             if (bits <= ctx->bits_per_mb * mbs_per_slice)
913                 break;
914         }
915 
916         slice_bits[max_quant + 1]  = bits;
917         slice_score[max_quant + 1] = error;
918         overquant = q;
919     }
920     td->nodes[trellis_node + max_quant + 1].quant = overquant;
921 
922     bits_limit = mbs * ctx->bits_per_mb;
923     for (pq = min_quant; pq < max_quant + 2; pq++) {
924         prev = trellis_node - TRELLIS_WIDTH + pq;
925 
926         for (q = min_quant; q < max_quant + 2; q++) {
927             cur = trellis_node + q;
928 
929             bits  = td->nodes[prev].bits + slice_bits[q];
930             error = slice_score[q];
931             if (bits > bits_limit)
932                 error = SCORE_LIMIT;
933 
934             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
935                 new_score = td->nodes[prev].score + error;
936             else
937                 new_score = SCORE_LIMIT;
938             if (td->nodes[cur].prev_node == -1 ||
939                 td->nodes[cur].score >= new_score) {
940 
941                 td->nodes[cur].bits      = bits;
942                 td->nodes[cur].score     = new_score;
943                 td->nodes[cur].prev_node = prev;
944             }
945         }
946     }
947 
948     error = td->nodes[trellis_node + min_quant].score;
949     pq    = trellis_node + min_quant;
950     for (q = min_quant + 1; q < max_quant + 2; q++) {
951         if (td->nodes[trellis_node + q].score <= error) {
952             error = td->nodes[trellis_node + q].score;
953             pq    = trellis_node + q;
954         }
955     }
956 
957     return pq;
958 }
959 
/**
 * execute2() worker: select the quantiser of every slice in one row.
 *
 * The row is walked left to right; find_slice_quant() is called once per
 * slice and returns the index of the best trellis node found so far.
 * The chain of prev_node links is then followed backwards from that node
 * to fill ctx->slice_q for the row.
 *
 * @param avctx    codec context (priv_data is the ProresContext)
 * @param arg      unused
 * @param jobnr    job index, used as the macroblock row
 * @param threadnr index of the per-thread scratch data in ctx->tdata
 * @return always 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx   = avctx->priv_data;
    ProresThreadData *td = &ctx->tdata[threadnr];
    const int row        = jobnr;
    const int row_base   = row * ctx->slices_width;
    int slice_mbs        = ctx->mbs_per_slice;
    int node             = 0;
    int mb_x             = 0;
    int slice_idx        = 0;

    while (mb_x < ctx->mb_width) {
        /* halve the slice width until it fits into the remaining MBs */
        while (ctx->mb_width - mb_x < slice_mbs)
            slice_mbs >>= 1;

        node = find_slice_quant(avctx, (slice_idx + 1) * TRELLIS_WIDTH,
                                mb_x, row, slice_mbs, td);

        mb_x += slice_mbs;
        slice_idx++;
    }

    /* backtrack from the last slice to the first one */
    for (slice_idx = ctx->slices_width; slice_idx-- > 0; ) {
        ctx->slice_q[row_base + slice_idx] = td->nodes[node].quant;
        node = td->nodes[node].prev_node;
    }

    return 0;
}
983 
/**
 * Encode one frame into a packet.
 *
 * Writes the frame atom (size and container ID), the frame header and then,
 * for each of ctx->pictures_per_frame pictures, a picture header, a table of
 * slice sizes and the slices themselves. Unless a fixed quantiser is forced,
 * the per-slice quantisers are first selected by running find_quant_thread()
 * over the macroblock rows through avctx->execute2().
 *
 * @param avctx      codec context (priv_data is the ProresContext)
 * @param pkt        output packet; allocated here and grown when the
 *                   remaining space becomes too small for further slices
 * @param pic        input frame
 * @param got_packet set to 1 on success
 * @return 0 on success, otherwise the error returned by packet allocation,
 *         the quantiser search or slice encoding
 */
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                        const AVFrame *pic, int *got_packet)
{
    ProresContext *ctx = avctx->priv_data;
    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
    uint8_t *picture_size_pos;
    PutBitContext pb;
    int x, y, i, mb, q = 0;
    int sizes[4] = { 0 };
    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
    int frame_size, picture_size, slice_size;
    int pkt_size, ret;
    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
    uint8_t frame_flags;

    ctx->pic = pic;
    pkt_size = ctx->frame_size_upper_bound;

    if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
        return ret;

    orig_buf = pkt->data;

    // frame atom
    orig_buf += 4;                              // frame size
    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
    buf = orig_buf;

    // frame header
    tmp = buf;
    buf += 2;                                   // frame header size will be stored here
    bytestream_put_be16  (&buf, 0);             // version 1
    bytestream_put_buffer(&buf, ctx->vendor, 4);
    bytestream_put_be16  (&buf, avctx->width);
    bytestream_put_be16  (&buf, avctx->height);

    frame_flags = ctx->chroma_factor << 6;
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
        frame_flags |= pic->top_field_first ? 0x04 : 0x08;
    bytestream_put_byte  (&buf, frame_flags);

    bytestream_put_byte  (&buf, 0);             // reserved
    bytestream_put_byte  (&buf, pic->color_primaries);
    bytestream_put_byte  (&buf, pic->color_trc);
    bytestream_put_byte  (&buf, pic->colorspace);
    bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
    bytestream_put_byte  (&buf, 0);             // reserved
    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
        // luma quantisation matrix
        for (i = 0; i < 64; i++)
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
        // chroma quantisation matrix: the chroma planes are quantised with
        // tables derived from quant_chroma_mat, so that is the matrix the
        // header has to carry (it may differ from the luma one)
        for (i = 0; i < 64; i++)
            bytestream_put_byte(&buf, ctx->quant_chroma_mat[i]);
    } else {
        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
    }
    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size

    for (ctx->cur_picture_idx = 0;
         ctx->cur_picture_idx < ctx->pictures_per_frame;
         ctx->cur_picture_idx++) {
        // picture header
        picture_size_pos = buf + 1;
        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
        buf += 4;                                   // picture data size will be stored here
        bytestream_put_be16  (&buf, ctx->slices_per_picture);
        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs

        // seek table - will be filled during slice encoding
        slice_sizes = buf;
        buf += ctx->slices_per_picture * 2;

        // slices
        if (!ctx->force_quant) {
            ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
                                  ctx->mb_height);
            if (ret)
                return ret;
        }

        for (y = 0; y < ctx->mb_height; y++) {
            int mbs_per_slice = ctx->mbs_per_slice;
            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
                q = ctx->force_quant ? ctx->force_quant
                                     : ctx->slice_q[mb + y * ctx->slices_width];

                // shrink the slice until it fits into the remaining MBs
                while (ctx->mb_width - x < mbs_per_slice)
                    mbs_per_slice >>= 1;

                bytestream_put_byte(&buf, slice_hdr_size << 3);
                slice_hdr = buf;
                buf += slice_hdr_size - 1;
                // keep room for at least two worst-case slices
                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
                    uint8_t *start = pkt->data;
                    // Recompute new size according to max_slice_size
                    // and deduce delta
                    int delta = 200 + (ctx->pictures_per_frame *
                                ctx->slices_per_picture + 1) *
                                max_slice_size - pkt_size;

                    delta = FFMAX(delta, 2 * max_slice_size);
                    ctx->frame_size_upper_bound += delta;

                    if (!ctx->warn) {
                        avpriv_request_sample(avctx,
                                              "Packet too small: is %i,"
                                              " needs %i (slice: %i). "
                                              "Correct allocation",
                                              pkt_size, delta, max_slice_size);
                        ctx->warn = 1;
                    }

                    ret = av_grow_packet(pkt, delta);
                    if (ret < 0)
                        return ret;

                    pkt_size += delta;
                    // restore pointers: the packet data may have moved
                    orig_buf         = pkt->data + (orig_buf         - start);
                    buf              = pkt->data + (buf              - start);
                    picture_size_pos = pkt->data + (picture_size_pos - start);
                    slice_sizes      = pkt->data + (slice_sizes      - start);
                    slice_hdr        = pkt->data + (slice_hdr        - start);
                    tmp              = pkt->data + (tmp              - start);
                }
                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
                                   mbs_per_slice);
                if (ret < 0)
                    return ret;

                // slice header: quantiser, then the size of every plane
                // except the last one
                bytestream_put_byte(&slice_hdr, q);
                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
                for (i = 0; i < ctx->num_planes - 1; i++) {
                    bytestream_put_be16(&slice_hdr, sizes[i]);
                    slice_size += sizes[i];
                }
                bytestream_put_be16(&slice_sizes, slice_size);
                buf += slice_size - slice_hdr_size;
                if (max_slice_size < slice_size)
                    max_slice_size = slice_size;
            }
        }

        picture_size = buf - (picture_size_pos - 1);
        bytestream_put_be32(&picture_size_pos, picture_size);
    }

    // step back over the container ID and the size field of the frame atom
    orig_buf -= 8;
    frame_size = buf - orig_buf;
    bytestream_put_be32(&orig_buf, frame_size);

    pkt->size   = frame_size;
    *got_packet = 1;

    return 0;
}
1143 
encode_close(AVCodecContext *avctx)1144 static av_cold int encode_close(AVCodecContext *avctx)
1145 {
1146     ProresContext *ctx = avctx->priv_data;
1147     int i;
1148 
1149     if (ctx->tdata) {
1150         for (i = 0; i < avctx->thread_count; i++)
1151             av_freep(&ctx->tdata[i].nodes);
1152     }
1153     av_freep(&ctx->tdata);
1154     av_freep(&ctx->slice_q);
1155 
1156     return 0;
1157 }
1158 
/**
 * Copy an 8x8 block of 16-bit samples into a contiguous coefficient block
 * and run the forward DCT of the given DSP context on it in place.
 *
 * @param fdsp     DSP context providing the fdct() callback
 * @param src      top-left sample of the source block
 * @param linesize source stride; it is halved before being added to the
 *                 uint16_t pointer (presumably a stride in bytes)
 * @param block    destination for the 64 values, transformed in place
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    const ptrdiff_t stride = linesize >> 1;
    int16_t *dst = block;
    int row, col;

    for (row = 0; row < 8; row++, src += stride, dst += 8)
        for (col = 0; col < 8; col++)
            dst[col] = src[col];

    fdsp->fdct(block);
}
1172 
encode_init(AVCodecContext *avctx)1173 static av_cold int encode_init(AVCodecContext *avctx)
1174 {
1175     ProresContext *ctx = avctx->priv_data;
1176     int mps;
1177     int i, j;
1178     int min_quant, max_quant;
1179     int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1180 
1181     avctx->bits_per_raw_sample = 10;
1182 
1183     ctx->fdct      = prores_fdct;
1184     ctx->scantable = interlaced ? ff_prores_interlaced_scan
1185                                 : ff_prores_progressive_scan;
1186     ff_fdctdsp_init(&ctx->fdsp, avctx);
1187 
1188     mps = ctx->mbs_per_slice;
1189     if (mps & (mps - 1)) {
1190         av_log(avctx, AV_LOG_ERROR,
1191                "there should be an integer power of two MBs per slice\n");
1192         return AVERROR(EINVAL);
1193     }
1194     if (ctx->profile == PRORES_PROFILE_AUTO) {
1195         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1196         ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1197                         !(desc->log2_chroma_w + desc->log2_chroma_h))
1198                      ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1199         av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1200                "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1201                ? "4:4:4:4 profile because of the used input colorspace"
1202                : "HQ profile to keep best quality");
1203     }
1204     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1205         if (ctx->profile != PRORES_PROFILE_4444 &&
1206             ctx->profile != PRORES_PROFILE_4444XQ) {
1207             // force alpha and warn
1208             av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1209                    "encode alpha. Override with -profile if needed.\n");
1210             ctx->alpha_bits = 0;
1211         }
1212         if (ctx->alpha_bits & 7) {
1213             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1214             return AVERROR(EINVAL);
1215         }
1216         avctx->bits_per_coded_sample = 32;
1217     } else {
1218         ctx->alpha_bits = 0;
1219     }
1220 
1221     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1222                          ? CFACTOR_Y422
1223                          : CFACTOR_Y444;
1224     ctx->profile_info  = prores_profile_info + ctx->profile;
1225     ctx->num_planes    = 3 + !!ctx->alpha_bits;
1226 
1227     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1228 
1229     if (interlaced)
1230         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1231     else
1232         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1233 
1234     ctx->slices_width  = ctx->mb_width / mps;
1235     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1236     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1237     ctx->pictures_per_frame = 1 + interlaced;
1238 
1239     if (ctx->quant_sel == -1) {
1240         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1241         ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1242     } else {
1243         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1244         ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1245     }
1246 
1247     if (strlen(ctx->vendor) != 4) {
1248         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1249         return AVERROR_INVALIDDATA;
1250     }
1251 
1252     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1253     if (!ctx->force_quant) {
1254         if (!ctx->bits_per_mb) {
1255             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1256                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1257                                            ctx->pictures_per_frame)
1258                     break;
1259             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1260             if (ctx->alpha_bits)
1261                 ctx->bits_per_mb *= 20;
1262         } else if (ctx->bits_per_mb < 128) {
1263             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1264             return AVERROR_INVALIDDATA;
1265         }
1266 
1267         min_quant = ctx->profile_info->min_quant;
1268         max_quant = ctx->profile_info->max_quant;
1269         for (i = min_quant; i < MAX_STORED_Q; i++) {
1270             for (j = 0; j < 64; j++) {
1271                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1272                 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1273             }
1274         }
1275 
1276         ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
1277         if (!ctx->slice_q)
1278             return AVERROR(ENOMEM);
1279 
1280         ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
1281         if (!ctx->tdata)
1282             return AVERROR(ENOMEM);
1283 
1284         for (j = 0; j < avctx->thread_count; j++) {
1285             ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
1286                                                   TRELLIS_WIDTH
1287                                                   * sizeof(*ctx->tdata->nodes));
1288             if (!ctx->tdata[j].nodes)
1289                 return AVERROR(ENOMEM);
1290             for (i = min_quant; i < max_quant + 2; i++) {
1291                 ctx->tdata[j].nodes[i].prev_node = -1;
1292                 ctx->tdata[j].nodes[i].bits      = 0;
1293                 ctx->tdata[j].nodes[i].score     = 0;
1294             }
1295         }
1296     } else {
1297         int ls = 0;
1298         int ls_chroma = 0;
1299 
1300         if (ctx->force_quant > 64) {
1301             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1302             return AVERROR_INVALIDDATA;
1303         }
1304 
1305         for (j = 0; j < 64; j++) {
1306             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1307             ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1308             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1309             ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
1310         }
1311 
1312         ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1313         if (ctx->chroma_factor == CFACTOR_Y444)
1314             ctx->bits_per_mb += ls_chroma * 4;
1315     }
1316 
1317     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1318                                    ctx->slices_per_picture + 1) *
1319                                   (2 + 2 * ctx->num_planes +
1320                                    (mps * ctx->bits_per_mb) / 8)
1321                                   + 200;
1322 
1323     if (ctx->alpha_bits) {
1324          // The alpha plane is run-coded and might exceed the bit budget.
1325          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1326                                          ctx->slices_per_picture + 1) *
1327          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1328          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1329     }
1330 
1331     avctx->codec_tag   = ctx->profile_info->tag;
1332 
1333     av_log(avctx, AV_LOG_DEBUG,
1334            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1335            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1336            interlaced ? "yes" : "no", ctx->bits_per_mb);
1337     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1338            ctx->frame_size_upper_bound);
1339 
1340     return 0;
1341 }
1342 
/* Byte offset of an option's backing field inside ProresContext. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Option flags shared by all entries of the table below; parenthesised so
 * the expansion stays a single operand wherever it is used. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1345 
1346 static const AVOption options[] = {
1347     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1348         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1349     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1350         { .i64 = PRORES_PROFILE_AUTO },
1351         PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1352     { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1353         0, 0, VE, "profile" },
1354     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1355         0, 0, VE, "profile" },
1356     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1357         0, 0, VE, "profile" },
1358     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1359         0, 0, VE, "profile" },
1360     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1361         0, 0, VE, "profile" },
1362     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1363         0, 0, VE, "profile" },
1364     { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1365         0, 0, VE, "profile" },
1366     { "vendor", "vendor ID", OFFSET(vendor),
1367         AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1368     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1369         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1370     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1371         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1372     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1373         0, 0, VE, "quant_mat" },
1374     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1375         0, 0, VE, "quant_mat" },
1376     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1377         0, 0, VE, "quant_mat" },
1378     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1379         0, 0, VE, "quant_mat" },
1380     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1381         0, 0, VE, "quant_mat" },
1382     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1383         0, 0, VE, "quant_mat" },
1384     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1385         { .i64 = 16 }, 0, 16, VE },
1386     { NULL }
1387 };
1388 
1389 static const AVClass proresenc_class = {
1390     .class_name = "ProRes encoder",
1391     .item_name  = av_default_item_name,
1392     .option     = options,
1393     .version    = LIBAVUTIL_VERSION_INT,
1394 };
1395 
1396 const FFCodec ff_prores_ks_encoder = {
1397     .p.name         = "prores_ks",
1398     .p.long_name    = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1399     .p.type         = AVMEDIA_TYPE_VIDEO,
1400     .p.id           = AV_CODEC_ID_PRORES,
1401     .priv_data_size = sizeof(ProresContext),
1402     .init           = encode_init,
1403     .close          = encode_close,
1404     FF_CODEC_ENCODE_CB(encode_frame),
1405     .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
1406     .p.pix_fmts     = (const enum AVPixelFormat[]) {
1407                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1408                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1409                       },
1410     .p.priv_class   = &proresenc_class,
1411     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1412     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1413 };
1414