1 /*
2 * Apple ProRes encoder
3 *
4 * Copyright (c) 2012 Konstantin Shishkov
5 *
6 * This encoder appears to be based on Anatoliy Wassermans considering
7 * similarities in the bugs.
8 *
9 * This file is part of FFmpeg.
10 *
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 */
25
26 #include "libavutil/mem_internal.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/pixdesc.h"
29 #include "avcodec.h"
30 #include "codec_internal.h"
31 #include "encode.h"
32 #include "fdctdsp.h"
33 #include "put_bits.h"
34 #include "profiles.h"
35 #include "bytestream.h"
36 #include "proresdata.h"
37
38 #define CFACTOR_Y422 2
39 #define CFACTOR_Y444 3
40
41 #define MAX_MBS_PER_SLICE 8
42
43 #define MAX_PLANES 4
44
/*
 * Profile identifiers. Values 0..5 are listed in the same order as the
 * entries of prores_profile_info[]; PRORES_PROFILE_AUTO (-1) is outside
 * that range (presumably "let the encoder choose" - confirm at the use site).
 */
enum {
    PRORES_PROFILE_AUTO  = -1,
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
    PRORES_PROFILE_4444,
    PRORES_PROFILE_4444XQ,
};
54
/*
 * Quantisation matrix selectors, listed in the same order as the tables
 * in prores_quant_matrices[]. QUANT_MAT_DEFAULT is the value the frame
 * header writer compares against to signal "default matrices are used".
 */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_PROXY_CHROMA,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_XQ_LUMA,
    QUANT_MAT_DEFAULT,
};
64
/*
 * Base quantisation matrices, 64 entries each, written out as 8 rows of 8.
 * The tables appear in the same order as the QUANT_MAT_* enumerators.
 * The encoder multiplies these base values by the slice quantiser to get
 * the effective divisors.
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // proxy chromas
        4,  7,  9, 11, 13, 14, 63, 63,
        7,  7, 11, 12, 14, 63, 63, 63,
        9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // XQ luma
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  3,
        2,  2,  2,  2,  2,  2,  3,  3,
        2,  2,  2,  2,  2,  3,  3,  3,
        2,  2,  2,  2,  3,  3,  3,  4,
        2,  2,  2,  2,  3,  3,  4,  4,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
137
/*
 * Macroblock-count thresholds, in ascending order. The per-entry comments
 * give the largest frame size each threshold corresponds to.
 * NUM_MB_LIMITS is also the length of prores_profile::br_tab, so each
 * threshold presumably selects one br_tab entry - confirm at the use site.
 */
#define NUM_MB_LIMITS 4
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
145
/*
 * Static description of each supported profile, in the same order as the
 * PRORES_PROFILE_* enumerators (proxy first, 4444XQ last).
 */
static const struct prores_profile {
    const char *full_name;      // human-readable profile name
    uint32_t    tag;            // four-character code built with MKTAG()
    int         min_quant;      // lowest quantiser tried by the slice quantiser search
    int         max_quant;      // highest regular quantiser tried by that search
    int         br_tab[NUM_MB_LIMITS]; // one value per prores_mb_limits[] entry
                                       // (presumably a per-macroblock bit budget - confirm)
    int         quant;          // QUANT_MAT_* selector for luma
    int         quant_chroma;   // QUANT_MAT_* selector for chroma
} prores_profile_info[6] = {
    {
        .full_name = "proxy",
        .tag       = MKTAG('a', 'p', 'c', 'o'),
        .min_quant = 4,
        .max_quant = 8,
        .br_tab    = { 300, 242, 220, 194 },
        .quant     = QUANT_MAT_PROXY,
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
    },
    {
        .full_name = "LT",
        .tag       = MKTAG('a', 'p', 'c', 's'),
        .min_quant = 1,
        .max_quant = 9,
        .br_tab    = { 720, 560, 490, 440 },
        .quant     = QUANT_MAT_LT,
        .quant_chroma = QUANT_MAT_LT,
    },
    {
        .full_name = "standard",
        .tag       = MKTAG('a', 'p', 'c', 'n'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1050, 808, 710, 632 },
        .quant     = QUANT_MAT_STANDARD,
        .quant_chroma = QUANT_MAT_STANDARD,
    },
    {
        .full_name = "high quality",
        .tag       = MKTAG('a', 'p', 'c', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 1566, 1216, 1070, 950 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444",
        .tag       = MKTAG('a', 'p', '4', 'h'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 2350, 1828, 1600, 1425 },
        .quant     = QUANT_MAT_HQ,
        .quant_chroma = QUANT_MAT_HQ,
    },
    {
        .full_name = "4444XQ",
        .tag       = MKTAG('a', 'p', '4', 'x'),
        .min_quant = 1,
        .max_quant = 6,
        .br_tab    = { 3525, 2742, 2400, 2137 },
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
        .quant_chroma = QUANT_MAT_HQ,
    }
};
210
/* Number of trellis node slots reserved for each slice position. */
#define TRELLIS_WIDTH 16
/*
 * Score assigned to candidates that must not be chosen. Kept at half of
 * INT_MAX so that it can be compared against sums of real scores.
 * The expansion is parenthesised so the macro behaves as a single operand
 * next to operators such as '/', '%' or '*'.
 */
#define SCORE_LIMIT   (INT_MAX / 2)
213
/* One node of the per-row quantiser search trellis. */
struct TrellisNode {
    int prev_node;  // index of the best predecessor node, -1 while none has been linked
    int quant;      // quantiser this node stands for
    int bits;       // accumulated bit estimate along the best path ending here
    int score;      // accumulated error along that path, or SCORE_LIMIT if unusable
};
220
/*
 * Number of precomputed scaled quantisation matrices; quantisers at or
 * above this value use a matrix built on demand.
 */
#define MAX_STORED_Q 16

/* Scratch state owned by one worker of the quantiser search. */
typedef struct ProresThreadData {
    // transformed coefficients (planes 0-2) or alpha samples (plane 3) of the current slice
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    // 16x16 sample buffer used to pad macroblocks that cross the picture edge
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
    int16_t custom_q[64];           // luma matrix built for quantisers >= MAX_STORED_Q
    int16_t custom_chroma_q[64];    // chroma counterpart of custom_q
    struct TrellisNode *nodes;      // trellis nodes, TRELLIS_WIDTH per slice position
} ProresThreadData;
230
/* Private encoder state, reached through AVCodecContext.priv_data. */
typedef struct ProresContext {
    AVClass *class;
    // coefficient / alpha scratch used while writing a slice (only row 0 is used by encode_slice)
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
    // 16x16 sample buffer used to pad macroblocks that cross the picture edge
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
    int16_t quants[MAX_STORED_Q][64];           // luma matrices indexed by quantiser
    int16_t quants_chroma[MAX_STORED_Q][64];    // chroma matrices indexed by quantiser
    int16_t custom_q[64];           // luma matrix built on demand for quantisers >= MAX_STORED_Q
    int16_t custom_chroma_q[64];    // chroma counterpart of custom_q
    const uint8_t *quant_mat;           // base luma matrix (64 entries)
    const uint8_t *quant_chroma_mat;    // base chroma matrix (64 entries)
    const uint8_t *scantable;           // coefficient scan order (64 entries)

    // forward transform of one block: reads samples from src, writes 64 coefficients to block
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
                 ptrdiff_t linesize, int16_t *block);
    FDCTDSPContext fdsp;

    const AVFrame *pic;             // frame currently being encoded
    int mb_width, mb_height;        // loop bounds, in macroblocks, for the slice loops
    int mbs_per_slice;              // nominal slice width in macroblocks; trailing slices are halved to fit
    int num_chroma_blocks, chroma_factor;   // chroma_factor is CFACTOR_Y422 or CFACTOR_Y444
    int slices_width;               // number of slices per macroblock row
    int slices_per_picture;
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
    int cur_picture_idx;            // index of the picture (field) being encoded
    int num_planes;                 // planes to encode; plane 3 is alpha
    int bits_per_mb;                // bit budget per macroblock used by the quantiser search
    int force_quant;                // non-zero: use this quantiser and skip the search
    int alpha_bits;                 // alpha sample depth in bits; 0 when alpha is not coded
    int warn;                       // set once the "packet too small" notice has been issued

    char *vendor;                   // first 4 bytes are written to the frame header
    int quant_sel;                  // QUANT_MAT_* selector; QUANT_MAT_DEFAULT omits matrices from the header

    int frame_size_upper_bound;     // packet size requested for each frame

    int profile;
    const struct prores_profile *profile_info;

    int *slice_q;                   // quantiser chosen for each slice of the picture

    ProresThreadData *tdata;        // per-thread scratch, indexed by thread number
} ProresContext;
273
/**
 * Transform the samples of one slice of a luma or chroma plane.
 *
 * Each macroblock covers 16 rows and 4 * blocks_per_mb samples per row.
 * Macroblocks that extend beyond the plane are copied into emu_buf and
 * padded by repeating the last valid column and row. Macroblocks that
 * start at or beyond the right edge produce all-zero blocks.
 *
 * @param ctx            encoder context providing the transform
 * @param src            first sample of the slice
 * @param linesize       source row stride in bytes
 * @param x              horizontal position of src in the plane, in samples
 * @param y              vertical position of src in the plane, in samples
 * @param w              plane width in samples
 * @param h              plane height in samples
 * @param blocks         output, 64 coefficients per block
 * @param emu_buf        16x16 scratch buffer used for edge padding
 * @param mbs_per_slice  number of macroblocks to process
 * @param blocks_per_mb  blocks per macroblock (2 or 4)
 * @param is_chroma      selects the block order: luma stores each row of
 *                       blocks left to right, chroma stores each column
 *                       top to bottom
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const int mb_width = 4 * blocks_per_mb;
    const int two_cols = blocks_per_mb > 2;
    int mb;

    for (mb = 0; mb < mbs_per_slice; mb++, src += mb_width, x += mb_width) {
        const uint16_t *mb_src;
        const uint16_t *mb_lower;
        ptrdiff_t mb_stride;

        if (x >= w) {
            /* the rest of the slice lies outside the plane */
            memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }

        if (x + mb_width > w || y + 16 > h) {
            /* partially outside: build a padded copy of the macroblock */
            const int bw = FFMIN(w - x, mb_width);
            const int bh = FFMIN(h - y, 16);
            int row, col;

            for (row = 0; row < bh; row++) {
                uint16_t *dst = emu_buf + row * 16;
                int pix;

                memcpy(dst, (const uint8_t*)src + row * linesize,
                       bw * sizeof(*src));
                pix = dst[bw - 1];
                for (col = bw; col < mb_width; col++)
                    dst[col] = pix;
            }
            for (; row < 16; row++)
                memcpy(emu_buf + row * 16,
                       emu_buf + (bh - 1) * 16,
                       mb_width * sizeof(*emu_buf));

            mb_src    = emu_buf;
            mb_stride = 16 * sizeof(*emu_buf);
        } else {
            mb_src    = src;
            mb_stride = linesize;
        }

        /* the stride is in bytes, so stride * 4 samples is 8 rows down */
        mb_lower = mb_src + mb_stride * 4;

        if (is_chroma) {
            ctx->fdct(&ctx->fdsp, mb_src, mb_stride, blocks);
            blocks += 64;
            ctx->fdct(&ctx->fdsp, mb_lower, mb_stride, blocks);
            blocks += 64;
            if (two_cols) {
                ctx->fdct(&ctx->fdsp, mb_src + 8, mb_stride, blocks);
                blocks += 64;
                ctx->fdct(&ctx->fdsp, mb_lower + 8, mb_stride, blocks);
                blocks += 64;
            }
        } else {
            ctx->fdct(&ctx->fdsp, mb_src, mb_stride, blocks);
            blocks += 64;
            if (two_cols) {
                ctx->fdct(&ctx->fdsp, mb_src + 8, mb_stride, blocks);
                blocks += 64;
            }
            ctx->fdct(&ctx->fdsp, mb_lower, mb_stride, blocks);
            blocks += 64;
            if (two_cols) {
                ctx->fdct(&ctx->fdsp, mb_lower + 8, mb_stride, blocks);
                blocks += 64;
            }
        }
    }
}
344
/**
 * Copy the alpha samples of one slice into a 16-row buffer and convert
 * them to the coded alpha depth.
 *
 * For abits == 8 each sample is shifted right by 2; otherwise it is
 * widened with (v << 6) | (v >> 4). Columns and rows beyond the plane are
 * filled by repeating the last valid column and row.
 *
 * @param ctx            encoder context (not used here)
 * @param src            first alpha sample of the slice
 * @param linesize       source row stride in bytes
 * @param x              horizontal position of src in the plane, in samples
 * @param y              vertical position of src in the plane, in samples
 * @param w              plane width in samples
 * @param h              plane height in samples
 * @param blocks         output, 16 rows of 16 * mbs_per_slice samples
 * @param mbs_per_slice  slice width in macroblocks
 * @param abits          coded alpha depth in bits
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    const int copy_w = FFMIN(w - x, slice_width);
    const int copy_h = FFMIN(h - y, 16);
    int row, col;

    for (row = 0; row < copy_h; row++, blocks += slice_width, src += linesize >> 1) {
        memcpy(blocks, src, copy_w * sizeof(*src));

        for (col = 0; col < copy_w; col++) {
            if (abits == 8)
                blocks[col] >>= 2;
            else
                blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
        }

        /* pad to the right with the last valid sample of this row */
        for (col = copy_w; col < slice_width; col++)
            blocks[col] = blocks[copy_w - 1];
    }

    /* pad downwards by repeating the previous output row */
    for (; row < 16; row++, blocks += slice_width)
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
}
372
373 /**
374 * Write an unsigned rice/exp golomb codeword.
375 */
encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)376 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
377 {
378 unsigned int rice_order, exp_order, switch_bits, switch_val;
379 int exponent;
380
381 /* number of prefix bits to switch between Rice and expGolomb */
382 switch_bits = (codebook & 3) + 1;
383 rice_order = codebook >> 5; /* rice code order */
384 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
385
386 switch_val = switch_bits << rice_order;
387
388 if (val >= switch_val) {
389 val -= switch_val - (1 << exp_order);
390 exponent = av_log2(val);
391
392 put_bits(pb, exponent - exp_order + switch_bits, 0);
393 put_bits(pb, exponent + 1, val);
394 } else {
395 exponent = val >> rice_order;
396
397 if (exponent)
398 put_bits(pb, exponent, 0);
399 put_bits(pb, 1, 1);
400 if (rice_order)
401 put_sbits(pb, rice_order, val);
402 }
403 }
404
/* Yields -1 for negative and 0 for non-negative 32-bit signed values
 * (assumes an arithmetic right shift). */
#define GET_SIGN(x)  ((x) >> 31)
/* Folds a signed value into a non-negative code: v -> 2 * v for v >= 0,
 * v -> -2 * v - 1 for v < 0. Note that the argument is evaluated twice. */
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
407
/**
 * Write the DC coefficients of all blocks of a slice plane.
 *
 * The first DC is coded directly with FIRST_DC_CB. Each later DC is coded
 * as the difference to the previous one, with its sign flipped when the
 * previous difference was negative. The codebook for the next difference
 * is chosen from the size of the current code.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block, DC first
 * @param blocks_per_slice  number of blocks
 * @param scale             DC quantiser
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int prev_dc  = (blocks[0] - 0x4000) / scale;
    int codebook = 3;
    int sign     = 0;
    int blk;

    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));

    for (blk = 1; blk < blocks_per_slice; blk++) {
        const int dc       = (blocks[blk * 64] - 0x4000) / scale;
        const int diff     = dc - prev_dc;
        const int adjusted = (diff ^ sign) - sign;
        const int code     = MAKE_CODE(adjusted);

        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);

        codebook = FFMIN((code + (code & 1)) >> 1, 3);
        sign     = GET_SIGN(diff);
        prev_dc  = dc;
    }
}
433
/**
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited scan position by scan position, and for each
 * position across all blocks of the slice. Trailing zeros are not coded.
 *
 * @param pb                 bit writer
 * @param blocks             coefficients, 64 per block
 * @param blocks_per_slice   number of blocks
 * @param plane_size_factor  not used here
 * @param scan               scan order (64 entries)
 * @param qmat               quantiser per coefficient position (64 entries)
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       int plane_size_factor,
                       const uint8_t *scan, const int16_t *qmat)
{
    const int max_coeffs = blocks_per_slice << 6;
    int run_cb = ff_prores_run_to_cb_index[4];
    int lev_cb = ff_prores_lev_to_cb_index[2];
    int run    = 0;
    int pos, idx;

    for (pos = 1; pos < 64; pos++) {
        const int coef = scan[pos];

        for (idx = coef; idx < max_coeffs; idx += 64) {
            const int level = blocks[idx] / qmat[coef];
            int abs_level;

            if (!level) {
                run++;
                continue;
            }

            abs_level = FFABS(level);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                abs_level - 1);
            put_sbits(pb, 1, GET_SIGN(level));

            /* the next codebooks depend on the values just written */
            run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
            lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
            run    = 0;
        }
    }
}
467
/**
 * Write one transformed plane of a slice: all DC coefficients first,
 * then all AC coefficients.
 *
 * @param ctx                encoder context (supplies the scan table)
 * @param pb                 bit writer
 * @param src                not used here
 * @param linesize           not used here
 * @param mbs_per_slice      slice width in macroblocks
 * @param blocks             coefficients, 64 per block
 * @param blocks_per_mb      blocks per macroblock
 * @param plane_size_factor  forwarded to encode_acs
 * @param qmat               quantisation matrix; entry 0 scales the DCs
 */
static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                               const uint16_t *src, ptrdiff_t linesize,
                               int mbs_per_slice, int16_t *blocks,
                               int blocks_per_mb, int plane_size_factor,
                               const int16_t *qmat)
{
    const int num_blocks = mbs_per_slice * blocks_per_mb;
    const uint8_t *scan  = ctx->scantable;

    encode_dcs(pb, blocks, num_blocks, qmat[0]);
    encode_acs(pb, blocks, num_blocks, plane_size_factor, scan, qmat);
}
480
/**
 * Write the difference between two consecutive alpha values.
 *
 * The difference is taken modulo 2^abits. Small non-zero differences use
 * a short form (flag 0, magnitude - 1, sign); everything else uses the
 * long form (flag 1 followed by abits bits).
 *
 * @param pb     bit writer
 * @param cur    current alpha value
 * @param prev   previous alpha value
 * @param abits  alpha depth in bits
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    /* map the upper end of the modular range to small negative values */
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize) {
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
    } else {
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
    }
}
499
/**
 * Write the length of a run of repeated alpha values.
 *
 * A zero run is a single 1 bit. Any other run is a 0 bit followed by the
 * run length in 4 bits (run < 16) or 15 bits.
 *
 * @param pb   bit writer
 * @param run  number of repeats
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (!run) {
        put_bits(pb, 1, 1);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, run < 0x10 ? 4 : 15, run);
}
512
513 // todo alpha quantisation for high quants
encode_alpha_plane(ProresContext *ctx, PutBitContext *pb, int mbs_per_slice, uint16_t *blocks, int quant)514 static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
515 int mbs_per_slice, uint16_t *blocks,
516 int quant)
517 {
518 const int abits = ctx->alpha_bits;
519 const int mask = (1 << abits) - 1;
520 const int num_coeffs = mbs_per_slice * 256;
521 int prev = mask, cur;
522 int idx = 0;
523 int run = 0;
524
525 cur = blocks[idx++];
526 put_alpha_diff(pb, cur, prev, abits);
527 prev = cur;
528 do {
529 cur = blocks[idx++];
530 if (cur != prev) {
531 put_alpha_run (pb, run);
532 put_alpha_diff(pb, cur, prev, abits);
533 prev = cur;
534 run = 0;
535 } else {
536 run++;
537 }
538 } while (idx < num_coeffs);
539 if (run)
540 put_alpha_run(pb, run);
541 }
542
/**
 * Encode all planes of one slice.
 *
 * Planes 0-2 are transformed and entropy coded with the luma or chroma
 * quantisation matrix. Plane 3 (alpha) is coded without a transform.
 * The bit writer is flushed after every plane so that each plane ends on
 * a byte boundary.
 *
 * @param avctx          codec context; priv_data is the ProresContext
 * @param pic            frame being encoded
 * @param pb             bit writer for the slice payload
 * @param sizes          receives the byte size of every encoded plane
 * @param x              slice position in macroblocks (horizontal)
 * @param y              slice position in macroblocks (vertical)
 * @param quant          quantiser selected for the slice
 * @param mbs_per_slice  slice width in macroblocks
 * @return the byte count reported by put_bytes_output() after the last plane
 */
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
                        PutBitContext *pb,
                        int sizes[4], int x, int y, int quant,
                        int mbs_per_slice)
{
    ProresContext *ctx = avctx->priv_data;
    int i, xp, yp;
    int total_size = 0;
    const uint16_t *src;
    int slice_width_factor = av_log2(mbs_per_slice);
    int num_cblocks, pwidth, line_add;
    ptrdiff_t linesize;
    int plane_factor, is_chroma;
    /* signed to match the int16_t matrices they point at and the
     * const int16_t * parameter of encode_slice_plane() */
    int16_t *qmat;
    int16_t *qmat_chroma;

    /* for interlaced input, select the field rows of the current picture */
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;

    if (ctx->force_quant) {
        /* fixed quantiser: the matrices in slot 0 are used */
        qmat        = ctx->quants[0];
        qmat_chroma = ctx->quants_chroma[0];
    } else if (quant < MAX_STORED_Q) {
        qmat        = ctx->quants[quant];
        qmat_chroma = ctx->quants_chroma[quant];
    } else {
        /* no precomputed matrix for this quantiser: build it now */
        qmat        = ctx->custom_q;
        qmat_chroma = ctx->custom_chroma_q;
        for (i = 0; i < 64; i++) {
            qmat[i]        = ctx->quant_mat[i] * quant;
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
        }
    }

    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma    = (i == 1 || i == 2);
        plane_factor = slice_width_factor + 2;
        if (is_chroma)
            plane_factor += ctx->chroma_factor - 3;
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
            /* full-width plane: 16 samples and 4 blocks per macroblock */
            xp          = x << 4;
            yp          = y << 4;
            num_cblocks = 4;
            pwidth      = avctx->width;
        } else {
            /* horizontally subsampled chroma: 8 samples, 2 blocks */
            xp          = x << 3;
            yp          = y << 4;
            num_cblocks = 2;
            pwidth      = avctx->width >> 1;
        }

        linesize = pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
                                line_add * pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], ctx->emu_buf,
                           mbs_per_slice, num_cblocks, is_chroma);
            encode_slice_plane(ctx, pb, src, linesize,
                               mbs_per_slice, ctx->blocks[0],
                               num_cblocks, plane_factor,
                               is_chroma ? qmat_chroma : qmat);
        } else {
            get_alpha_data(ctx, src, linesize, xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
            /* the alpha coder reads the same buffer as unsigned samples */
            encode_alpha_plane(ctx, pb, mbs_per_slice,
                               (uint16_t *)ctx->blocks[0], quant);
        }
        flush_put_bits(pb);
        sizes[i]   = put_bytes_output(pb) - total_size;
        total_size = put_bytes_output(pb);
    }
    return total_size;
}
626
/**
 * Return the number of bits encode_vlc_codeword() writes for a value.
 *
 * @param codebook  packed codebook descriptor (switch bits in bits 0-1,
 *                  exp-Golomb order in bits 2-4, Rice order from bit 5)
 * @param val       value to be coded
 * @return code length in bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary quotient, stop bit and rice_order low bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix plus exponent + 1 value bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
648
estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice, int scale)649 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
650 int scale)
651 {
652 int i;
653 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
654 int bits;
655
656 prev_dc = (blocks[0] - 0x4000) / scale;
657 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
658 sign = 0;
659 codebook = 3;
660 blocks += 64;
661 *error += FFABS(blocks[0] - 0x4000) % scale;
662
663 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
664 dc = (blocks[0] - 0x4000) / scale;
665 *error += FFABS(blocks[0] - 0x4000) % scale;
666 delta = dc - prev_dc;
667 new_sign = GET_SIGN(delta);
668 delta = (delta ^ sign) - sign;
669 code = MAKE_CODE(delta);
670 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
671 codebook = (code + (code & 1)) >> 1;
672 codebook = FFMIN(codebook, 3);
673 sign = new_sign;
674 prev_dc = dc;
675 }
676
677 return bits;
678 }
679
estimate_acs(int *error, int16_t *blocks, int blocks_per_slice, int plane_size_factor, const uint8_t *scan, const int16_t *qmat)680 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
681 int plane_size_factor,
682 const uint8_t *scan, const int16_t *qmat)
683 {
684 int idx, i;
685 int run, level, run_cb, lev_cb;
686 int max_coeffs, abs_level;
687 int bits = 0;
688
689 max_coeffs = blocks_per_slice << 6;
690 run_cb = ff_prores_run_to_cb_index[4];
691 lev_cb = ff_prores_lev_to_cb_index[2];
692 run = 0;
693
694 for (i = 1; i < 64; i++) {
695 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
696 level = blocks[idx] / qmat[scan[i]];
697 *error += FFABS(blocks[idx]) % qmat[scan[i]];
698 if (level) {
699 abs_level = FFABS(level);
700 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
701 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
702 abs_level - 1) + 1;
703
704 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
705 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
706 run = 0;
707 } else {
708 run++;
709 }
710 }
711 }
712
713 return bits;
714 }
715
estimate_slice_plane(ProresContext *ctx, int *error, int plane, const uint16_t *src, ptrdiff_t linesize, int mbs_per_slice, int blocks_per_mb, int plane_size_factor, const int16_t *qmat, ProresThreadData *td)716 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
717 const uint16_t *src, ptrdiff_t linesize,
718 int mbs_per_slice,
719 int blocks_per_mb, int plane_size_factor,
720 const int16_t *qmat, ProresThreadData *td)
721 {
722 int blocks_per_slice;
723 int bits;
724
725 blocks_per_slice = mbs_per_slice * blocks_per_mb;
726
727 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
728 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
729 plane_size_factor, ctx->scantable, qmat);
730
731 return FFALIGN(bits, 8);
732 }
733
/**
 * Return the number of bits put_alpha_diff() writes for a pair of values.
 *
 * @param cur    current alpha value
 * @param prev   previous alpha value
 * @param abits  alpha depth in bits
 * @return abits + 1 for the long form, dbits + 1 for the short form
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    /* map the upper end of the modular range to small negative values */
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize)
        return dbits + 1;

    return abits + 1;
}
748
/**
 * Estimate the number of bits encode_alpha_plane() produces for a slice.
 *
 * Run lengths are costed as put_alpha_run() writes them: 1 bit for an
 * empty run, otherwise one flag bit plus 4 bits (run < 16) or 15 bits.
 *
 * @param ctx            encoder context (supplies the alpha depth)
 * @param src            not used here
 * @param linesize       not used here
 * @param mbs_per_slice  slice width in macroblocks (256 samples each)
 * @param blocks         alpha samples
 * @return estimated size in bits
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    /* the first sample is coded as a difference to the all-ones value */
    cur = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            if (!run)
                bits++;
            else if (run < 0x10)
                bits += 1 + 4;  /* flag bit + short run length */
            else
                bits += 1 + 15; /* flag bit + long run length */
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    /* the run that reaches the end of the slice is written as well */
    if (run) {
        if (run < 0x10)
            bits += 1 + 4;
        else
            bits += 1 + 15;
    }

    return bits;
}
790
/**
 * Evaluate one slice for every candidate quantiser and extend the
 * quantiser-selection trellis by one slice position.
 *
 * The slice is transformed once into td->blocks. Its size and quantisation
 * error are then estimated for each quantiser from the profile's min_quant
 * to max_quant, plus one "overquant" candidate stored in slot max_quant + 1.
 * Each candidate node is linked to the predecessor (a node of the previous
 * slice position) that gives the lowest accumulated error. Paths whose
 * accumulated bits exceed the budget get SCORE_LIMIT.
 *
 * @param avctx          codec context; priv_data is the ProresContext
 * @param trellis_node   index of the first node slot of this slice position
 *                       in td->nodes; the previous position's nodes sit
 *                       TRELLIS_WIDTH entries earlier
 * @param x              slice position in macroblocks (horizontal)
 * @param y              slice position in macroblocks (vertical)
 * @param mbs_per_slice  slice width in macroblocks
 * @param td             scratch buffers and trellis of the calling thread
 * @return index in td->nodes of the lowest-score node of this position
 */
static int find_slice_quant(AVCodecContext *avctx,
                            int trellis_node, int x, int y, int mbs_per_slice,
                            ProresThreadData *td)
{
    ProresContext *ctx = avctx->priv_data;
    int i, q, pq, xp, yp;
    const uint16_t *src;
    int slice_width_factor = av_log2(mbs_per_slice);
    int num_cblocks[MAX_PLANES], pwidth;
    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
    const int min_quant = ctx->profile_info->min_quant;
    const int max_quant = ctx->profile_info->max_quant;
    int error, bits, bits_limit;
    int mbs, prev, cur, new_score;
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
    int overquant;
    // NOTE(review): these point at int16_t arrays and are passed on as
    // const int16_t *; the unsigned type here looks like a signedness slip.
    uint16_t *qmat;
    uint16_t *qmat_chroma;
    int linesize[4], line_add;
    int alpha_bits = 0;

    // for interlaced input, select the field rows of the current picture
    if (ctx->pictures_per_frame == 1)
        line_add = 0;
    else
        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
    // macroblocks of this row up to and including this slice
    mbs = x + mbs_per_slice;

    // transform (or, for alpha, copy) every plane of the slice once
    for (i = 0; i < ctx->num_planes; i++) {
        is_chroma[i]    = (i == 1 || i == 2);
        plane_factor[i] = slice_width_factor + 2;
        if (is_chroma[i])
            plane_factor[i] += ctx->chroma_factor - 3;
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
            xp             = x << 4;
            yp             = y << 4;
            num_cblocks[i] = 4;
            pwidth         = avctx->width;
        } else {
            xp             = x << 3;
            yp             = y << 4;
            num_cblocks[i] = 2;
            pwidth         = avctx->width >> 1;
        }

        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
                                 line_add * ctx->pic->linesize[i]) + xp;

        if (i < 3) {
            get_slice_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], td->emu_buf,
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
        } else {
            get_alpha_data(ctx, src, linesize[i], xp, yp,
                           pwidth, avctx->height / ctx->pictures_per_frame,
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
        }
    }

    // reset this position's nodes; slot max_quant + 1 is the overquant node
    for (q = min_quant; q < max_quant + 2; q++) {
        td->nodes[trellis_node + q].prev_node = -1;
        td->nodes[trellis_node + q].quant     = q;
    }

    // alpha is estimated once and added to every candidate.
    // From here on src still points into the last plane processed; the
    // estimate functions in this file do not read it.
    if (ctx->alpha_bits)
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
                                          mbs_per_slice, td->blocks[3]);
    // todo: maybe perform coarser quantising to fit into frame size when needed
    for (q = min_quant; q <= max_quant; q++) {
        bits  = alpha_bits;
        error = 0;
        bits += estimate_slice_plane(ctx, &error, 0,
                                     src, linesize[0],
                                     mbs_per_slice,
                                     num_cblocks[0], plane_factor[0],
                                     ctx->quants[q], td); /* estimate luma plane */
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
            bits += estimate_slice_plane(ctx, &error, i,
                                         src, linesize[i],
                                         mbs_per_slice,
                                         num_cblocks[i], plane_factor[i],
                                         ctx->quants_chroma[q], td);
        }
        // slices estimated above 65000 bytes are ruled out
        if (bits > 65000 * 8)
            error = SCORE_LIMIT;

        slice_bits[q]  = bits;
        slice_score[q] = error;
    }
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
        // max_quant already fits the slice budget: the overquant slot
        // duplicates it with a slightly worse score
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
        overquant = max_quant;
    } else {
        // raise the quantiser until the slice fits its budget.
        // NOTE(review): if no quantiser below 128 fits, the loop ends with
        // q == 128 and the estimate of q == 127 is stored with it.
        for (q = max_quant + 1; q < 128; q++) {
            bits  = alpha_bits;
            error = 0;
            if (q < MAX_STORED_Q) {
                qmat = ctx->quants[q];
                qmat_chroma = ctx->quants_chroma[q];
            } else {
                qmat = td->custom_q;
                qmat_chroma = td->custom_chroma_q;
                for (i = 0; i < 64; i++) {
                    qmat[i] = ctx->quant_mat[i] * q;
                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
                }
            }
            bits += estimate_slice_plane(ctx, &error, 0,
                                         src, linesize[0],
                                         mbs_per_slice,
                                         num_cblocks[0], plane_factor[0],
                                         qmat, td);/* estimate luma plane */
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
                bits += estimate_slice_plane(ctx, &error, i,
                                             src, linesize[i],
                                             mbs_per_slice,
                                             num_cblocks[i], plane_factor[i],
                                             qmat_chroma, td);
            }
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
                break;
        }

        slice_bits[max_quant + 1]  = bits;
        slice_score[max_quant + 1] = error;
        overquant = q;
    }
    td->nodes[trellis_node + max_quant + 1].quant = overquant;

    // cumulative budget for all macroblocks of the row up to this slice
    bits_limit = mbs * ctx->bits_per_mb;
    // link every candidate to its best predecessor
    for (pq = min_quant; pq < max_quant + 2; pq++) {
        prev = trellis_node - TRELLIS_WIDTH + pq;

        for (q = min_quant; q < max_quant + 2; q++) {
            cur = trellis_node + q;

            bits  = td->nodes[prev].bits + slice_bits[q];
            error = slice_score[q];
            if (bits > bits_limit)
                error = SCORE_LIMIT;

            // SCORE_LIMIT is sticky: it is never added to anything
            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
                new_score = td->nodes[prev].score + error;
            else
                new_score = SCORE_LIMIT;
            // on ties the later predecessor wins
            if (td->nodes[cur].prev_node == -1 ||
                td->nodes[cur].score >= new_score) {

                td->nodes[cur].bits      = bits;
                td->nodes[cur].score     = new_score;
                td->nodes[cur].prev_node = prev;
            }
        }
    }

    // return the lowest-score node; on ties the higher slot wins
    error = td->nodes[trellis_node + min_quant].score;
    pq    = trellis_node + min_quant;
    for (q = min_quant + 1; q < max_quant + 2; q++) {
        if (td->nodes[trellis_node + q].score <= error) {
            error = td->nodes[trellis_node + q].score;
            pq    = trellis_node + q;
        }
    }

    return pq;
}
959
/**
 * Worker job: pick the quantisers for all slices of one macroblock row.
 *
 * The row is walked left to right to build the trellis, then the best
 * path is followed backwards through prev_node to fill ctx->slice_q.
 *
 * @param avctx     codec context; priv_data is the ProresContext
 * @param arg       not used here
 * @param jobnr     macroblock row to process
 * @param threadnr  selects the ProresThreadData entry to work with
 * @return 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx   = avctx->priv_data;
    ProresThreadData *td = &ctx->tdata[threadnr];
    const int row        = jobnr;
    int slice_mbs        = ctx->mbs_per_slice;
    int node             = 0;
    int mb_x, slice;

    for (mb_x = 0, slice = 0; mb_x < ctx->mb_width; mb_x += slice_mbs, slice++) {
        /* halve the slice width until it fits into the rest of the row */
        while (ctx->mb_width - mb_x < slice_mbs)
            slice_mbs >>= 1;
        node = find_slice_quant(avctx,
                                (slice + 1) * TRELLIS_WIDTH, mb_x, row,
                                slice_mbs, td);
    }

    /* follow the best path from the last slice back to the first */
    for (slice = ctx->slices_width - 1; slice >= 0; slice--) {
        ctx->slice_q[slice + row * ctx->slices_width] = td->nodes[node].quant;
        node = td->nodes[node].prev_node;
    }

    return 0;
}
983
encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *pic, int *got_packet)984 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
985 const AVFrame *pic, int *got_packet)
986 {
987 ProresContext *ctx = avctx->priv_data;
988 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
989 uint8_t *picture_size_pos;
990 PutBitContext pb;
991 int x, y, i, mb, q = 0;
992 int sizes[4] = { 0 };
993 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
994 int frame_size, picture_size, slice_size;
995 int pkt_size, ret;
996 int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
997 uint8_t frame_flags;
998
999 ctx->pic = pic;
1000 pkt_size = ctx->frame_size_upper_bound;
1001
1002 if ((ret = ff_alloc_packet(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
1003 return ret;
1004
1005 orig_buf = pkt->data;
1006
1007 // frame atom
1008 orig_buf += 4; // frame size
1009 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
1010 buf = orig_buf;
1011
1012 // frame header
1013 tmp = buf;
1014 buf += 2; // frame header size will be stored here
1015 bytestream_put_be16 (&buf, 0); // version 1
1016 bytestream_put_buffer(&buf, ctx->vendor, 4);
1017 bytestream_put_be16 (&buf, avctx->width);
1018 bytestream_put_be16 (&buf, avctx->height);
1019
1020 frame_flags = ctx->chroma_factor << 6;
1021 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1022 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1023 bytestream_put_byte (&buf, frame_flags);
1024
1025 bytestream_put_byte (&buf, 0); // reserved
1026 bytestream_put_byte (&buf, pic->color_primaries);
1027 bytestream_put_byte (&buf, pic->color_trc);
1028 bytestream_put_byte (&buf, pic->colorspace);
1029 bytestream_put_byte (&buf, 0x40 | (ctx->alpha_bits >> 3));
1030 bytestream_put_byte (&buf, 0); // reserved
1031 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1032 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
1033 // luma quantisation matrix
1034 for (i = 0; i < 64; i++)
1035 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1036 // chroma quantisation matrix
1037 for (i = 0; i < 64; i++)
1038 bytestream_put_byte(&buf, ctx->quant_mat[i]);
1039 } else {
1040 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
1041 }
1042 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
1043
1044 for (ctx->cur_picture_idx = 0;
1045 ctx->cur_picture_idx < ctx->pictures_per_frame;
1046 ctx->cur_picture_idx++) {
1047 // picture header
1048 picture_size_pos = buf + 1;
1049 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
1050 buf += 4; // picture data size will be stored here
1051 bytestream_put_be16 (&buf, ctx->slices_per_picture);
1052 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1053
1054 // seek table - will be filled during slice encoding
1055 slice_sizes = buf;
1056 buf += ctx->slices_per_picture * 2;
1057
1058 // slices
1059 if (!ctx->force_quant) {
1060 ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1061 ctx->mb_height);
1062 if (ret)
1063 return ret;
1064 }
1065
1066 for (y = 0; y < ctx->mb_height; y++) {
1067 int mbs_per_slice = ctx->mbs_per_slice;
1068 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1069 q = ctx->force_quant ? ctx->force_quant
1070 : ctx->slice_q[mb + y * ctx->slices_width];
1071
1072 while (ctx->mb_width - x < mbs_per_slice)
1073 mbs_per_slice >>= 1;
1074
1075 bytestream_put_byte(&buf, slice_hdr_size << 3);
1076 slice_hdr = buf;
1077 buf += slice_hdr_size - 1;
1078 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1079 uint8_t *start = pkt->data;
1080 // Recompute new size according to max_slice_size
1081 // and deduce delta
1082 int delta = 200 + (ctx->pictures_per_frame *
1083 ctx->slices_per_picture + 1) *
1084 max_slice_size - pkt_size;
1085
1086 delta = FFMAX(delta, 2 * max_slice_size);
1087 ctx->frame_size_upper_bound += delta;
1088
1089 if (!ctx->warn) {
1090 avpriv_request_sample(avctx,
1091 "Packet too small: is %i,"
1092 " needs %i (slice: %i). "
1093 "Correct allocation",
1094 pkt_size, delta, max_slice_size);
1095 ctx->warn = 1;
1096 }
1097
1098 ret = av_grow_packet(pkt, delta);
1099 if (ret < 0)
1100 return ret;
1101
1102 pkt_size += delta;
1103 // restore pointers
1104 orig_buf = pkt->data + (orig_buf - start);
1105 buf = pkt->data + (buf - start);
1106 picture_size_pos = pkt->data + (picture_size_pos - start);
1107 slice_sizes = pkt->data + (slice_sizes - start);
1108 slice_hdr = pkt->data + (slice_hdr - start);
1109 tmp = pkt->data + (tmp - start);
1110 }
1111 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1112 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1113 mbs_per_slice);
1114 if (ret < 0)
1115 return ret;
1116
1117 bytestream_put_byte(&slice_hdr, q);
1118 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1119 for (i = 0; i < ctx->num_planes - 1; i++) {
1120 bytestream_put_be16(&slice_hdr, sizes[i]);
1121 slice_size += sizes[i];
1122 }
1123 bytestream_put_be16(&slice_sizes, slice_size);
1124 buf += slice_size - slice_hdr_size;
1125 if (max_slice_size < slice_size)
1126 max_slice_size = slice_size;
1127 }
1128 }
1129
1130 picture_size = buf - (picture_size_pos - 1);
1131 bytestream_put_be32(&picture_size_pos, picture_size);
1132 }
1133
1134 orig_buf -= 8;
1135 frame_size = buf - orig_buf;
1136 bytestream_put_be32(&orig_buf, frame_size);
1137
1138 pkt->size = frame_size;
1139 *got_packet = 1;
1140
1141 return 0;
1142 }
1143
encode_close(AVCodecContext *avctx)1144 static av_cold int encode_close(AVCodecContext *avctx)
1145 {
1146 ProresContext *ctx = avctx->priv_data;
1147 int i;
1148
1149 if (ctx->tdata) {
1150 for (i = 0; i < avctx->thread_count; i++)
1151 av_freep(&ctx->tdata[i].nodes);
1152 }
1153 av_freep(&ctx->tdata);
1154 av_freep(&ctx->slice_q);
1155
1156 return 0;
1157 }
1158
/**
 * Copy one 8x8 block of 16-bit samples into a contiguous coefficient buffer
 * and run the forward DCT on it in place.
 *
 * @param fdsp     DSP context providing the fdct() implementation
 * @param src      top-left sample of the 8x8 source block
 * @param linesize distance between source rows; it is halved to step a
 *                 16-bit sample pointer, i.e. it is treated as a byte stride
 * @param block    output buffer of 64 coefficients, row-major
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    const ptrdiff_t stride = linesize >> 1;
    const uint16_t *row    = src;
    int16_t *out           = block;
    int r, c;

    for (r = 0; r < 8; r++, row += stride, out += 8)
        for (c = 0; c < 8; c++)
            out[c] = row[c];

    fdsp->fdct(block);
}
1172
encode_init(AVCodecContext *avctx)1173 static av_cold int encode_init(AVCodecContext *avctx)
1174 {
1175 ProresContext *ctx = avctx->priv_data;
1176 int mps;
1177 int i, j;
1178 int min_quant, max_quant;
1179 int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1180
1181 avctx->bits_per_raw_sample = 10;
1182
1183 ctx->fdct = prores_fdct;
1184 ctx->scantable = interlaced ? ff_prores_interlaced_scan
1185 : ff_prores_progressive_scan;
1186 ff_fdctdsp_init(&ctx->fdsp, avctx);
1187
1188 mps = ctx->mbs_per_slice;
1189 if (mps & (mps - 1)) {
1190 av_log(avctx, AV_LOG_ERROR,
1191 "there should be an integer power of two MBs per slice\n");
1192 return AVERROR(EINVAL);
1193 }
1194 if (ctx->profile == PRORES_PROFILE_AUTO) {
1195 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1196 ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1197 !(desc->log2_chroma_w + desc->log2_chroma_h))
1198 ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1199 av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1200 "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1201 ? "4:4:4:4 profile because of the used input colorspace"
1202 : "HQ profile to keep best quality");
1203 }
1204 if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1205 if (ctx->profile != PRORES_PROFILE_4444 &&
1206 ctx->profile != PRORES_PROFILE_4444XQ) {
1207 // force alpha and warn
1208 av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1209 "encode alpha. Override with -profile if needed.\n");
1210 ctx->alpha_bits = 0;
1211 }
1212 if (ctx->alpha_bits & 7) {
1213 av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1214 return AVERROR(EINVAL);
1215 }
1216 avctx->bits_per_coded_sample = 32;
1217 } else {
1218 ctx->alpha_bits = 0;
1219 }
1220
1221 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1222 ? CFACTOR_Y422
1223 : CFACTOR_Y444;
1224 ctx->profile_info = prores_profile_info + ctx->profile;
1225 ctx->num_planes = 3 + !!ctx->alpha_bits;
1226
1227 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
1228
1229 if (interlaced)
1230 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1231 else
1232 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1233
1234 ctx->slices_width = ctx->mb_width / mps;
1235 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1236 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1237 ctx->pictures_per_frame = 1 + interlaced;
1238
1239 if (ctx->quant_sel == -1) {
1240 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1241 ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1242 } else {
1243 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1244 ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1245 }
1246
1247 if (strlen(ctx->vendor) != 4) {
1248 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1249 return AVERROR_INVALIDDATA;
1250 }
1251
1252 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1253 if (!ctx->force_quant) {
1254 if (!ctx->bits_per_mb) {
1255 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1256 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1257 ctx->pictures_per_frame)
1258 break;
1259 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
1260 if (ctx->alpha_bits)
1261 ctx->bits_per_mb *= 20;
1262 } else if (ctx->bits_per_mb < 128) {
1263 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1264 return AVERROR_INVALIDDATA;
1265 }
1266
1267 min_quant = ctx->profile_info->min_quant;
1268 max_quant = ctx->profile_info->max_quant;
1269 for (i = min_quant; i < MAX_STORED_Q; i++) {
1270 for (j = 0; j < 64; j++) {
1271 ctx->quants[i][j] = ctx->quant_mat[j] * i;
1272 ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1273 }
1274 }
1275
1276 ctx->slice_q = av_malloc_array(ctx->slices_per_picture, sizeof(*ctx->slice_q));
1277 if (!ctx->slice_q)
1278 return AVERROR(ENOMEM);
1279
1280 ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
1281 if (!ctx->tdata)
1282 return AVERROR(ENOMEM);
1283
1284 for (j = 0; j < avctx->thread_count; j++) {
1285 ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
1286 TRELLIS_WIDTH
1287 * sizeof(*ctx->tdata->nodes));
1288 if (!ctx->tdata[j].nodes)
1289 return AVERROR(ENOMEM);
1290 for (i = min_quant; i < max_quant + 2; i++) {
1291 ctx->tdata[j].nodes[i].prev_node = -1;
1292 ctx->tdata[j].nodes[i].bits = 0;
1293 ctx->tdata[j].nodes[i].score = 0;
1294 }
1295 }
1296 } else {
1297 int ls = 0;
1298 int ls_chroma = 0;
1299
1300 if (ctx->force_quant > 64) {
1301 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1302 return AVERROR_INVALIDDATA;
1303 }
1304
1305 for (j = 0; j < 64; j++) {
1306 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1307 ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1308 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
1309 ls_chroma += av_log2((1 << 11) / ctx->quants_chroma[0][j]) * 2 + 1;
1310 }
1311
1312 ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1313 if (ctx->chroma_factor == CFACTOR_Y444)
1314 ctx->bits_per_mb += ls_chroma * 4;
1315 }
1316
1317 ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1318 ctx->slices_per_picture + 1) *
1319 (2 + 2 * ctx->num_planes +
1320 (mps * ctx->bits_per_mb) / 8)
1321 + 200;
1322
1323 if (ctx->alpha_bits) {
1324 // The alpha plane is run-coded and might exceed the bit budget.
1325 ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1326 ctx->slices_per_picture + 1) *
1327 /* num pixels per slice */ (ctx->mbs_per_slice * 256 *
1328 /* bits per pixel */ (1 + ctx->alpha_bits + 1) + 7 >> 3);
1329 }
1330
1331 avctx->codec_tag = ctx->profile_info->tag;
1332
1333 av_log(avctx, AV_LOG_DEBUG,
1334 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1335 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1336 interlaced ? "yes" : "no", ctx->bits_per_mb);
1337 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1338 ctx->frame_size_upper_bound);
1339
1340 return 0;
1341 }
1342
/* Byte offset of a ProresContext member, for the AVOption table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Option flags shared by every entry: video + encoding parameter.
 * Parenthesised so the macro stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1345
1346 static const AVOption options[] = {
1347 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1348 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1349 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1350 { .i64 = PRORES_PROFILE_AUTO },
1351 PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1352 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1353 0, 0, VE, "profile" },
1354 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1355 0, 0, VE, "profile" },
1356 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1357 0, 0, VE, "profile" },
1358 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1359 0, 0, VE, "profile" },
1360 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1361 0, 0, VE, "profile" },
1362 { "4444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1363 0, 0, VE, "profile" },
1364 { "4444xq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1365 0, 0, VE, "profile" },
1366 { "vendor", "vendor ID", OFFSET(vendor),
1367 AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1368 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1369 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1370 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1371 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1372 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1373 0, 0, VE, "quant_mat" },
1374 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1375 0, 0, VE, "quant_mat" },
1376 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1377 0, 0, VE, "quant_mat" },
1378 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1379 0, 0, VE, "quant_mat" },
1380 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1381 0, 0, VE, "quant_mat" },
1382 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1383 0, 0, VE, "quant_mat" },
1384 { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1385 { .i64 = 16 }, 0, 16, VE },
1386 { NULL }
1387 };
1388
1389 static const AVClass proresenc_class = {
1390 .class_name = "ProRes encoder",
1391 .item_name = av_default_item_name,
1392 .option = options,
1393 .version = LIBAVUTIL_VERSION_INT,
1394 };
1395
1396 const FFCodec ff_prores_ks_encoder = {
1397 .p.name = "prores_ks",
1398 .p.long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1399 .p.type = AVMEDIA_TYPE_VIDEO,
1400 .p.id = AV_CODEC_ID_PRORES,
1401 .priv_data_size = sizeof(ProresContext),
1402 .init = encode_init,
1403 .close = encode_close,
1404 FF_CODEC_ENCODE_CB(encode_frame),
1405 .p.capabilities = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
1406 .p.pix_fmts = (const enum AVPixelFormat[]) {
1407 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1408 AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1409 },
1410 .p.priv_class = &proresenc_class,
1411 .p.profiles = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1412 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1413 };
1414