1/*
2 * Apple ProRes encoder
3 *
4 * Copyright (c) 2011 Anatoliy Wasserman
5 * Copyright (c) 2012 Konstantin Shishkov
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file
26 * Apple ProRes encoder (Anatoliy Wasserman version)
 * Known FOURCCs: 'ap4x' (4444 XQ), 'ap4h' (4444), 'apch' (HQ), 'apcn' (422), 'apcs' (LT), 'apco' (Proxy)
28 */
29
30#include "libavutil/mem_internal.h"
31#include "libavutil/opt.h"
32#include "avcodec.h"
33#include "codec_internal.h"
34#include "dct.h"
35#include "encode.h"
36#include "internal.h"
37#include "profiles.h"
38#include "proresdata.h"
39#include "put_bits.h"
40#include "bytestream.h"
41#include "fdctdsp.h"
42
/* Largest slice width in macroblocks. prores_encode_picture() starts every
 * row with this width and halves it when fewer macroblocks remain; it also
 * sizes the per-slice coefficient buffers in encode_slice(). */
#define DEFAULT_SLICE_MB_WIDTH 8
44
/* Profile id / name pairs; each name is the profile's FOURCC string.
 * The list is terminated by an FF_PROFILE_UNKNOWN entry. */
static const AVProfile profiles[] = {
    { FF_PROFILE_PRORES_PROXY,    "apco"},
    { FF_PROFILE_PRORES_LT,       "apcs"},
    { FF_PROFILE_PRORES_STANDARD, "apcn"},
    { FF_PROFILE_PRORES_HQ,       "apch"},
    { FF_PROFILE_PRORES_4444,     "ap4h"},
    { FF_PROFILE_PRORES_XQ,       "ap4x"},
    { FF_PROFILE_UNKNOWN }
};
54
/*
 * Rate-control parameters, all indexed by avctx->profile.
 *  - qp_start_table: quantiser a picture starts with; also the lowest value
 *                    encode_slice() steps down to.
 *  - qp_end_table:   highest quantiser encode_slice() steps up to.
 *  - bitrate_table:  budget value; encode_slice() derives a slice's target
 *                    size in bits as (mb_count * value) >> 2.
 */
static const int qp_start_table[] = {  8, 3, 2, 1, 1, 1};
static const int qp_end_table[]   = { 13, 9, 6, 6, 5, 4};
static const int bitrate_table[]  = { 1000, 2100, 3500, 5400, 7000, 10000};
58
/*
 * Colour property values accepted for the frame header. Each list ends with
 * INT_MAX; prores_encode_frame() hands them to ff_int_from_list_or_default()
 * together with a fallback value of 0.
 */
static const int valid_primaries[]  = { AVCOL_PRI_RESERVED0, AVCOL_PRI_BT709, AVCOL_PRI_UNSPECIFIED, AVCOL_PRI_BT470BG,
                                        AVCOL_PRI_SMPTE170M, AVCOL_PRI_BT2020, AVCOL_PRI_SMPTE431, AVCOL_PRI_SMPTE432, INT_MAX };
static const int valid_trc[]        = { AVCOL_TRC_RESERVED0, AVCOL_TRC_BT709, AVCOL_TRC_UNSPECIFIED, AVCOL_TRC_SMPTE2084,
                                        AVCOL_TRC_ARIB_STD_B67, INT_MAX };
static const int valid_colorspace[] = { AVCOL_SPC_BT709, AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_SMPTE170M,
                                        AVCOL_SPC_BT2020_NCL, INT_MAX };
65
/*
 * Luma quantisation matrices: one 64-entry matrix per profile. The row
 * selected by avctx->profile is copied into the frame header by
 * prores_encode_frame().
 * NOTE(review): rows 0-3 presumably follow the order of profiles[]
 * (Proxy, LT, Standard, HQ) - confirm against the FF_PROFILE_PRORES_* values.
 */
static const uint8_t QMAT_LUMA[6][64] = {
    {
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    }, {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41
    }, {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21
    }, {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 XQ */
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  3,
        2,  2,  2,  2,  2,  2,  3,  3,
        2,  2,  2,  2,  2,  3,  3,  3,
        2,  2,  2,  2,  3,  3,  3,  4,
        2,  2,  2,  2,  3,  3,  4,  4,
    }
};
123
/*
 * Chroma quantisation matrices: one 64-entry matrix per profile. The row
 * selected by avctx->profile is copied into the frame header right after the
 * luma matrix by prores_encode_frame().
 * The last two rows (444 and 444 XQ) hold identical values here.
 * NOTE(review): rows 0-3 presumably follow the order of profiles[]
 * (Proxy, LT, Standard, HQ) - confirm against the FF_PROFILE_PRORES_* values.
 */
static const uint8_t QMAT_CHROMA[6][64] = {
    {
         4,  7,  9, 11, 13, 14, 63, 63,
         7,  7, 11, 12, 14, 63, 63, 63,
         9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63
    }, {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41
    }, {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21
    }, {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }, { /* 444 xq */
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  4,  4,  4,  4,  4,  5,
        4,  4,  4,  4,  4,  4,  5,  5,
        4,  4,  4,  4,  4,  5,  5,  6,
        4,  4,  4,  4,  5,  5,  6,  7,
        4,  4,  4,  4,  5,  6,  7,  7
    }
};
181
182
/* Private encoder state, reached through avctx->priv_data. */
typedef struct {
    AVClass *class;
    FDCTDSPContext fdsp;    /* provides the forward DCT (fdsp.fdct) */
    /* Scratch planes: slices touching the picture edge are copied here with
     * edge replication before the DCT; fill_a also holds the converted alpha
     * samples of every slice. */
    uint8_t* fill_y;
    uint8_t* fill_u;
    uint8_t* fill_v;
    uint8_t* fill_a;

    /* Quantiser tables, indexed as [qp - 1][coefficient]. */
    int qmat_luma[16][64];
    int qmat_chroma[16][64];
    const uint8_t *scantable; /* coefficient scan order (progressive or interlaced variant) */

    int is_422;             /* used as the horizontal chroma shift: 1 halves the chroma width, 0 keeps it */
    int need_alpha;         /* non-zero when an alpha plane is coded after the colour planes */
    int is_interlaced;      /* non-zero when the frame is coded as two field pictures */

    char *vendor;           /* vendor id; its first 4 bytes are written to the frame header */
} ProresContext;
201
202static void encode_codeword(PutBitContext *pb, int val, int codebook)
203{
204    unsigned int rice_order, exp_order, switch_bits, first_exp, exp, zeros;
205
206    /* number of bits to switch between rice and exp golomb */
207    switch_bits = codebook & 3;
208    rice_order  = codebook >> 5;
209    exp_order   = (codebook >> 2) & 7;
210
211    first_exp = ((switch_bits + 1) << rice_order);
212
213    if (val >= first_exp) { /* exp golomb */
214        val -= first_exp;
215        val += (1 << exp_order);
216        exp = av_log2(val);
217        zeros = exp - exp_order + switch_bits + 1;
218        put_bits(pb, zeros, 0);
219        put_bits(pb, exp + 1, val);
220    } else if (rice_order) {
221        put_bits(pb, (val >> rice_order), 0);
222        put_bits(pb, 1, 1);
223        put_sbits(pb, rice_order, val);
224    } else {
225        put_bits(pb, val, 0);
226        put_bits(pb, 1, 1);
227    }
228}
229
/* The shift-based macros below rely on a 32-bit int and on >> of a negative
 * int being an arithmetic shift. */
/* Quantise: divide val by entry ind of the quantisation matrix (C integer division). */
#define QSCALE(qmat,ind,val) ((val) / ((qmat)[ind]))
/* Fold a signed value into a non-negative code: v >= 0 -> 2v, v < 0 -> -2v - 1. */
#define TO_GOLOMB(val) (((val) * 2) ^ ((val) >> 31))
/* Sign mask of val (0 or -1) XORed with sign. */
#define DIFF_SIGN(val, sign) (((val) >> 31) ^ (sign))
/* 1 when val is negative, 0 otherwise. */
#define IS_NEGATIVE(val) ((((val) >> 31) ^ -1) + 1)
/* 0 for val == 0, otherwise 2 * val + sign. Note that val is evaluated twice. */
#define TO_GOLOMB2(val,sign) ((val)==0 ? 0 : ((val) << 1) + (sign))
235
236static av_always_inline int get_level(int val)
237{
238    int sign = (val >> 31);
239    return (val ^ sign) - sign;
240}
241
/* Codebook used for the first DC coefficient of a slice plane. */
#define FIRST_DC_CB 0xB8

/* Codebooks for the following DC differences; encode_dc_coeffs() picks the
 * entry at min(previous code, 6). */
static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
245
246static void encode_dc_coeffs(PutBitContext *pb, int16_t *in,
247        int blocks_per_slice, int *qmat)
248{
249    int prev_dc, code;
250    int i, sign, idx;
251    int new_dc, delta, diff_sign, new_code;
252
253    prev_dc = QSCALE(qmat, 0, in[0] - 16384);
254    code = TO_GOLOMB(prev_dc);
255    encode_codeword(pb, code, FIRST_DC_CB);
256
257    code = 5; sign = 0; idx = 64;
258    for (i = 1; i < blocks_per_slice; i++, idx += 64) {
259        new_dc    = QSCALE(qmat, 0, in[idx] - 16384);
260        delta     = new_dc - prev_dc;
261        diff_sign = DIFF_SIGN(delta, sign);
262        new_code  = TO_GOLOMB2(get_level(delta), diff_sign);
263
264        encode_codeword(pb, new_code, dc_codebook[FFMIN(code, 6)]);
265
266        code      = new_code;
267        sign      = delta >> 31;
268        prev_dc   = new_dc;
269    }
270}
271
/* Codebook for a zero run, selected by min(previous run, 15). */
static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29,
        0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C };
/* Codebook for (level - 1), selected by min(previous level, 9). */
static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28,
        0x28, 0x28, 0x28, 0x4C };
276
277static void encode_ac_coeffs(PutBitContext *pb,
278        int16_t *in, int blocks_per_slice, int *qmat, const uint8_t ff_prores_scan[64])
279{
280    int prev_run = 4;
281    int prev_level = 2;
282
283    int run = 0, level, code, i, j;
284    for (i = 1; i < 64; i++) {
285        int indp = ff_prores_scan[i];
286        for (j = 0; j < blocks_per_slice; j++) {
287            int val = QSCALE(qmat, indp, in[(j << 6) + indp]);
288            if (val) {
289                encode_codeword(pb, run, run_to_cb[FFMIN(prev_run, 15)]);
290
291                prev_run   = run;
292                run        = 0;
293                level      = get_level(val);
294                code       = level - 1;
295
296                encode_codeword(pb, code, lev_to_cb[FFMIN(prev_level, 9)]);
297
298                prev_level = level;
299
300                put_bits(pb, 1, IS_NEGATIVE(val));
301            } else {
302                ++run;
303            }
304        }
305    }
306}
307
/**
 * Copy an 8x8 block of 16-bit samples into a contiguous coefficient block.
 *
 * @param pixels top-left sample of the block
 * @param stride distance between source rows, in bytes
 * @param block  destination, 64 contiguous int16_t
 */
static void get(uint8_t *pixels, int stride, int16_t* block)
{
    int row;

    for (row = 0; row < 8; row++, pixels += stride, block += 8) {
        /* one row is 16 bytes, moved as two 64-bit words */
        AV_WN64(block,     AV_RN64(pixels));
        AV_WN64(block + 4, AV_RN64(pixels + 8));
    }
}
319
/* Load the 8x8 block at pixels (rows stride bytes apart) into block and run
 * the forward DCT on it in place. */
static void fdct_get(FDCTDSPContext *fdsp, uint8_t *pixels, int stride, int16_t* block)
{
    get(pixels, stride, block);
    fdsp->fdct(block);
}
325
326static void calc_plane_dct(FDCTDSPContext *fdsp, uint8_t *src, int16_t * blocks, int src_stride, int mb_count, int chroma, int is_422)
327{
328    int16_t *block;
329    int i;
330
331    block = blocks;
332
333    if (!chroma) { /* Luma plane */
334        for (i = 0; i < mb_count; i++) {
335            fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
336            fdct_get(fdsp, src + 16,                  src_stride, block + (1 << 6));
337            fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (2 << 6));
338            fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
339
340            block += 256;
341            src   += 32;
342        }
343    } else if (chroma && is_422){ /* chroma plane 422 */
344        for (i = 0; i < mb_count; i++) {
345            fdct_get(fdsp, src,                  src_stride, block + (0 << 6));
346            fdct_get(fdsp, src + 8 * src_stride, src_stride, block + (1 << 6));
347            block += (256 >> 1);
348            src   += (32  >> 1);
349        }
350    } else { /* chroma plane 444 */
351        for (i = 0; i < mb_count; i++) {
352            fdct_get(fdsp, src,                       src_stride, block + (0 << 6));
353            fdct_get(fdsp, src +      8 * src_stride, src_stride, block + (1 << 6));
354            fdct_get(fdsp, src + 16,                  src_stride, block + (2 << 6));
355            fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6));
356
357            block += 256;
358            src   += 32;
359        }
360    }
361}
362
363static int encode_slice_plane(int16_t *blocks, int mb_count, uint8_t *buf, unsigned buf_size, int *qmat, int sub_sample_chroma,
364                              const uint8_t ff_prores_scan[64])
365{
366    int blocks_per_slice;
367    PutBitContext pb;
368
369    blocks_per_slice = mb_count << (2 - sub_sample_chroma);
370    init_put_bits(&pb, buf, buf_size);
371
372    encode_dc_coeffs(&pb, blocks, blocks_per_slice, qmat);
373    encode_ac_coeffs(&pb, blocks, blocks_per_slice, qmat, ff_prores_scan);
374
375    flush_put_bits(&pb);
376    return put_bits_ptr(&pb) - pb.buf;
377}
378
379static av_always_inline unsigned encode_slice_data(AVCodecContext *avctx,
380                                                   int16_t * blocks_y, int16_t * blocks_u, int16_t * blocks_v,
381                                                   unsigned mb_count, uint8_t *buf, unsigned data_size,
382                                                   unsigned* y_data_size, unsigned* u_data_size, unsigned* v_data_size,
383                                                   int qp)
384{
385    ProresContext* ctx = avctx->priv_data;
386
387    *y_data_size = encode_slice_plane(blocks_y, mb_count,
388                                      buf, data_size, ctx->qmat_luma[qp - 1], 0, ctx->scantable);
389
390    if (!(avctx->flags & AV_CODEC_FLAG_GRAY)) {
391        *u_data_size = encode_slice_plane(blocks_u, mb_count, buf + *y_data_size, data_size - *y_data_size,
392                                          ctx->qmat_chroma[qp - 1], ctx->is_422, ctx->scantable);
393
394        *v_data_size = encode_slice_plane(blocks_v, mb_count, buf + *y_data_size + *u_data_size,
395                                          data_size - *y_data_size - *u_data_size,
396                                          ctx->qmat_chroma[qp - 1], ctx->is_422, ctx->scantable);
397    }
398
399    return *y_data_size + *u_data_size + *v_data_size;
400}
401
402static void put_alpha_diff(PutBitContext *pb, int cur, int prev)
403{
404    const int abits = 16;
405    const int dbits = 7;
406    const int dsize = 1 << dbits - 1;
407    int diff = cur - prev;
408
409    diff = av_mod_uintp2(diff, abits);
410    if (diff >= (1 << abits) - dsize)
411        diff -= 1 << abits;
412    if (diff < -dsize || diff > dsize || !diff) {
413        put_bits(pb, 1, 1);
414        put_bits(pb, abits, diff);
415    } else {
416        put_bits(pb, 1, 0);
417        put_bits(pb, dbits - 1, FFABS(diff) - 1);
418        put_bits(pb, 1, diff < 0);
419    }
420}
421
422static inline void put_alpha_run(PutBitContext *pb, int run)
423{
424    if (run) {
425        put_bits(pb, 1, 0);
426        if (run < 0x10)
427            put_bits(pb, 4, run);
428        else
429            put_bits(pb, 15, run);
430    } else {
431        put_bits(pb, 1, 1);
432    }
433}
434
435static av_always_inline int encode_alpha_slice_data(AVCodecContext *avctx, int8_t * src_a,
436                                                   unsigned mb_count, uint8_t *buf, unsigned data_size, unsigned* a_data_size)
437{
438    const int abits = 16;
439    const int mask  = (1 << abits) - 1;
440    const int num_coeffs = mb_count * 256;
441    int prev = mask, cur;
442    int idx = 0;
443    int run = 0;
444    int16_t * blocks = (int16_t *)src_a;
445    PutBitContext pb;
446    init_put_bits(&pb, buf, data_size);
447
448    cur = blocks[idx++];
449    put_alpha_diff(&pb, cur, prev);
450    prev = cur;
451    do {
452        cur = blocks[idx++];
453        if (cur != prev) {
454            put_alpha_run (&pb, run);
455            put_alpha_diff(&pb, cur, prev);
456            prev = cur;
457            run  = 0;
458        } else {
459            run++;
460        }
461    } while (idx < num_coeffs);
462    if (run)
463        put_alpha_run(&pb, run);
464    flush_put_bits(&pb);
465    *a_data_size = put_bytes_output(&pb);
466
467    if (put_bits_left(&pb) < 0) {
468        av_log(avctx, AV_LOG_ERROR,
469               "Underestimated required buffer size.\n");
470        return AVERROR_BUG;
471    } else {
472        return 0;
473    }
474}
475
/**
 * Copy a rectangle of 16-bit samples out of a plane into a dst_width x
 * dst_height buffer, replicating the last valid column and row wherever the
 * rectangle reaches past the picture.
 *
 * Fixes over the previous version:
 *  - on alpha planes the right-edge fill sample was shifted by 6 a second
 *    time although it had already been converted, so the padding held a
 *    wrong value;
 *  - in interlaced mode the top field of an odd-height frame now counts
 *    (height + 1) / 2 lines, so its last line is used and the number of
 *    valid rows can no longer drop to 0.
 *
 * @param src            first sample of the plane
 * @param x, y           top-left corner of the rectangle, in samples / lines
 *                       (y counts field lines when is_interlaced is set)
 * @param stride         distance between two frame lines, in bytes
 * @param width, height  plane dimensions in samples / frame lines
 * @param dst            output buffer of dst_width * dst_height samples
 * @param is_alpha_plane non-zero to shift samples left by 6 (10 bit -> 16 bit)
 * @param is_interlaced  non-zero to read every second frame line
 * @param is_top_field   with is_interlaced: non-zero selects the even lines,
 *                       zero the odd lines
 *
 * The rectangle must contain at least one valid sample (x < width and y
 * below the picture/field height).
 */
static inline void subimage_with_fill_template(uint16_t *src, unsigned x, unsigned y,
                                               unsigned stride, unsigned width, unsigned height, uint16_t *dst,
                                               unsigned dst_width, unsigned dst_height, int is_alpha_plane,
                                               int is_interlaced, int is_top_field)
{
    const unsigned avail_width = width - x;
    const int box_width = avail_width < dst_width ? avail_width : dst_width;
    unsigned avail_height;
    int i, j, src_stride, box_height;
    uint16_t last_pix, *last_line;

    if (!is_interlaced) {
        src_stride   = stride >> 1; /* bytes -> samples */
        src         += y * src_stride + x;
        avail_height = height - y;
    } else {
        src_stride   = stride; /* 2 lines stride */
        src         += y * src_stride + x;
        /* the top field owns the extra line of an odd-height frame */
        avail_height = (is_top_field ? (height + 1) / 2 : height / 2) - y;
        if (!is_top_field)
            src += stride >> 1;
    }
    box_height = avail_height < dst_height ? avail_height : dst_height;

    for (i = 0; i < box_height; ++i) {
        for (j = 0; j < box_width; ++j) {
            if (!is_alpha_plane) {
                dst[j] = src[j];
            } else {
                dst[j] = src[j] << 6; /* alpha 10b to 16b */
            }
        }
        /* dst[j - 1] is already in output format: replicate it unchanged */
        last_pix = dst[j - 1];
        for (; j < dst_width; j++)
            dst[j] = last_pix;
        src += src_stride;
        dst += dst_width;
    }

    /* repeat the last valid row down to dst_height */
    last_line = dst - dst_width;
    for (; i < dst_height; i++) {
        for (j = 0; j < dst_width; ++j) {
            dst[j] = last_line[j];
        }
        dst += dst_width;
    }
}
523
/* Padded copy of a colour-plane rectangle: samples are copied unchanged
 * (is_alpha_plane = 0 in subimage_with_fill_template()). */
static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
        unsigned stride, unsigned width, unsigned height, uint16_t *dst,
        unsigned dst_width, unsigned dst_height, int is_interlaced, int is_top_field)
{
    subimage_with_fill_template(src, x, y, stride, width, height, dst, dst_width, dst_height, 0, is_interlaced, is_top_field);
}
530
/* Padded copy of an alpha-plane rectangle: reorganizes the alpha data and
 * converts it from 10 to 16 bits (is_alpha_plane = 1 in
 * subimage_with_fill_template()). */
static void subimage_alpha_with_fill(uint16_t *src, unsigned x, unsigned y,
                               unsigned stride, unsigned width, unsigned height, uint16_t *dst,
                               unsigned dst_width, unsigned dst_height, int is_interlaced, int is_top_field)
{
    subimage_with_fill_template(src, x, y, stride, width, height, dst, dst_width, dst_height, 1, is_interlaced, is_top_field);
}
538
539static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, int mb_x,
540        int mb_y, unsigned mb_count, uint8_t *buf, unsigned data_size,
541        int unsafe, int *qp, int is_interlaced, int is_top_field)
542{
543    int luma_stride, chroma_stride, alpha_stride = 0;
544    ProresContext* ctx = avctx->priv_data;
545    int hdr_size = 6 + (ctx->need_alpha * 2); /* v data size is write when there is alpha */
546    int ret = 0, slice_size;
547    uint8_t *dest_y, *dest_u, *dest_v;
548    unsigned y_data_size = 0, u_data_size = 0, v_data_size = 0, a_data_size = 0;
549    FDCTDSPContext *fdsp = &ctx->fdsp;
550    int tgt_bits   = (mb_count * bitrate_table[avctx->profile]) >> 2;
551    int low_bytes  = (tgt_bits - (tgt_bits >> 3)) >> 3; // 12% bitrate fluctuation
552    int high_bytes = (tgt_bits + (tgt_bits >> 3)) >> 3;
553
554    LOCAL_ALIGNED(16, int16_t, blocks_y, [DEFAULT_SLICE_MB_WIDTH << 8]);
555    LOCAL_ALIGNED(16, int16_t, blocks_u, [DEFAULT_SLICE_MB_WIDTH << 8]);
556    LOCAL_ALIGNED(16, int16_t, blocks_v, [DEFAULT_SLICE_MB_WIDTH << 8]);
557
558    luma_stride   = pic->linesize[0];
559    chroma_stride = pic->linesize[1];
560
561    if (ctx->need_alpha)
562        alpha_stride = pic->linesize[3];
563
564    if (!is_interlaced) {
565        dest_y = pic->data[0] + (mb_y << 4) * luma_stride   + (mb_x << 5);
566        dest_u = pic->data[1] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
567        dest_v = pic->data[2] + (mb_y << 4) * chroma_stride + (mb_x << (5 - ctx->is_422));
568    } else {
569        dest_y = pic->data[0] + (mb_y << 4) * luma_stride * 2   + (mb_x << 5);
570        dest_u = pic->data[1] + (mb_y << 4) * chroma_stride * 2 + (mb_x << (5 - ctx->is_422));
571        dest_v = pic->data[2] + (mb_y << 4) * chroma_stride * 2 + (mb_x << (5 - ctx->is_422));
572        if (!is_top_field){ /* bottom field, offset dest */
573            dest_y += luma_stride;
574            dest_u += chroma_stride;
575            dest_v += chroma_stride;
576        }
577    }
578
579    if (unsafe) {
580        subimage_with_fill((uint16_t *) pic->data[0], mb_x << 4, mb_y << 4,
581                luma_stride, avctx->width, avctx->height,
582                (uint16_t *) ctx->fill_y, mb_count << 4, 16, is_interlaced, is_top_field);
583        subimage_with_fill((uint16_t *) pic->data[1], mb_x << (4 - ctx->is_422), mb_y << 4,
584                           chroma_stride, avctx->width >> ctx->is_422, avctx->height,
585                           (uint16_t *) ctx->fill_u, mb_count << (4 - ctx->is_422), 16, is_interlaced, is_top_field);
586        subimage_with_fill((uint16_t *) pic->data[2], mb_x << (4 - ctx->is_422), mb_y << 4,
587                           chroma_stride, avctx->width >> ctx->is_422, avctx->height,
588                           (uint16_t *) ctx->fill_v, mb_count << (4 - ctx->is_422), 16, is_interlaced, is_top_field);
589
590        /* no need for interlaced special case, data already reorganized in subimage_with_fill */
591        calc_plane_dct(fdsp, ctx->fill_y, blocks_y, mb_count <<  5,                mb_count, 0, 0);
592        calc_plane_dct(fdsp, ctx->fill_u, blocks_u, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
593        calc_plane_dct(fdsp, ctx->fill_v, blocks_v, mb_count << (5 - ctx->is_422), mb_count, 1, ctx->is_422);
594
595        slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
596                          mb_count, buf + hdr_size, data_size - hdr_size,
597                          &y_data_size, &u_data_size, &v_data_size,
598                          *qp);
599    } else {
600        if (!is_interlaced) {
601            calc_plane_dct(fdsp, dest_y, blocks_y, luma_stride, mb_count, 0, 0);
602            calc_plane_dct(fdsp, dest_u, blocks_u, chroma_stride, mb_count, 1, ctx->is_422);
603            calc_plane_dct(fdsp, dest_v, blocks_v, chroma_stride, mb_count, 1, ctx->is_422);
604        } else {
605            calc_plane_dct(fdsp, dest_y, blocks_y, luma_stride   * 2, mb_count, 0, 0);
606            calc_plane_dct(fdsp, dest_u, blocks_u, chroma_stride * 2, mb_count, 1, ctx->is_422);
607            calc_plane_dct(fdsp, dest_v, blocks_v, chroma_stride * 2, mb_count, 1, ctx->is_422);
608        }
609
610        slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
611                          mb_count, buf + hdr_size, data_size - hdr_size,
612                          &y_data_size, &u_data_size, &v_data_size,
613                          *qp);
614
615        if (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]) {
616            do {
617                *qp += 1;
618                slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
619                                               mb_count, buf + hdr_size, data_size - hdr_size,
620                                               &y_data_size, &u_data_size, &v_data_size,
621                                               *qp);
622            } while (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]);
623        } else if (slice_size < low_bytes && *qp
624                > qp_start_table[avctx->profile]) {
625            do {
626                *qp -= 1;
627                slice_size = encode_slice_data(avctx, blocks_y, blocks_u, blocks_v,
628                                               mb_count, buf + hdr_size, data_size - hdr_size,
629                                               &y_data_size, &u_data_size, &v_data_size,
630                                               *qp);
631            } while (slice_size < low_bytes && *qp > qp_start_table[avctx->profile]);
632        }
633    }
634
635    buf[0] = hdr_size << 3;
636    buf[1] = *qp;
637    AV_WB16(buf + 2, y_data_size);
638    AV_WB16(buf + 4, u_data_size);
639
640    if (ctx->need_alpha) {
641        AV_WB16(buf + 6, v_data_size); /* write v data size only if there is alpha */
642
643        subimage_alpha_with_fill((uint16_t *) pic->data[3], mb_x << 4, mb_y << 4,
644                           alpha_stride, avctx->width, avctx->height,
645                           (uint16_t *) ctx->fill_a, mb_count << 4, 16, is_interlaced, is_top_field);
646        ret = encode_alpha_slice_data(avctx, ctx->fill_a, mb_count,
647                                      buf + hdr_size + slice_size,
648                                      data_size - hdr_size - slice_size, &a_data_size);
649    }
650
651    if (ret != 0) {
652        return ret;
653    }
654    return hdr_size + y_data_size + u_data_size + v_data_size + a_data_size;
655}
656
657static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
658        uint8_t *buf, const int buf_size, const int picture_index, const int is_top_field)
659{
660    ProresContext *ctx = avctx->priv_data;
661    int mb_width = (avctx->width + 15) >> 4;
662    int hdr_size, sl_size, i;
663    int mb_y, sl_data_size, qp, mb_height, picture_height, unsafe_mb_height_limit;
664    int unsafe_bot, unsafe_right;
665    uint8_t *sl_data, *sl_data_sizes;
666    int slice_per_line = 0, rem = mb_width;
667
668    if (!ctx->is_interlaced) { /* progressive encoding */
669        mb_height = (avctx->height + 15) >> 4;
670        unsafe_mb_height_limit = mb_height;
671    } else {
672        if (is_top_field) {
673            picture_height = (avctx->height + 1) / 2;
674        } else {
675            picture_height = avctx->height / 2;
676        }
677        mb_height = (picture_height + 15) >> 4;
678        unsafe_mb_height_limit = mb_height;
679    }
680
681    for (i = av_log2(DEFAULT_SLICE_MB_WIDTH); i >= 0; --i) {
682        slice_per_line += rem >> i;
683        rem &= (1 << i) - 1;
684    }
685
686    qp = qp_start_table[avctx->profile];
687    hdr_size = 8; sl_data_size = buf_size - hdr_size;
688    sl_data_sizes = buf + hdr_size;
689    sl_data = sl_data_sizes + (slice_per_line * mb_height * 2);
690    for (mb_y = 0; mb_y < mb_height; mb_y++) {
691        int mb_x = 0;
692        int slice_mb_count = DEFAULT_SLICE_MB_WIDTH;
693        while (mb_x < mb_width) {
694            while (mb_width - mb_x < slice_mb_count)
695                slice_mb_count >>= 1;
696
697            unsafe_bot = (avctx->height & 0xf) && (mb_y == unsafe_mb_height_limit - 1);
698            unsafe_right = (avctx->width & 0xf) && (mb_x + slice_mb_count == mb_width);
699
700            sl_size = encode_slice(avctx, pic, mb_x, mb_y, slice_mb_count,
701                    sl_data, sl_data_size, unsafe_bot || unsafe_right, &qp, ctx->is_interlaced, is_top_field);
702            if (sl_size < 0){
703                return sl_size;
704            }
705
706            bytestream_put_be16(&sl_data_sizes, sl_size);
707            sl_data           += sl_size;
708            sl_data_size      -= sl_size;
709            mb_x              += slice_mb_count;
710        }
711    }
712
713    buf[0] = hdr_size << 3;
714    AV_WB32(buf + 1, sl_data - buf);
715    AV_WB16(buf + 5, slice_per_line * mb_height); /* picture size */
716    buf[7] = av_log2(DEFAULT_SLICE_MB_WIDTH) << 4; /* number of slices */
717
718    return sl_data - buf;
719}
720
721static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
722                               const AVFrame *pict, int *got_packet)
723{
724    ProresContext *ctx = avctx->priv_data;
725    int header_size = 148;
726    uint8_t *buf;
727    int compress_frame_size, pic_size, ret, is_top_field_first = 0;
728    uint8_t frame_flags;
729    int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + AV_INPUT_BUFFER_MIN_SIZE; //FIXME choose tighter limit
730
731
732    if ((ret = ff_alloc_packet(avctx, pkt, frame_size + AV_INPUT_BUFFER_MIN_SIZE)) < 0)
733        return ret;
734
735    buf = pkt->data;
736    compress_frame_size = 8 + header_size;
737
738    bytestream_put_be32(&buf, compress_frame_size);/* frame size will be update after picture(s) encoding */
739    bytestream_put_buffer(&buf, "icpf", 4);
740
741    bytestream_put_be16(&buf, header_size);
742    bytestream_put_be16(&buf, 0); /* version */
743    bytestream_put_buffer(&buf, ctx->vendor, 4);
744    bytestream_put_be16(&buf, avctx->width);
745    bytestream_put_be16(&buf, avctx->height);
746    frame_flags = 0x82; /* 422 not interlaced */
747    if (avctx->profile >= FF_PROFILE_PRORES_4444) /* 4444 or 4444 Xq */
748        frame_flags |= 0x40; /* 444 chroma */
749    if (ctx->is_interlaced) {
750        if (pict->top_field_first || !pict->interlaced_frame) { /* tff frame or progressive frame interpret as tff */
751            av_log(avctx, AV_LOG_DEBUG, "use interlaced encoding, top field first\n");
752            frame_flags |= 0x04; /* interlaced tff */
753            is_top_field_first = 1;
754        } else {
755            av_log(avctx, AV_LOG_DEBUG, "use interlaced encoding, bottom field first\n");
756            frame_flags |= 0x08; /* interlaced bff */
757        }
758    } else {
759        av_log(avctx, AV_LOG_DEBUG, "use progressive encoding\n");
760    }
761    *buf++ = frame_flags;
762    *buf++ = 0; /* reserved */
763    /* only write color properties, if valid value. set to unspecified otherwise */
764    *buf++ = ff_int_from_list_or_default(avctx, "frame color primaries", pict->color_primaries, valid_primaries, 0);
765    *buf++ = ff_int_from_list_or_default(avctx, "frame color trc", pict->color_trc, valid_trc, 0);
766    *buf++ = ff_int_from_list_or_default(avctx, "frame colorspace", pict->colorspace, valid_colorspace, 0);
767    if (avctx->profile >= FF_PROFILE_PRORES_4444) {
768        if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
769            *buf++ = 0xA0;/* src b64a and no alpha */
770        } else {
771            *buf++ = 0xA2;/* src b64a and 16b alpha */
772        }
773    } else {
774        *buf++ = 32;/* src v210 and no alpha */
775    }
776    *buf++ = 0; /* reserved */
777    *buf++ = 3; /* luma and chroma matrix present */
778
779    bytestream_put_buffer(&buf, QMAT_LUMA[avctx->profile],   64);
780    bytestream_put_buffer(&buf, QMAT_CHROMA[avctx->profile], 64);
781
782    pic_size = prores_encode_picture(avctx, pict, buf,
783                                     pkt->size - compress_frame_size, 0, is_top_field_first);/* encode progressive or first field */
784    if (pic_size < 0) {
785        return pic_size;
786    }
787    compress_frame_size += pic_size;
788
789    if (ctx->is_interlaced) { /* encode second field */
790        pic_size = prores_encode_picture(avctx, pict, pkt->data + compress_frame_size,
791                                         pkt->size - compress_frame_size, 1, !is_top_field_first);
792        if (pic_size < 0) {
793            return pic_size;
794        }
795        compress_frame_size += pic_size;
796    }
797
798    AV_WB32(pkt->data, compress_frame_size);/* update frame size */
799    pkt->size = compress_frame_size;
800    *got_packet = 1;
801
802    return 0;
803}
804
/**
 * Multiply a 64-entry quantisation matrix by a scalar.
 *
 * @param src   64 unsigned 8-bit base coefficients
 * @param dst   receives the 64 scaled coefficients
 * @param scale factor applied to every coefficient
 */
static void scale_mat(const uint8_t* src, int* dst, int scale)
{
    const uint8_t *const end = src + 64;

    while (src < end)
        *dst++ = *src++ * scale;
}
811
812static av_cold int prores_encode_init(AVCodecContext *avctx)
813{
814    int i;
815    ProresContext* ctx = avctx->priv_data;
816
817    avctx->bits_per_raw_sample = 10;
818    ctx->need_alpha = 0;
819    ctx->is_interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
820    if (ctx->is_interlaced) {
821        ctx->scantable = ff_prores_interlaced_scan;
822    } else {
823        ctx->scantable = ff_prores_progressive_scan;
824    }
825
826    if (avctx->width & 0x1) {
827        av_log(avctx, AV_LOG_ERROR,
828                "frame width needs to be multiple of 2\n");
829        return AVERROR(EINVAL);
830    }
831
832    if (avctx->width > 65534 || avctx->height > 65535) {
833        av_log(avctx, AV_LOG_ERROR,
834                "The maximum dimensions are 65534x65535\n");
835        return AVERROR(EINVAL);
836    }
837
838    if (strlen(ctx->vendor) != 4) {
839        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
840        return AVERROR(EINVAL);
841    }
842
843    if (avctx->profile == FF_PROFILE_UNKNOWN) {
844        if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10) {
845            avctx->profile = FF_PROFILE_PRORES_STANDARD;
846            av_log(avctx, AV_LOG_INFO,
847                "encoding with ProRes standard (apcn) profile\n");
848        } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10) {
849            avctx->profile = FF_PROFILE_PRORES_4444;
850            av_log(avctx, AV_LOG_INFO,
851                   "encoding with ProRes 4444 (ap4h) profile\n");
852        } else if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
853            avctx->profile = FF_PROFILE_PRORES_4444;
854            av_log(avctx, AV_LOG_INFO,
855                   "encoding with ProRes 4444+ (ap4h) profile\n");
856        }
857    } else if (avctx->profile < FF_PROFILE_PRORES_PROXY
858            || avctx->profile > FF_PROFILE_PRORES_XQ) {
859        av_log(
860                avctx,
861                AV_LOG_ERROR,
862                "unknown profile %d, use [0 - apco, 1 - apcs, 2 - apcn (default), 3 - apch, 4 - ap4h, 5 - ap4x]\n",
863                avctx->profile);
864        return AVERROR(EINVAL);
865    } else if ((avctx->pix_fmt == AV_PIX_FMT_YUV422P10) && (avctx->profile > FF_PROFILE_PRORES_HQ)){
866        av_log(avctx, AV_LOG_ERROR,
867               "encoding with ProRes 444/Xq (ap4h/ap4x) profile, need YUV444P10 input\n");
868        return AVERROR(EINVAL);
869    }  else if ((avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10)
870                && (avctx->profile < FF_PROFILE_PRORES_4444)){
871        av_log(avctx, AV_LOG_ERROR,
872               "encoding with ProRes Proxy/LT/422/422 HQ (apco, apcs, apcn, ap4h) profile, need YUV422P10 input\n");
873        return AVERROR(EINVAL);
874    }
875
876    if (avctx->profile < FF_PROFILE_PRORES_4444) { /* 422 versions */
877        ctx->is_422 = 1;
878        if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
879            ctx->fill_y = av_malloc(4 * (DEFAULT_SLICE_MB_WIDTH << 8));
880            if (!ctx->fill_y)
881                return AVERROR(ENOMEM);
882            ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
883            ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 8);
884        }
885    } else { /* 444 */
886        ctx->is_422 = 0;
887        if ((avctx->height & 0xf) || (avctx->width & 0xf)) {
888            ctx->fill_y = av_malloc(3 * (DEFAULT_SLICE_MB_WIDTH << 9));
889            if (!ctx->fill_y)
890                return AVERROR(ENOMEM);
891            ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
892            ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 9);
893        }
894        if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
895            ctx->need_alpha = 1;
896            ctx->fill_a = av_malloc(DEFAULT_SLICE_MB_WIDTH << 9); /* 8 blocks x 16px x 16px x sizeof (uint16) */
897            if (!ctx->fill_a)
898                return AVERROR(ENOMEM);
899        }
900    }
901
902    ff_fdctdsp_init(&ctx->fdsp, avctx);
903
904    avctx->codec_tag = AV_RL32((const uint8_t*)profiles[avctx->profile].name);
905
906    for (i = 1; i <= 16; i++) {
907        scale_mat(QMAT_LUMA[avctx->profile]  , ctx->qmat_luma[i - 1]  , i);
908        scale_mat(QMAT_CHROMA[avctx->profile], ctx->qmat_chroma[i - 1], i);
909    }
910
911    return 0;
912}
913
914static av_cold int prores_encode_close(AVCodecContext *avctx)
915{
916    ProresContext* ctx = avctx->priv_data;
917    av_freep(&ctx->fill_y);
918    av_freep(&ctx->fill_a);
919
920    return 0;
921}
922
923#define OFFSET(x) offsetof(ProresContext, x)
924#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
925
926static const AVOption options[] = {
927    { "vendor", "vendor ID", OFFSET(vendor), AV_OPT_TYPE_STRING, { .str = "fmpg" }, 0, 0, VE },
928    { NULL }
929};
930
931static const AVClass prores_enc_class = {
932    .class_name = "ProRes encoder",
933    .item_name  = av_default_item_name,
934    .option     = options,
935    .version    = LIBAVUTIL_VERSION_INT,
936};
937
938static const enum AVPixelFormat pix_fmts[] = {
939    AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
940    AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
941};
942
943const FFCodec ff_prores_aw_encoder = {
944    .p.name         = "prores_aw",
945    .p.long_name    = NULL_IF_CONFIG_SMALL("Apple ProRes"),
946    .p.type         = AVMEDIA_TYPE_VIDEO,
947    .p.id           = AV_CODEC_ID_PRORES,
948    .p.pix_fmts     = pix_fmts,
949    .priv_data_size = sizeof(ProresContext),
950    .init           = prores_encode_init,
951    .close          = prores_encode_close,
952    FF_CODEC_ENCODE_CB(prores_encode_frame),
953    .p.capabilities = AV_CODEC_CAP_FRAME_THREADS,
954    .p.priv_class   = &prores_enc_class,
955    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
956    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
957};
958
959const FFCodec ff_prores_encoder = {
960    .p.name         = "prores",
961    .p.long_name    = NULL_IF_CONFIG_SMALL("Apple ProRes"),
962    .p.type         = AVMEDIA_TYPE_VIDEO,
963    .p.id           = AV_CODEC_ID_PRORES,
964    .p.pix_fmts     = pix_fmts,
965    .priv_data_size = sizeof(ProresContext),
966    .init           = prores_encode_init,
967    .close          = prores_encode_close,
968    FF_CODEC_ENCODE_CB(prores_encode_frame),
969    .p.capabilities = AV_CODEC_CAP_FRAME_THREADS,
970    .p.priv_class   = &prores_enc_class,
971    .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
972    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
973};
974