1/*
2 * MagicYUV encoder
3 * Copyright (c) 2017 Paul B Mahol
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#include <stdlib.h>
23#include <string.h>
24
25#include "libavutil/opt.h"
26#include "libavutil/pixdesc.h"
27#include "libavutil/qsort.h"
28
29#include "avcodec.h"
30#include "bytestream.h"
31#include "codec_internal.h"
32#include "encode.h"
33#include "put_bits.h"
34#include "thread.h"
35#include "lossless_videoencdsp.h"
36
/* Size in bytes of the extradata header blob built by magy_encode_init(). */
#define MAGICYUV_EXTRADATA_SIZE 32
38
/**
 * Spatial predictor run over each plane before entropy coding.
 *
 * The numeric value is what the "pred" option stores in frame_pred and
 * what encode_slice() writes as the second byte of every slice.
 */
typedef enum Prediction {
    LEFT     = 1,
    GRADIENT = 2,
    MEDIAN   = 3,
} Prediction;
44
/** Huffman code assigned to one 8-bit symbol. */
typedef struct HuffEntry {
    uint8_t  len;   ///< code length in bits, as passed to put_bits()
    uint32_t code;  ///< code value written with len bits
} HuffEntry;
49
/** One (symbol, frequency) pair fed to the code-length computation. */
typedef struct PTable {
    int     value;  ///< input value
    int64_t prob;   ///< number of occurrences of this value in input
} PTable;
54
/** Private encoder state, reached through avctx->priv_data. */
typedef struct MagicYUVContext {
    const AVClass       *class;              ///< class carrying the private options
    int                  frame_pred;         ///< selected Prediction, set by the "pred" option
    PutBitContext        pb;                 ///< bit writer used for the Huffman length tables
    int                  planes;             ///< plane count of the input pixel format
    uint8_t              format;             ///< format byte written into the headers
    int                  slice_height;       ///< NOTE(review): not referenced by the functions in this file - confirm
    int                  nb_slices;          ///< slices per plane; init sets this to 1
    int                  correlate;          ///< nonzero for planar RGB input: B and R are run through diff_bytes() against G
    int                  hshift[4];          ///< per-plane right shift applied to the frame width
    int                  vshift[4];          ///< per-plane right shift applied to the frame height
    uint8_t             *slices[4];          ///< per-plane buffers receiving the predictor output
    unsigned             slice_pos[4];       ///< byte offset in the packet where each plane's slice starts
    unsigned             tables_size;        ///< bytes occupied by the Huffman length tables in the packet
    uint8_t             *decorrelate_buf[2]; ///< [0] is the allocation, [1] points into its second half
    HuffEntry            he[4][256];         ///< per-plane Huffman codes, indexed by symbol value
    LLVidEncDSPContext   llvidencdsp;        ///< DSP helpers (diff_bytes, sub_median_pred)
    /** Predictor chosen in init from frame_pred; writes width*height bytes to dst. */
    void (*predict)(struct MagicYUVContext *s, const uint8_t *src, uint8_t *dst,
                    ptrdiff_t stride, int width, int height);
} MagicYUVContext;
75
76static void left_predict(MagicYUVContext *s,
77                         const uint8_t *src, uint8_t *dst, ptrdiff_t stride,
78                         int width, int height)
79{
80    uint8_t prev = 0;
81    int i, j;
82
83    for (i = 0; i < width; i++) {
84        dst[i] = src[i] - prev;
85        prev   = src[i];
86    }
87    dst += width;
88    src += stride;
89    for (j = 1; j < height; j++) {
90        prev = src[-stride];
91        for (i = 0; i < width; i++) {
92            dst[i] = src[i] - prev;
93            prev   = src[i];
94        }
95        dst += width;
96        src += stride;
97    }
98}
99
100static void gradient_predict(MagicYUVContext *s,
101                             const uint8_t *src, uint8_t *dst, ptrdiff_t stride,
102                             int width, int height)
103{
104    int left = 0, top, lefttop;
105    int i, j;
106
107    for (i = 0; i < width; i++) {
108        dst[i] = src[i] - left;
109        left   = src[i];
110    }
111    dst += width;
112    src += stride;
113    for (j = 1; j < height; j++) {
114        top = src[-stride];
115        left = src[0] - top;
116        dst[0] = left;
117        for (i = 1; i < width; i++) {
118            top = src[i - stride];
119            lefttop = src[i - (stride + 1)];
120            left = src[i-1];
121            dst[i] = (src[i] - top) - left + lefttop;
122        }
123        dst += width;
124        src += stride;
125    }
126}
127
128static void median_predict(MagicYUVContext *s,
129                           const uint8_t *src, uint8_t *dst, ptrdiff_t stride,
130                           int width, int height)
131{
132    int left = 0, lefttop;
133    int i, j;
134
135    for (i = 0; i < width; i++) {
136        dst[i] = src[i] - left;
137        left   = src[i];
138    }
139    dst += width;
140    src += stride;
141    for (j = 1; j < height; j++) {
142        left = lefttop = src[-stride];
143        s->llvidencdsp.sub_median_pred(dst, src - stride, src, width, &left, &lefttop);
144        dst += width;
145        src += stride;
146    }
147}
148
149static av_cold int magy_encode_init(AVCodecContext *avctx)
150{
151    MagicYUVContext *s = avctx->priv_data;
152    PutByteContext pb;
153    int i;
154
155    switch (avctx->pix_fmt) {
156    case AV_PIX_FMT_GBRP:
157        avctx->codec_tag = MKTAG('M', '8', 'R', 'G');
158        s->correlate = 1;
159        s->format = 0x65;
160        break;
161    case AV_PIX_FMT_GBRAP:
162        avctx->codec_tag = MKTAG('M', '8', 'R', 'A');
163        s->correlate = 1;
164        s->format = 0x66;
165        break;
166    case AV_PIX_FMT_YUV420P:
167        avctx->codec_tag = MKTAG('M', '8', 'Y', '0');
168        s->hshift[1] =
169        s->vshift[1] =
170        s->hshift[2] =
171        s->vshift[2] = 1;
172        s->format = 0x69;
173        break;
174    case AV_PIX_FMT_YUV422P:
175        avctx->codec_tag = MKTAG('M', '8', 'Y', '2');
176        s->hshift[1] =
177        s->hshift[2] = 1;
178        s->format = 0x68;
179        break;
180    case AV_PIX_FMT_YUV444P:
181        avctx->codec_tag = MKTAG('M', '8', 'Y', '4');
182        s->format = 0x67;
183        break;
184    case AV_PIX_FMT_YUVA444P:
185        avctx->codec_tag = MKTAG('M', '8', 'Y', 'A');
186        s->format = 0x6a;
187        break;
188    case AV_PIX_FMT_GRAY8:
189        avctx->codec_tag = MKTAG('M', '8', 'G', '0');
190        s->format = 0x6b;
191        break;
192    }
193    if (s->correlate) {
194        s->decorrelate_buf[0] = av_calloc(2U * avctx->height, FFALIGN(avctx->width, 16));
195        if (!s->decorrelate_buf[0])
196            return AVERROR(ENOMEM);
197        s->decorrelate_buf[1] = s->decorrelate_buf[0] + avctx->height * FFALIGN(avctx->width, 16);
198    }
199
200    ff_llvidencdsp_init(&s->llvidencdsp);
201
202    s->planes = av_pix_fmt_count_planes(avctx->pix_fmt);
203
204    s->nb_slices = 1;
205
206    for (i = 0; i < s->planes; i++) {
207        s->slices[i] = av_malloc(avctx->width * (avctx->height + 2) +
208                                 AV_INPUT_BUFFER_PADDING_SIZE);
209        if (!s->slices[i]) {
210            av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer.\n");
211            return AVERROR(ENOMEM);
212        }
213    }
214
215    switch (s->frame_pred) {
216    case LEFT:     s->predict = left_predict;     break;
217    case GRADIENT: s->predict = gradient_predict; break;
218    case MEDIAN:   s->predict = median_predict;   break;
219    }
220
221    avctx->extradata_size = MAGICYUV_EXTRADATA_SIZE;
222
223    avctx->extradata = av_mallocz(avctx->extradata_size +
224                                  AV_INPUT_BUFFER_PADDING_SIZE);
225
226    if (!avctx->extradata) {
227        av_log(avctx, AV_LOG_ERROR, "Could not allocate extradata.\n");
228        return AVERROR(ENOMEM);
229    }
230
231    bytestream2_init_writer(&pb, avctx->extradata, MAGICYUV_EXTRADATA_SIZE);
232    bytestream2_put_le32(&pb, MKTAG('M', 'A', 'G', 'Y'));
233    bytestream2_put_le32(&pb, 32);
234    bytestream2_put_byte(&pb, 7);
235    bytestream2_put_byte(&pb, s->format);
236    bytestream2_put_byte(&pb, 12);
237    bytestream2_put_byte(&pb, 0);
238
239    bytestream2_put_byte(&pb, 0);
240    bytestream2_put_byte(&pb, 0);
241    bytestream2_put_byte(&pb, 32);
242    bytestream2_put_byte(&pb, 0);
243
244    bytestream2_put_le32(&pb, avctx->width);
245    bytestream2_put_le32(&pb, avctx->height);
246    bytestream2_put_le32(&pb, avctx->width);
247    bytestream2_put_le32(&pb, avctx->height);
248
249    return 0;
250}
251
252static void calculate_codes(HuffEntry *he, uint16_t codes_count[33])
253{
254    for (unsigned i = 32, nb_codes = 0; i > 0; i--) {
255        uint16_t curr = codes_count[i];   // # of leafs of length i
256        codes_count[i] = nb_codes / 2;    // # of non-leaf nodes on level i
257        nb_codes = codes_count[i] + curr; // # of nodes on level i
258    }
259
260    for (unsigned i = 0; i < 256; i++) {
261        he[i].code = codes_count[he[i].len];
262        codes_count[he[i].len]++;
263    }
264}
265
266static void count_usage(uint8_t *src, int width,
267                        int height, PTable *counts)
268{
269    int i, j;
270
271    for (j = 0; j < height; j++) {
272        for (i = 0; i < width; i++) {
273            counts[src[i]].prob++;
274        }
275        src += width;
276    }
277}
278
/**
 * One working list of magy_huffman_compute_bits().
 *
 * Each item is a run of symbol values stored consecutively in items[];
 * item n occupies items[item_idx[n]] .. items[item_idx[n + 1] - 1].
 * The trailing examples on each field show one consistent sample state.
 */
typedef struct PackageMergerList {
    int nitems;             ///< number of items in the list and probability      ex. 4
    int item_idx[515];      ///< index range for each item in items                   0, 2, 5, 9, 13
    int probability[514];   ///< probability of each item                             3, 8, 18, 46
    int items[257 * 16];    ///< chain of all individual values that make up items    A, B, A, B, C, A, B, C, D, C, D, D, E
} PackageMergerList;
285
286static int compare_by_prob(const void *a, const void *b)
287{
288    const PTable *a2 = a;
289    const PTable *b2 = b;
290    return a2->prob - b2->prob;
291}
292
/**
 * Compute a code length for every symbol and count how many symbols use
 * each length.
 *
 * prob_table is first sorted in place by ascending prob. The working
 * list is then rebuilt max_length + 1 times: each pass merges the sorted
 * symbols with "packages" formed from two consecutive items of the
 * previous pass's list, always taking whichever has the smaller
 * probability next. The number of times a symbol appears in the leading
 * items of the final list becomes its code length.
 *
 * NOTE(review): this looks like a package-merge style construction of
 * length-limited codes - confirm against the reference algorithm.
 *
 * @param prob_table   size (value, prob) entries; reordered by this call
 * @param distincts    receives len for each symbol, indexed by symbol value
 * @param codes_counts incremented at index = code length once per symbol;
 *                     not cleared here
 * @param size         number of symbols
 * @param max_length   controls the number of merge passes; must be > 0
 */
static void magy_huffman_compute_bits(PTable *prob_table, HuffEntry *distincts,
                                      uint16_t codes_counts[33],
                                      int size, int max_length)
{
    PackageMergerList list_a, list_b, *to = &list_a, *from = &list_b, *temp;
    int times, i, j, k;
    int nbits[257] = {0};
    int min;

    av_assert0(max_length > 0);

    to->nitems = 0;
    from->nitems = 0;
    to->item_idx[0] = 0;
    from->item_idx[0] = 0;
    AV_QSORT(prob_table, size, PTable, compare_by_prob);

    for (times = 0; times <= max_length; times++) {
        to->nitems = 0;
        to->item_idx[0] = 0;

        j = 0;
        k = 0;

        /* Restart from the first symbol on every pass but the last. On the
         * last pass i keeps the value it had when the previous pass ended
         * (no symbols left), so only packages are emitted. */
        if (times < max_length) {
            i = 0;
        }
        /* Continue while a symbol or a complete pair of 'from' items remains. */
        while (i < size || j + 1 < from->nitems) {
            to->nitems++;
            /* The new item starts where the previous one ended. */
            to->item_idx[to->nitems] = to->item_idx[to->nitems - 1];
            if (i < size &&
                (j + 1 >= from->nitems ||
                 prob_table[i].prob <
                     from->probability[j] + from->probability[j + 1])) {
                /* The next symbol is cheaper than the next package (or no
                 * package is available): append it as a one-value item. */
                to->items[to->item_idx[to->nitems]++] = prob_table[i].value;
                to->probability[to->nitems - 1] = prob_table[i].prob;
                i++;
            } else {
                /* Package items j and j + 1 of the previous list: copy both
                 * value chains and add up their probabilities. */
                for (k = from->item_idx[j]; k < from->item_idx[j + 2]; k++) {
                    to->items[to->item_idx[to->nitems]++] = from->items[k];
                }
                to->probability[to->nitems - 1] =
                    from->probability[j] + from->probability[j + 1];
                j += 2;
            }
        }
        /* The list just built becomes the input of the next pass. */
        temp = to;
        to = from;
        from = temp;
    }

    /* After the final swap 'from' is the last list built. Each occurrence
     * of a symbol within its first min items adds one bit to that symbol. */
    min = (size - 1 < from->nitems) ? size - 1 : from->nitems;
    for (i = 0; i < from->item_idx[min]; i++) {
        nbits[from->items[i]]++;
    }

    /* nbits is indexed by symbol value, not by sorted position. */
    for (i = 0; i < size; i++) {
        distincts[i].len = nbits[i];
        codes_counts[nbits[i]]++;
    }
}
354
355static int encode_table(AVCodecContext *avctx, uint8_t *dst,
356                        int width, int height,
357                        PutBitContext *pb, HuffEntry *he)
358{
359    PTable counts[256] = { {0} };
360    uint16_t codes_counts[33] = { 0 };
361    int i;
362
363    count_usage(dst, width, height, counts);
364
365    for (i = 0; i < 256; i++) {
366        counts[i].prob++;
367        counts[i].value = i;
368    }
369
370    magy_huffman_compute_bits(counts, he, codes_counts, 256, 12);
371
372    calculate_codes(he, codes_counts);
373
374    for (i = 0; i < 256; i++) {
375        put_bits(pb, 1, 0);
376        put_bits(pb, 7, he[i].len);
377    }
378
379    return 0;
380}
381
382static int encode_slice(uint8_t *src, uint8_t *dst, int dst_size,
383                        int width, int height, HuffEntry *he, int prediction)
384{
385    PutBitContext pb;
386    int i, j;
387    int count;
388
389    init_put_bits(&pb, dst, dst_size);
390
391    put_bits(&pb, 8, 0);
392    put_bits(&pb, 8, prediction);
393
394    for (j = 0; j < height; j++) {
395        for (i = 0; i < width; i++) {
396            const int idx = src[i];
397            put_bits(&pb, he[idx].len, he[idx].code);
398        }
399
400        src += width;
401    }
402
403    count = put_bits_count(&pb) & 0x1F;
404
405    if (count)
406        put_bits(&pb, 32 - count, 0);
407
408    flush_put_bits(&pb);
409
410    return put_bytes_output(&pb);
411}
412
/**
 * Encode one frame into a packet.
 *
 * Packet layout as written below: the 32-byte header, a block of le32
 * slice offsets (zero placeholders first, patched at the end), a plane
 * count byte, one plane index byte per plane and slice, the Huffman
 * length tables of all planes, then the coded slice of each plane.
 *
 * @param avctx      codec context
 * @param pkt        output packet, allocated here
 * @param frame      input picture
 * @param got_packet set to 1 on success
 * @return 0 on success, or the negative error code of ff_alloc_packet()
 */
static int magy_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                             const AVFrame *frame, int *got_packet)
{
    MagicYUVContext *s = avctx->priv_data;
    PutByteContext pb;
    const int width = avctx->width, height = avctx->height;
    int pos, slice, i, j, ret = 0;

    /* NOTE(review): the size budgets width * height bytes of coded data
     * per plane plus fixed overhead - confirm this bounds the output for
     * codes of up to 12 bits. */
    ret = ff_alloc_packet(avctx, pkt, (256 + 4 * s->nb_slices + width * height) *
                          s->planes + 256);
    if (ret < 0)
        return ret;

    bytestream2_init_writer(&pb, pkt->data, pkt->size);
    bytestream2_put_le32(&pb, MKTAG('M', 'A', 'G', 'Y'));
    bytestream2_put_le32(&pb, 32); // header size
    bytestream2_put_byte(&pb, 7);  // version
    bytestream2_put_byte(&pb, s->format);
    bytestream2_put_byte(&pb, 12); // max huffman length
    bytestream2_put_byte(&pb, 0);

    bytestream2_put_byte(&pb, 0);
    bytestream2_put_byte(&pb, 0);
    bytestream2_put_byte(&pb, 32); // coder type
    bytestream2_put_byte(&pb, 0);

    bytestream2_put_le32(&pb, avctx->width);
    bytestream2_put_le32(&pb, avctx->height);
    bytestream2_put_le32(&pb, avctx->width);
    bytestream2_put_le32(&pb, avctx->height);
    /* Offset 32: first of the le32 slots overwritten near the end of
     * this function. */
    bytestream2_put_le32(&pb, 0);

    /* One more placeholder per plane and slice. */
    for (i = 0; i < s->planes; i++) {
        bytestream2_put_le32(&pb, 0);
        for (j = 1; j < s->nb_slices; j++) {
            bytestream2_put_le32(&pb, 0);
        }
    }

    bytestream2_put_byte(&pb, s->planes);

    /* One byte per plane and slice, holding the plane index. */
    for (i = 0; i < s->planes; i++) {
        for (slice = 0; slice < s->nb_slices; slice++) {
            bytestream2_put_byte(&pb, i);
        }
    }

    if (s->correlate) {
        uint8_t *r, *g, *b, *decorrelated[2] = { s->decorrelate_buf[0],
                                                 s->decorrelate_buf[1] };
        const int decorrelate_linesize = FFALIGN(width, 16);
        /* Coded plane order: decorrelated B, G, decorrelated R, then
         * frame plane 3. data[] captures the buffer starts before the
         * decorrelated[] pointers are advanced below. */
        const uint8_t *const data[4] = { decorrelated[0], frame->data[0],
                                         decorrelated[1], frame->data[3] };
        const int linesize[4]  = { decorrelate_linesize, frame->linesize[0],
                                   decorrelate_linesize, frame->linesize[3] };

        g = frame->data[0];
        b = frame->data[1];
        r = frame->data[2];

        /* Row by row, run B and R through diff_bytes() against G. */
        for (i = 0; i < height; i++) {
            s->llvidencdsp.diff_bytes(decorrelated[0], b, g, width);
            s->llvidencdsp.diff_bytes(decorrelated[1], r, g, width);
            g += frame->linesize[0];
            b += frame->linesize[1];
            r += frame->linesize[2];
            decorrelated[0] += decorrelate_linesize;
            decorrelated[1] += decorrelate_linesize;
        }

        /* Run the predictor; its packed output lands in s->slices[i]. */
        for (i = 0; i < s->planes; i++) {
            for (slice = 0; slice < s->nb_slices; slice++) {
                s->predict(s, data[i], s->slices[i], linesize[i],
                           frame->width, frame->height);
            }
        }
    } else {
        /* Predict straight from the frame planes, using the per-plane
         * shifted dimensions. */
        for (i = 0; i < s->planes; i++) {
            for (slice = 0; slice < s->nb_slices; slice++) {
                s->predict(s, frame->data[i], s->slices[i], frame->linesize[i],
                           AV_CEIL_RSHIFT(frame->width, s->hshift[i]),
                           AV_CEIL_RSHIFT(frame->height, s->vshift[i]));
            }
        }
    }

    /* The bit writer takes over at the byte writer's current position. */
    init_put_bits(&s->pb, pkt->data + bytestream2_tell_p(&pb), bytestream2_get_bytes_left_p(&pb));

    /* Build each plane's Huffman code and write its length table. */
    for (i = 0; i < s->planes; i++) {
        encode_table(avctx, s->slices[i],
                     AV_CEIL_RSHIFT(frame->width,  s->hshift[i]),
                     AV_CEIL_RSHIFT(frame->height, s->vshift[i]),
                     &s->pb, s->he[i]);
    }
    /* Advance the byte writer past the tables just written. */
    s->tables_size = put_bytes_count(&s->pb, 1);
    bytestream2_skip_p(&pb, s->tables_size);

    /* Code every plane, remembering where its slice starts. */
    for (i = 0; i < s->planes; i++) {
        unsigned slice_size;

        s->slice_pos[i] = bytestream2_tell_p(&pb);
        slice_size = encode_slice(s->slices[i], pkt->data + bytestream2_tell_p(&pb),
                                  bytestream2_get_bytes_left_p(&pb),
                                  AV_CEIL_RSHIFT(frame->width,  s->hshift[i]),
                                  AV_CEIL_RSHIFT(frame->height, s->vshift[i]),
                                  s->he[i], s->frame_pred);
        bytestream2_skip_p(&pb, slice_size);
    }

    /* Go back to offset 32 and fill in the slice offsets, stored
     * relative to the end of the 32-byte header. The first plane's
     * offset is written twice: once here and once by the loop. */
    pos = bytestream2_tell_p(&pb);
    bytestream2_seek_p(&pb, 32, SEEK_SET);
    bytestream2_put_le32(&pb, s->slice_pos[0] - 32);
    for (i = 0; i < s->planes; i++) {
        bytestream2_put_le32(&pb, s->slice_pos[i] - 32);
    }
    bytestream2_seek_p(&pb, pos, SEEK_SET);

    /* Shrink the packet to the bytes actually produced. */
    pkt->size   = bytestream2_tell_p(&pb);

    *got_packet = 1;

    return 0;
}
536
537static av_cold int magy_encode_close(AVCodecContext *avctx)
538{
539    MagicYUVContext *s = avctx->priv_data;
540    int i;
541
542    for (i = 0; i < s->planes; i++)
543        av_freep(&s->slices[i]);
544    av_freep(&s->decorrelate_buf);
545
546    return 0;
547}
548
/* Byte offset of a MagicYUVContext field, for the option table below. */
#define OFFSET(x) offsetof(MagicYUVContext, x)
/* Flags shared by all options: video, encoding. */
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options: "pred" stores a Prediction value in frame_pred
 * (default LEFT, range LEFT..MEDIAN); the named constants share its unit. */
static const AVOption options[] = {
    { "pred", "Prediction method", OFFSET(frame_pred), AV_OPT_TYPE_INT, {.i64=LEFT}, LEFT, MEDIAN, VE, "pred" },
    { "left",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LEFT },     0, 0, VE, "pred" },
    { "gradient", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = GRADIENT }, 0, 0, VE, "pred" },
    { "median",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MEDIAN },   0, 0, VE, "pred" },
    { NULL},
};
558
/* AVClass tying the private option table to the encoder context. */
static const AVClass magicyuv_class = {
    .class_name = "magicyuv",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
565
/* Codec registration entry: wires up init/encode/close and lists the
 * 8-bit planar pixel formats handled by magy_encode_init(). */
const FFCodec ff_magicyuv_encoder = {
    .p.name           = "magicyuv",
    .p.long_name      = NULL_IF_CONFIG_SMALL("MagicYUV video"),
    .p.type           = AVMEDIA_TYPE_VIDEO,
    .p.id             = AV_CODEC_ID_MAGICYUV,
    .priv_data_size   = sizeof(MagicYUVContext),
    .p.priv_class     = &magicyuv_class,
    .init             = magy_encode_init,
    .close            = magy_encode_close,
    FF_CODEC_ENCODE_CB(magy_encode_frame),
    .p.capabilities   = AV_CODEC_CAP_FRAME_THREADS,
    .p.pix_fmts       = (const enum AVPixelFormat[]) {
                          AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_YUV422P,
                          AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P, AV_PIX_FMT_GRAY8,
                          AV_PIX_FMT_NONE
                      },
    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
};
584