xref: /third_party/ffmpeg/libavcodec/speedhq.c (revision cabdff1a)
1/*
2 * NewTek SpeedHQ codec
3 * Copyright 2017 Steinar H. Gunderson
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * NewTek SpeedHQ decoder.
25 */
26
27#define BITSTREAM_READER_LE
28
29#include "config.h"
30#include "config_components.h"
31#include "libavutil/attributes.h"
32#include "libavutil/mem_internal.h"
33
34#include "avcodec.h"
35#include "blockdsp.h"
36#include "codec_internal.h"
37#include "get_bits.h"
38#include "idctdsp.h"
39#include "internal.h"
40#include "libavutil/thread.h"
41#include "mathops.h"
42#include "mpeg12dec.h"
43#include "mpeg12data.h"
44#include "mpeg12vlc.h"
45
/* Highest valid coefficient index inside one 64-entry DCT block. */
#define MAX_INDEX (64 - 1)

/*
 * Lookup width used for the alpha run/level VLC tables. Five bits keep
 * the tables very small while the longest (10-bit) codes still resolve
 * in no more than two lookups.
 */
#define ALPHA_VLC_BITS 5
53
54typedef struct SHQContext {
55    AVCodecContext *avctx;
56    BlockDSPContext bdsp;
57    IDCTDSPContext idsp;
58    ScanTable intra_scantable;
59    int quant_matrix[64];
60    enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 }
61        subsampling;
62    enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type;
63} SHQContext;
64
65
/*
 * AC codes: Very similar but not identical to MPEG-2.
 *
 * Each entry is { code, length in bits }. The first 121 entries line up
 * index-for-index with speedhq_run[] / speedhq_level[] (all three are
 * handed to ff_rl_speedhq); the grouping below follows the run values of
 * speedhq_run[]. The last two entries are the escape and EOB codes.
 */
static const uint16_t speedhq_vlc[123][2] = {
    /* run 0, levels 1..40 (indices 0..39) */
    {0x0001,  2}, {0x0003,  3}, {0x000E,  4}, {0x0007,  5},
    {0x0017,  5}, {0x0028,  6}, {0x0008,  6}, {0x006F,  7},
    {0x001F,  7}, {0x00C4,  8}, {0x0044,  8}, {0x005F,  8},
    {0x00DF,  8}, {0x007F,  8}, {0x00FF,  8}, {0x3E00, 14},
    {0x1E00, 14}, {0x2E00, 14}, {0x0E00, 14}, {0x3600, 14},
    {0x1600, 14}, {0x2600, 14}, {0x0600, 14}, {0x3A00, 14},
    {0x1A00, 14}, {0x2A00, 14}, {0x0A00, 14}, {0x3200, 14},
    {0x1200, 14}, {0x2200, 14}, {0x0200, 14}, {0x0C00, 15},
    {0x7400, 15}, {0x3400, 15}, {0x5400, 15}, {0x1400, 15},
    {0x6400, 15}, {0x2400, 15}, {0x4400, 15}, {0x0400, 15},

    /* run 1, levels 1..20 (indices 40..59) */
    {0x0002,  3}, {0x000C,  5}, {0x004F,  7}, {0x00E4,  8},
    {0x0004,  8}, {0x0D00, 13}, {0x1500, 13}, {0x7C00, 15},
    {0x3C00, 15}, {0x5C00, 15}, {0x1C00, 15}, {0x6C00, 15},
    {0x2C00, 15}, {0x4C00, 15}, {0xC800, 16}, {0x4800, 16},
    {0x8800, 16}, {0x0800, 16}, {0x0300, 13}, {0x1D00, 13},

    /* run 2, levels 1..11 (indices 60..70) */
    {0x0014,  5}, {0x0070,  7}, {0x003F,  8}, {0x00C0, 10},
    {0x0500, 13}, {0x0180, 12}, {0x0280, 12}, {0x0C80, 12},
    {0x0080, 12}, {0x0B00, 13}, {0x1300, 13},

    /* run 3, levels 1..5 (indices 71..75) */
    {0x001C,  5}, {0x0064,  8}, {0x0380, 12}, {0x1900, 13},
    {0x0D80, 12},

    /* run 4, levels 1..4 (indices 76..79) */
    {0x0018,  6}, {0x00BF,  8}, {0x0480, 12}, {0x0B80, 12},

    /* runs 5 and 6, levels 1..3 each (indices 80..85) */
    {0x0038,  6}, {0x0040,  9}, {0x0900, 13},
    {0x0030,  7}, {0x0780, 12}, {0x2800, 16},

    /* runs 7..16, levels 1..2 each (indices 86..105) */
    {0x0010,  7}, {0x0A80, 12},
    {0x0050,  7}, {0x0880, 12},
    {0x000F,  7}, {0x1100, 13},
    {0x002F,  7}, {0x0100, 13},
    {0x0084,  8}, {0x5800, 16},
    {0x00A4,  8}, {0x9800, 16},
    {0x0024,  8}, {0x1800, 16},
    {0x0140,  9}, {0xE800, 16},
    {0x01C0,  9}, {0x6800, 16},
    {0x02C0, 10}, {0xA800, 16},

    /* runs 17..31, level 1 only (indices 106..120) */
    {0x0F80, 12}, {0x0580, 12}, {0x0980, 12}, {0x0E80, 12},
    {0x0680, 12}, {0x1F00, 13}, {0x0F00, 13}, {0x1700, 13},
    {0x0700, 13}, {0x1B00, 13}, {0xF800, 16}, {0x7800, 16},
    {0xB800, 16}, {0x3800, 16}, {0xD800, 16},

    {0x0020,  6}, /* escape */
    {0x0006,  4}  /* EOB */
};
102
/*
 * Level (coefficient magnitude) for each of the 121 regular AC codes,
 * in the same order as speedhq_vlc[] and speedhq_run[]. Rows are grouped
 * by the run they belong to.
 */
static const uint8_t speedhq_level[121] = {
    /* run 0: levels 1..40 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* run 1: levels 1..20 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    /* run 2: levels 1..11 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
    /* run 3: levels 1..5 */
     1,  2,  3,  4,  5,
    /* run 4: levels 1..4 */
     1,  2,  3,  4,
    /* runs 5 and 6: levels 1..3 each */
     1,  2,  3,
     1,  2,  3,
    /* runs 7..16: levels 1..2 each */
     1,  2,  1,  2,  1,  2,  1,  2,  1,  2,
     1,  2,  1,  2,  1,  2,  1,  2,  1,  2,
    /* runs 17..31: level 1 only */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,
};
121
/*
 * Run (number of zero coefficients skipped) for each of the 121 regular
 * AC codes, in the same order as speedhq_vlc[] and speedhq_level[].
 */
static const uint8_t speedhq_run[121] = {
    /* 40 codes with run 0 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 20 codes with run 1 */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    /* 11 codes with run 2 */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    /* 5 codes with run 3 */
     3,  3,  3,  3,  3,
    /* 4 codes with run 4 */
     4,  4,  4,  4,
    /* 3 codes each for runs 5 and 6 */
     5,  5,  5,
     6,  6,  6,
    /* 2 codes each for runs 7..16 */
     7,  7,  8,  8,  9,  9, 10, 10, 11, 11,
    12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    /* 1 code each for runs 17..31 */
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    27, 28, 29, 30, 31,
};
140
141RLTable ff_rl_speedhq = {
142    121,
143    121,
144    speedhq_vlc,
145    speedhq_run,
146    speedhq_level,
147};
148
149#if CONFIG_SPEEDHQ_DECODER
/*
 * Base quantisation matrix, laid out as 8 rows of 8 in natural
 * (row-major) order. compute_quant_matrix() multiplies it by the frame's
 * qscale and reorders it into zigzag order.
 *
 * NOTE: The first element is always 16, unscaled.
 */
static const uint8_t unscaled_quant_matrix[64] = {
    16, 16, 19, 22, 26, 27, 29, 34,
    16, 16, 22, 24, 27, 29, 34, 37,
    19, 22, 26, 27, 29, 34, 34, 38,
    22, 22, 26, 27, 29, 34, 37, 40,
    22, 26, 27, 29, 32, 35, 40, 48,
    26, 27, 29, 32, 35, 40, 48, 58,
    26, 27, 29, 34, 38, 46, 56, 69,
    27, 29, 35, 38, 46, 56, 69, 83,
};
161
162static VLC dc_lum_vlc_le;
163static VLC dc_chroma_vlc_le;
164static VLC dc_alpha_run_vlc_le;
165static VLC dc_alpha_level_vlc_le;
166
167static inline int decode_dc_le(GetBitContext *gb, int component)
168{
169    int code, diff;
170
171    if (component == 0 || component == 3) {
172        code = get_vlc2(gb, dc_lum_vlc_le.table, DC_VLC_BITS, 2);
173    } else {
174        code = get_vlc2(gb, dc_chroma_vlc_le.table, DC_VLC_BITS, 2);
175    }
176    if (!code) {
177        diff = 0;
178    } else {
179        diff = get_xbits_le(gb, code);
180    }
181    return diff;
182}
183
184static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize)
185{
186    uint8_t block[128];
187    int i = 0, x, y;
188
189    memset(block, 0, sizeof(block));
190
191    {
192        OPEN_READER(re, gb);
193
194        for ( ;; ) {
195            int run, level;
196
197            UPDATE_CACHE_LE(re, gb);
198            GET_VLC(run, re, gb, dc_alpha_run_vlc_le.table, ALPHA_VLC_BITS, 2);
199
200            if (run < 0) break;
201            i += run;
202            if (i >= 128)
203                return AVERROR_INVALIDDATA;
204
205            UPDATE_CACHE_LE(re, gb);
206            GET_VLC(level, re, gb, dc_alpha_level_vlc_le.table, ALPHA_VLC_BITS, 2);
207            block[i++] = level;
208        }
209
210        CLOSE_READER(re, gb);
211    }
212
213    for (y = 0; y < 8; y++) {
214        for (x = 0; x < 16; x++) {
215            last_alpha[x] -= block[y * 16 + x];
216        }
217        memcpy(dest, last_alpha, 16);
218        dest += linesize;
219    }
220
221    return 0;
222}
223
224static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)
225{
226    const int *quant_matrix = s->quant_matrix;
227    const uint8_t *scantable = s->intra_scantable.permutated;
228    LOCAL_ALIGNED_32(int16_t, block, [64]);
229    int dc_offset;
230
231    s->bdsp.clear_block(block);
232
233    dc_offset = decode_dc_le(gb, component);
234    last_dc[component] -= dc_offset;  /* Note: Opposite of most codecs. */
235    block[scantable[0]] = last_dc[component];  /* quant_matrix[0] is always 16. */
236
237    /* Read AC coefficients. */
238    {
239        int i = 0;
240        OPEN_READER(re, gb);
241        for ( ;; ) {
242            int level, run;
243            UPDATE_CACHE_LE(re, gb);
244            GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0],
245                       TEX_VLC_BITS, 2, 0);
246            if (level == 127) {
247                break;
248            } else if (level) {
249                i += run;
250                if (i > MAX_INDEX)
251                    return AVERROR_INVALIDDATA;
252                /* If next bit is 1, level = -level */
253                level = (level ^ SHOW_SBITS(re, gb, 1)) -
254                        SHOW_SBITS(re, gb, 1);
255                LAST_SKIP_BITS(re, gb, 1);
256            } else {
257                /* Escape. */
258#if MIN_CACHE_BITS < 6 + 6 + 12
259#error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
260#endif
261                run = SHOW_UBITS(re, gb, 6) + 1;
262                SKIP_BITS(re, gb, 6);
263                level = SHOW_UBITS(re, gb, 12) - 2048;
264                LAST_SKIP_BITS(re, gb, 12);
265
266                i += run;
267                if (i > MAX_INDEX)
268                    return AVERROR_INVALIDDATA;
269            }
270
271            block[scantable[i]] = (level * quant_matrix[i]) >> 4;
272        }
273        CLOSE_READER(re, gb);
274    }
275
276    s->idsp.idct_put(dest, linesize, block);
277
278    return 0;
279}
280
281static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame *frame, int field_number, int line_stride)
282{
283    int linesize_y  = frame->linesize[0] * line_stride;
284    int linesize_cb = frame->linesize[1] * line_stride;
285    int linesize_cr = frame->linesize[2] * line_stride;
286    int linesize_a;
287    int ret;
288
289    if (s->alpha_type != SHQ_NO_ALPHA)
290        linesize_a = frame->linesize[3] * line_stride;
291
292    for (int y = 0; y < frame->height; y += 16 * line_stride) {
293        int last_dc[4] = { 1024, 1024, 1024, 1024 };
294        uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
295        uint8_t last_alpha[16];
296        int x = frame->width - 8;
297
298        dest_y = frame->data[0] + frame->linesize[0] * (y + field_number) + x;
299        if (s->subsampling == SHQ_SUBSAMPLING_420) {
300            dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number) + x / 2;
301            dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number) + x / 2;
302        } else {
303            av_assert2(s->subsampling == SHQ_SUBSAMPLING_422);
304            dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number) + x / 2;
305            dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number) + x / 2;
306        }
307        if (s->alpha_type != SHQ_NO_ALPHA) {
308            memset(last_alpha, 255, sizeof(last_alpha));
309            dest_a = frame->data[3] + frame->linesize[3] * (y + field_number) + x;
310        }
311
312        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y, linesize_y)) < 0)
313            return ret;
314        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
315            return ret;
316        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
317            return ret;
318        if ((ret = decode_dct_block(s, gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
319            return ret;
320        if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
321            return ret;
322        if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
323            return ret;
324
325        if (s->subsampling != SHQ_SUBSAMPLING_420) {
326            if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
327                return ret;
328            if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
329                return ret;
330        }
331
332        if (s->alpha_type == SHQ_RLE_ALPHA) {
333            /* Alpha coded using 16x8 RLE blocks. */
334            if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a, linesize_a)) < 0)
335                return ret;
336            if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
337                return ret;
338        } else if (s->alpha_type == SHQ_DCT_ALPHA) {
339            /* Alpha encoded exactly like luma. */
340            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a, linesize_a)) < 0)
341                return ret;
342            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8, linesize_a)) < 0)
343                return ret;
344            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0)
345                return ret;
346            if ((ret = decode_dct_block(s, gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0)
347                return ret;
348        }
349    }
350
351    return 0;
352}
353
354static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride)
355{
356    int ret, slice_number, slice_offsets[5];
357    int linesize_y  = frame->linesize[0] * line_stride;
358    int linesize_cb = frame->linesize[1] * line_stride;
359    int linesize_cr = frame->linesize[2] * line_stride;
360    int linesize_a;
361    GetBitContext gb;
362
363    if (s->alpha_type != SHQ_NO_ALPHA)
364        linesize_a = frame->linesize[3] * line_stride;
365
366    if (end < start || end - start < 3 || end > buf_size)
367        return AVERROR_INVALIDDATA;
368
369    slice_offsets[0] = start;
370    slice_offsets[4] = end;
371    for (slice_number = 1; slice_number < 4; slice_number++) {
372        uint32_t last_offset, slice_len;
373
374        last_offset = slice_offsets[slice_number - 1];
375        slice_len = AV_RL24(buf + last_offset);
376        slice_offsets[slice_number] = last_offset + slice_len;
377
378        if (slice_len < 3 || slice_offsets[slice_number] > end - 3)
379            return AVERROR_INVALIDDATA;
380    }
381
382    for (slice_number = 0; slice_number < 4; slice_number++) {
383        uint32_t slice_begin, slice_end;
384        int x, y;
385
386        slice_begin = slice_offsets[slice_number];
387        slice_end = slice_offsets[slice_number + 1];
388
389        if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0)
390            return ret;
391
392        for (y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) {
393            uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
394            int last_dc[4] = { 1024, 1024, 1024, 1024 };
395            uint8_t last_alpha[16];
396
397            memset(last_alpha, 255, sizeof(last_alpha));
398
399            dest_y = frame->data[0] + frame->linesize[0] * (y + field_number);
400            if (s->subsampling == SHQ_SUBSAMPLING_420) {
401                dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + field_number);
402                dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + field_number);
403            } else {
404                dest_cb = frame->data[1] + frame->linesize[1] * (y + field_number);
405                dest_cr = frame->data[2] + frame->linesize[2] * (y + field_number);
406            }
407            if (s->alpha_type != SHQ_NO_ALPHA) {
408                dest_a = frame->data[3] + frame->linesize[3] * (y + field_number);
409            }
410
411            for (x = 0; x < frame->width - 8 * (s->subsampling != SHQ_SUBSAMPLING_444); x += 16) {
412                /* Decode the four luma blocks. */
413                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, linesize_y)) < 0)
414                    return ret;
415                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, linesize_y)) < 0)
416                    return ret;
417                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y, linesize_y)) < 0)
418                    return ret;
419                if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 * linesize_y + 8, linesize_y)) < 0)
420                    return ret;
421
422                /*
423                 * Decode the first chroma block. For 4:2:0, this is the only one;
424                 * for 4:2:2, it's the top block; for 4:4:4, it's the top-left block.
425                 */
426                if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
427                    return ret;
428                if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
429                    return ret;
430
431                if (s->subsampling != SHQ_SUBSAMPLING_420) {
432                    /* For 4:2:2, this is the bottom block; for 4:4:4, it's the bottom-left block. */
433                    if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
434                        return ret;
435                    if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
436                        return ret;
437
438                    if (s->subsampling == SHQ_SUBSAMPLING_444) {
439                        /* Top-right and bottom-right blocks. */
440                        if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8, linesize_cb)) < 0)
441                            return ret;
442                        if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8, linesize_cr)) < 0)
443                            return ret;
444                        if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0)
445                            return ret;
446                        if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0)
447                            return ret;
448
449                        dest_cb += 8;
450                        dest_cr += 8;
451                    }
452                }
453                dest_y += 16;
454                dest_cb += 8;
455                dest_cr += 8;
456
457                if (s->alpha_type == SHQ_RLE_ALPHA) {
458                    /* Alpha coded using 16x8 RLE blocks. */
459                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0)
460                        return ret;
461                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
462                        return ret;
463                    dest_a += 16;
464                } else if (s->alpha_type == SHQ_DCT_ALPHA) {
465                    /* Alpha encoded exactly like luma. */
466                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a, linesize_a)) < 0)
467                        return ret;
468                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8, linesize_a)) < 0)
469                        return ret;
470                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a, linesize_a)) < 0)
471                        return ret;
472                    if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a + 8 * linesize_a + 8, linesize_a)) < 0)
473                        return ret;
474                    dest_a += 16;
475                }
476            }
477        }
478    }
479
480    if (s->subsampling != SHQ_SUBSAMPLING_444 && (frame->width & 15))
481        return decode_speedhq_border(s, &gb, frame, field_number, line_stride);
482
483    return 0;
484}
485
486static void compute_quant_matrix(int *output, int qscale)
487{
488    int i;
489    for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale;
490}
491
492static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame,
493                                int *got_frame, AVPacket *avpkt)
494{
495    SHQContext * const s = avctx->priv_data;
496    const uint8_t *buf   = avpkt->data;
497    int buf_size         = avpkt->size;
498    uint8_t quality;
499    uint32_t second_field_offset;
500    int ret;
501
502    if (buf_size < 4 || avctx->width < 8 || avctx->width % 8 != 0)
503        return AVERROR_INVALIDDATA;
504    if (buf_size < avctx->width*avctx->height / 64 / 4)
505        return AVERROR_INVALIDDATA;
506
507    quality = buf[0];
508    if (quality >= 100) {
509        return AVERROR_INVALIDDATA;
510    }
511
512    compute_quant_matrix(s->quant_matrix, 100 - quality);
513
514    second_field_offset = AV_RL24(buf + 1);
515    if (second_field_offset >= buf_size - 3) {
516        return AVERROR_INVALIDDATA;
517    }
518
519    avctx->coded_width = FFALIGN(avctx->width, 16);
520    avctx->coded_height = FFALIGN(avctx->height, 16);
521
522    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
523        return ret;
524    }
525    frame->key_frame = 1;
526
527    if (second_field_offset == 4 || second_field_offset == (buf_size-4)) {
528        /*
529         * Overlapping first and second fields is used to signal
530         * encoding only a single field. In this case, "height"
531         * is ambiguous; it could mean either the height of the
532         * frame as a whole, or of the field. The former would make
533         * more sense for compatibility with legacy decoders,
534         * but this matches the convention used in NDI, which is
535         * the primary user of this trick.
536         */
537        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1)) < 0)
538            return ret;
539    } else {
540        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2)) < 0)
541            return ret;
542        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2)) < 0)
543            return ret;
544    }
545
546    *got_frame = 1;
547    return buf_size;
548}
549
550/*
551 * Alpha VLC. Run and level are independently coded, and would be
552 * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't
553 * bother with combining them into one table.
554 */
555static av_cold void compute_alpha_vlcs(void)
556{
557    uint16_t run_code[134], level_code[266];
558    uint8_t run_bits[134], level_bits[266];
559    int16_t run_symbols[134], level_symbols[266];
560    int entry, i, sign;
561
562    /* Initialize VLC for alpha run. */
563    entry = 0;
564
565    /* 0 -> 0. */
566    run_code[entry] = 0;
567    run_bits[entry] = 1;
568    run_symbols[entry] = 0;
569    ++entry;
570
571    /* 10xx -> xx plus 1. */
572    for (i = 0; i < 4; ++i) {
573        run_code[entry] = (i << 2) | 1;
574        run_bits[entry] = 4;
575        run_symbols[entry] = i + 1;
576        ++entry;
577    }
578
579    /* 111xxxxxxx -> xxxxxxx. */
580    for (i = 0; i < 128; ++i) {
581        run_code[entry] = (i << 3) | 7;
582        run_bits[entry] = 10;
583        run_symbols[entry] = i;
584        ++entry;
585    }
586
587    /* 110 -> EOB. */
588    run_code[entry] = 3;
589    run_bits[entry] = 3;
590    run_symbols[entry] = -1;
591    ++entry;
592
593    av_assert0(entry == FF_ARRAY_ELEMS(run_code));
594
595    INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_run_vlc_le, ALPHA_VLC_BITS,
596                              FF_ARRAY_ELEMS(run_code),
597                              run_bits, 1, 1,
598                              run_code, 2, 2,
599                              run_symbols, 2, 2, 160);
600
601    /* Initialize VLC for alpha level. */
602    entry = 0;
603
604    for (sign = 0; sign <= 1; ++sign) {
605        /* 1s -> -1 or +1 (depending on sign bit). */
606        level_code[entry] = (sign << 1) | 1;
607        level_bits[entry] = 2;
608        level_symbols[entry] = sign ? -1 : 1;
609        ++entry;
610
611        /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
612        for (i = 0; i < 4; ++i) {
613            level_code[entry] = (i << 3) | (sign << 2) | 2;
614            level_bits[entry] = 5;
615            level_symbols[entry] = sign ? -(i + 2) : (i + 2);
616            ++entry;
617        }
618    }
619
620    /*
621     * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes
622     * here that would better be encoded in other ways (e.g. 0 would be
623     * encoded by increasing run, and +/- 1 would be encoded with a
624     * shorter code), but it doesn't hurt to allow everything.
625     */
626    for (i = 0; i < 256; ++i) {
627        level_code[entry] = i << 2;
628        level_bits[entry] = 10;
629        level_symbols[entry] = i;
630        ++entry;
631    }
632
633    av_assert0(entry == FF_ARRAY_ELEMS(level_code));
634
635    INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_level_vlc_le, ALPHA_VLC_BITS,
636                              FF_ARRAY_ELEMS(level_code),
637                              level_bits, 1, 1,
638                              level_code, 2, 2,
639                              level_symbols, 2, 2, 288);
640}
641
642static av_cold void speedhq_static_init(void)
643{
644    /* Exactly the same as MPEG-2, except for a little-endian reader. */
645    INIT_CUSTOM_VLC_STATIC(&dc_lum_vlc_le, DC_VLC_BITS, 12,
646                           ff_mpeg12_vlc_dc_lum_bits, 1, 1,
647                           ff_mpeg12_vlc_dc_lum_code, 2, 2,
648                           INIT_VLC_OUTPUT_LE, 512);
649    INIT_CUSTOM_VLC_STATIC(&dc_chroma_vlc_le, DC_VLC_BITS, 12,
650                           ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
651                           ff_mpeg12_vlc_dc_chroma_code, 2, 2,
652                           INIT_VLC_OUTPUT_LE, 514);
653
654    INIT_2D_VLC_RL(ff_rl_speedhq, 674, INIT_VLC_LE);
655
656    compute_alpha_vlcs();
657}
658
659static av_cold int speedhq_decode_init(AVCodecContext *avctx)
660{
661    int ret;
662    static AVOnce init_once = AV_ONCE_INIT;
663    SHQContext * const s = avctx->priv_data;
664
665    s->avctx = avctx;
666
667    ret = ff_thread_once(&init_once, speedhq_static_init);
668    if (ret)
669        return AVERROR_UNKNOWN;
670
671    ff_blockdsp_init(&s->bdsp, avctx);
672    ff_idctdsp_init(&s->idsp, avctx);
673    ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
674
675    switch (avctx->codec_tag) {
676    case MKTAG('S', 'H', 'Q', '0'):
677        s->subsampling = SHQ_SUBSAMPLING_420;
678        s->alpha_type = SHQ_NO_ALPHA;
679        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
680        break;
681    case MKTAG('S', 'H', 'Q', '1'):
682        s->subsampling = SHQ_SUBSAMPLING_420;
683        s->alpha_type = SHQ_RLE_ALPHA;
684        avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
685        break;
686    case MKTAG('S', 'H', 'Q', '2'):
687        s->subsampling = SHQ_SUBSAMPLING_422;
688        s->alpha_type = SHQ_NO_ALPHA;
689        avctx->pix_fmt = AV_PIX_FMT_YUV422P;
690        break;
691    case MKTAG('S', 'H', 'Q', '3'):
692        s->subsampling = SHQ_SUBSAMPLING_422;
693        s->alpha_type = SHQ_RLE_ALPHA;
694        avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
695        break;
696    case MKTAG('S', 'H', 'Q', '4'):
697        s->subsampling = SHQ_SUBSAMPLING_444;
698        s->alpha_type = SHQ_NO_ALPHA;
699        avctx->pix_fmt = AV_PIX_FMT_YUV444P;
700        break;
701    case MKTAG('S', 'H', 'Q', '5'):
702        s->subsampling = SHQ_SUBSAMPLING_444;
703        s->alpha_type = SHQ_RLE_ALPHA;
704        avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
705        break;
706    case MKTAG('S', 'H', 'Q', '7'):
707        s->subsampling = SHQ_SUBSAMPLING_422;
708        s->alpha_type = SHQ_DCT_ALPHA;
709        avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
710        break;
711    case MKTAG('S', 'H', 'Q', '9'):
712        s->subsampling = SHQ_SUBSAMPLING_444;
713        s->alpha_type = SHQ_DCT_ALPHA;
714        avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
715        break;
716    default:
717        av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n",
718               avctx->codec_tag);
719        return AVERROR_INVALIDDATA;
720    }
721
722    /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
723    avctx->colorspace = AVCOL_SPC_BT470BG;
724    avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
725
726    return 0;
727}
728
729const FFCodec ff_speedhq_decoder = {
730    .p.name         = "speedhq",
731    .p.long_name    = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
732    .p.type         = AVMEDIA_TYPE_VIDEO,
733    .p.id           = AV_CODEC_ID_SPEEDHQ,
734    .priv_data_size = sizeof(SHQContext),
735    .init           = speedhq_decode_init,
736    FF_CODEC_DECODE_CB(speedhq_decode_frame),
737    .p.capabilities = AV_CODEC_CAP_DR1,
738    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
739};
740#endif /* CONFIG_SPEEDHQ_DECODER */
741