1 /*
2  * NewTek SpeedHQ codec
3  * Copyright 2017 Steinar H. Gunderson
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * NewTek SpeedHQ decoder.
25  */
26 
27 #define BITSTREAM_READER_LE
28 
29 #include "config.h"
30 #include "config_components.h"
31 #include "libavutil/attributes.h"
32 #include "libavutil/mem_internal.h"
33 
34 #include "avcodec.h"
35 #include "blockdsp.h"
36 #include "codec_internal.h"
37 #include "get_bits.h"
38 #include "idctdsp.h"
39 #include "internal.h"
40 #include "libavutil/thread.h"
41 #include "mathops.h"
42 #include "mpeg12dec.h"
43 #include "mpeg12data.h"
44 #include "mpeg12vlc.h"
45 
/* Highest coefficient position inside a 64-entry (8x8) block. */
#define MAX_INDEX 63

/*
 * Lookup width used for the alpha VLC tables. Five bits keep the tables
 * very small while the longest (10-bit) codes still resolve in at most
 * two lookups.
 */
#define ALPHA_VLC_BITS 5
53 
54 typedef struct SHQContext {
55     AVCodecContext *avctx;
56     BlockDSPContext bdsp;
57     IDCTDSPContext idsp;
58     ScanTable intra_scantable;
59     int quant_matrix[64];
60     enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 }
61         subsampling;
62     enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type;
63 } SHQContext;
64 
65 
/*
 * AC codes as { code, length in bits }: very similar but not identical to
 * MPEG-2. The first 121 entries are index-aligned with speedhq_run[] and
 * speedhq_level[]; the grouping below follows speedhq_run[]. The last two
 * entries are the escape and end-of-block codes.
 */
static const uint16_t speedhq_vlc[123][2] = {
    /* run 0, levels 1..40 */
    {0x0001,  2}, {0x0003,  3}, {0x000E,  4}, {0x0007,  5}, {0x0017,  5},
    {0x0028,  6}, {0x0008,  6}, {0x006F,  7}, {0x001F,  7}, {0x00C4,  8},
    {0x0044,  8}, {0x005F,  8}, {0x00DF,  8}, {0x007F,  8}, {0x00FF,  8},
    {0x3E00, 14}, {0x1E00, 14}, {0x2E00, 14}, {0x0E00, 14}, {0x3600, 14},
    {0x1600, 14}, {0x2600, 14}, {0x0600, 14}, {0x3A00, 14}, {0x1A00, 14},
    {0x2A00, 14}, {0x0A00, 14}, {0x3200, 14}, {0x1200, 14}, {0x2200, 14},
    {0x0200, 14}, {0x0C00, 15}, {0x7400, 15}, {0x3400, 15}, {0x5400, 15},
    {0x1400, 15}, {0x6400, 15}, {0x2400, 15}, {0x4400, 15}, {0x0400, 15},

    /* run 1, levels 1..20 */
    {0x0002,  3}, {0x000C,  5}, {0x004F,  7}, {0x00E4,  8}, {0x0004,  8},
    {0x0D00, 13}, {0x1500, 13}, {0x7C00, 15}, {0x3C00, 15}, {0x5C00, 15},
    {0x1C00, 15}, {0x6C00, 15}, {0x2C00, 15}, {0x4C00, 15}, {0xC800, 16},
    {0x4800, 16}, {0x8800, 16}, {0x0800, 16}, {0x0300, 13}, {0x1D00, 13},

    /* run 2, levels 1..11 */
    {0x0014,  5}, {0x0070,  7}, {0x003F,  8}, {0x00C0, 10}, {0x0500, 13},
    {0x0180, 12}, {0x0280, 12}, {0x0C80, 12}, {0x0080, 12}, {0x0B00, 13},
    {0x1300, 13},

    /* run 3, levels 1..5 */
    {0x001C,  5}, {0x0064,  8}, {0x0380, 12}, {0x1900, 13}, {0x0D80, 12},

    /* run 4, levels 1..4 */
    {0x0018,  6}, {0x00BF,  8}, {0x0480, 12}, {0x0B80, 12},

    /* run 5, levels 1..3 */
    {0x0038,  6}, {0x0040,  9}, {0x0900, 13},

    /* run 6, levels 1..3 */
    {0x0030,  7}, {0x0780, 12}, {0x2800, 16},

    /* runs 7..16, levels 1..2 each (one run per line) */
    {0x0010,  7}, {0x0A80, 12},
    {0x0050,  7}, {0x0880, 12},
    {0x000F,  7}, {0x1100, 13},
    {0x002F,  7}, {0x0100, 13},
    {0x0084,  8}, {0x5800, 16},
    {0x00A4,  8}, {0x9800, 16},
    {0x0024,  8}, {0x1800, 16},
    {0x0140,  9}, {0xE800, 16},
    {0x01C0,  9}, {0x6800, 16},
    {0x02C0, 10}, {0xA800, 16},

    /* runs 17..31, level 1 only */
    {0x0F80, 12}, {0x0580, 12}, {0x0980, 12}, {0x0E80, 12}, {0x0680, 12},
    {0x1F00, 13}, {0x0F00, 13}, {0x1700, 13}, {0x0700, 13}, {0x1B00, 13},
    {0xF800, 16}, {0x7800, 16}, {0xB800, 16}, {0x3800, 16}, {0xD800, 16},

    {0x0020,  6}, /* escape */
    {0x0006,  4}  /* EOB */
};
102 
/* Level of each AC code; entries are grouped by the run in speedhq_run[]. */
static const uint8_t speedhq_level[121] = {
    /* run 0 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    /* run 1 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    /* run 2 */
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
    /* run 3 */
     1,  2,  3,  4,  5,
    /* run 4 */
     1,  2,  3,  4,
    /* runs 5 and 6 */
     1,  2,  3,
     1,  2,  3,
    /* runs 7..16 */
     1,  2,   1,  2,   1,  2,   1,  2,   1,  2,
     1,  2,   1,  2,   1,  2,   1,  2,   1,  2,
    /* runs 17..31 */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,
};
121 
/* Zero run of each AC code, index-aligned with speedhq_level[]. */
static const uint8_t speedhq_run[121] = {
    /* 40 entries with run 0 */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 20 entries with run 1 */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    /* 11 entries with run 2 */
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    /* 5 entries with run 3, 4 with run 4 */
     3,  3,  3,  3,  3,
     4,  4,  4,  4,
    /* 3 entries each with runs 5 and 6 */
     5,  5,  5,
     6,  6,  6,
    /* 2 entries each with runs 7..16 */
     7,  7,   8,  8,   9,  9,  10, 10,  11, 11,
    12, 12,  13, 13,  14, 14,  15, 15,  16, 16,
    /* 1 entry each with runs 17..31 */
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
    27, 28, 29, 30, 31,
};
140 
141 RLTable ff_rl_speedhq = {
142     121,
143     121,
144     speedhq_vlc,
145     speedhq_run,
146     speedhq_level,
147 };
148 
149 #if CONFIG_SPEEDHQ_DECODER
/*
 * Base quantization matrix in raster order, before multiplication by the
 * per-frame scale. NOTE: element 0 is always 16, unscaled.
 */
static const uint8_t unscaled_quant_matrix[64] = {
    /* row 0 */ 16, 16, 19, 22, 26, 27, 29, 34,
    /* row 1 */ 16, 16, 22, 24, 27, 29, 34, 37,
    /* row 2 */ 19, 22, 26, 27, 29, 34, 34, 38,
    /* row 3 */ 22, 22, 26, 27, 29, 34, 37, 40,
    /* row 4 */ 22, 26, 27, 29, 32, 35, 40, 48,
    /* row 5 */ 26, 27, 29, 32, 35, 40, 48, 58,
    /* row 6 */ 26, 27, 29, 34, 38, 46, 56, 69,
    /* row 7 */ 27, 29, 35, 38, 46, 56, 69, 83
};
161 
162 static VLC dc_lum_vlc_le;
163 static VLC dc_chroma_vlc_le;
164 static VLC dc_alpha_run_vlc_le;
165 static VLC dc_alpha_level_vlc_le;
166 
/**
 * Read one DC difference from a little-endian bitstream.
 *
 * Components 0 and 3 use the luma table, every other component the chroma
 * table. The VLC symbol is the number of extra bits that hold the
 * difference; a symbol of zero means a difference of zero.
 *
 * @param gb        bit reader positioned at the DC code
 * @param component component index selecting the table
 * @return the DC difference
 */
static inline int decode_dc_le(GetBitContext *gb, int component)
{
    const VLC *dc_vlc = (component == 0 || component == 3) ? &dc_lum_vlc_le
                                                           : &dc_chroma_vlc_le;
    const int size    = get_vlc2(gb, dc_vlc->table, DC_VLC_BITS, 2);

    return size ? get_xbits_le(gb, size) : 0;
}
183 
/**
 * Decode one run/level coded 16x8 alpha block and write it to dest.
 *
 * The bitstream holds (run, level) pairs that fill a 128-entry delta array.
 * Each output row is produced by subtracting that row's deltas from
 * last_alpha, which therefore carries the prediction from row to row and,
 * since the caller owns it, from one call to the next.
 *
 * @param s          decoder context (not used here)
 * @param gb         bit reader; its position is only committed on success
 * @param last_alpha 16 predictor values, updated in place
 * @param dest       top-left output sample
 * @param linesize   distance in bytes between output rows
 * @return 0 on success, AVERROR_INVALIDDATA if a run leaves the block
 */
static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize)
{
    uint8_t deltas[128] = { 0 };
    int pos = 0;

    {
        OPEN_READER(re, gb);

        while (1) {
            int run, level;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(run, re, gb, dc_alpha_run_vlc_le.table, ALPHA_VLC_BITS, 2);

            /* A negative symbol ends the block (the EOB entry maps to -1). */
            if (run < 0)
                break;

            pos += run;
            if (pos >= 128)
                return AVERROR_INVALIDDATA;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(level, re, gb, dc_alpha_level_vlc_le.table, ALPHA_VLC_BITS, 2);
            deltas[pos] = level;
            pos++;
        }

        CLOSE_READER(re, gb);
    }

    for (int row = 0; row < 8; row++) {
        const uint8_t *row_deltas = deltas + row * 16;

        for (int col = 0; col < 16; col++)
            last_alpha[col] -= row_deltas[col];

        memcpy(dest, last_alpha, 16);
        dest += linesize;
    }

    return 0;
}
223 
decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)224 static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)
225 {
226     const int *quant_matrix = s->quant_matrix;
227     const uint8_t *scantable = s->intra_scantable.permutated;
228     LOCAL_ALIGNED_32(int16_t, block, [64]);
229     int dc_offset;
230 
231     s->bdsp.clear_block(block);
232 
233     dc_offset = decode_dc_le(gb, component);
234     last_dc[component] -= dc_offset;  /* Note: Opposite of most codecs. */
235     block[scantable[0]] = last_dc[component];  /* quant_matrix[0] is always 16. */
236 
237     /* Read AC coefficients. */
238     {
239         int i = 0;
240         OPEN_READER(re, gb);
241         for ( ;; ) {
242             int level, run;
243             UPDATE_CACHE_LE(re, gb);
244             GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0],
245                        TEX_VLC_BITS, 2, 0);
246             if (level == 127) {
247                 break;
248             } else if (level) {
249                 i += run;
250                 if (i > MAX_INDEX)
251                     return AVERROR_INVALIDDATA;
252                 /* If next bit is 1, level = -level */
253                 level = (level ^ SHOW_SBITS(re, gb, 1)) -
254                         SHOW_SBITS(re, gb, 1);
255                 LAST_SKIP_BITS(re, gb, 1);
256             } else {
257                 /* Escape. */
258 #if MIN_CACHE_BITS < 6 + 6 + 12
259 #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
260 #endif
261                 run = SHOW_UBITS(re, gb, 6) + 1;
262                 SKIP_BITS(re, gb, 6);
263                 level = SHOW_UBITS(re, gb, 12) - 2048;
264                 LAST_SKIP_BITS(re, gb, 12);
265 
266                 i += run;
267                 if (i > MAX_INDEX)
268                     return AVERROR_INVALIDDATA;
269             }
270 
271             block[scantable[i]] = (level * quant_matrix[i]) >> 4;
272         }
273         CLOSE_READER(re, gb);
274     }
275 
276     s->idsp.idct_put(dest, linesize, block);
277 
278     return 0;
279 }
280 
/**
 * Decode the column of blocks that starts 8 pixels before the right edge.
 *
 * One group of blocks is decoded for every 16 field lines, from top to
 * bottom, with fresh DC and alpha predictors per group. Only 4:2:0 and
 * 4:2:2 are expected here (asserted for the non-4:2:0 case).
 *
 * @param s            decoder context
 * @param gb           bit reader to continue reading from
 * @param frame        output frame
 * @param field_number first output line of this field (0 or 1 at the callers)
 * @param line_stride  distance in frame lines between lines of this field
 * @return 0 on success, a negative error code from the block decoders
 */
static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame *frame, int field_number, int line_stride)
{
    const int has_alpha   = s->alpha_type != SHQ_NO_ALPHA;
    const int is_420      = s->subsampling == SHQ_SUBSAMPLING_420;
    /* 4:2:0 has one 8-line chroma block per group, otherwise two. */
    const int chroma_rows = is_420 ? 1 : 2;
    const int linesize_y  = frame->linesize[0] * line_stride;
    const int linesize_cb = frame->linesize[1] * line_stride;
    const int linesize_cr = frame->linesize[2] * line_stride;
    int linesize_a;
    int ret;

    if (has_alpha)
        linesize_a = frame->linesize[3] * line_stride;

    for (int y = 0; y < frame->height; y += 16 * line_stride) {
        const int x        = frame->width - 8;
        const int luma_row = y + field_number;
        int last_dc[4]     = { 1024, 1024, 1024, 1024 };
        uint8_t last_alpha[16];
        uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a;
        int chroma_row;

        if (is_420) {
            chroma_row = y / 2 + field_number;
        } else {
            av_assert2(s->subsampling == SHQ_SUBSAMPLING_422);
            chroma_row = luma_row;
        }

        dest_y  = frame->data[0] + frame->linesize[0] * luma_row   + x;
        dest_cb = frame->data[1] + frame->linesize[1] * chroma_row + x / 2;
        dest_cr = frame->data[2] + frame->linesize[2] * chroma_row + x / 2;
        if (has_alpha) {
            memset(last_alpha, 255, sizeof(last_alpha));
            dest_a = frame->data[3] + frame->linesize[3] * luma_row + x;
        }

        /* Luma: top-left, top-right, bottom-left, bottom-right. */
        for (int k = 0; k < 4; k++) {
            ret = decode_dct_block(s, gb, last_dc, 0,
                                   dest_y + (k >> 1) * 8 * linesize_y + (k & 1) * 8,
                                   linesize_y);
            if (ret < 0)
                return ret;
        }

        /* Chroma: Cb then Cr for each 8-line row of blocks. */
        for (int r = 0; r < chroma_rows; r++) {
            ret = decode_dct_block(s, gb, last_dc, 1,
                                   dest_cb + r * 8 * linesize_cb, linesize_cb);
            if (ret < 0)
                return ret;
            ret = decode_dct_block(s, gb, last_dc, 2,
                                   dest_cr + r * 8 * linesize_cr, linesize_cr);
            if (ret < 0)
                return ret;
        }

        if (s->alpha_type == SHQ_RLE_ALPHA) {
            /* Alpha coded using 16x8 RLE blocks: upper half, then lower. */
            for (int r = 0; r < 2; r++) {
                ret = decode_alpha_block(s, gb, last_alpha,
                                         dest_a + r * 8 * linesize_a, linesize_a);
                if (ret < 0)
                    return ret;
            }
        } else if (s->alpha_type == SHQ_DCT_ALPHA) {
            /* Alpha encoded exactly like luma. */
            for (int k = 0; k < 4; k++) {
                ret = decode_dct_block(s, gb, last_dc, 3,
                                       dest_a + (k >> 1) * 8 * linesize_a + (k & 1) * 8,
                                       linesize_a);
                if (ret < 0)
                    return ret;
            }
        }
    }

    return 0;
}
353 
/**
 * Decode one field (or a whole progressive picture) from buf[start..end).
 *
 * The data consists of four slices, each starting with a 24-bit
 * little-endian length that includes the 3 length bytes. Slice n covers
 * the 16-line rows n, n + 4, n + 8, ... of the field. DC and alpha
 * predictors are reset at the start of every such row.
 *
 * @param s            decoder context
 * @param buf          packet data
 * @param buf_size     size of buf in bytes
 * @param frame        output frame
 * @param field_number first output line of this field
 * @param start        offset of the first slice header in buf
 * @param end          offset just past the field data in buf
 * @param line_stride  distance in frame lines between lines of this field
 * @return 0 on success, a negative error code otherwise
 */
static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride)
{
    const int has_alpha   = s->alpha_type != SHQ_NO_ALPHA;
    const int is_420      = s->subsampling == SHQ_SUBSAMPLING_420;
    const int is_444      = s->subsampling == SHQ_SUBSAMPLING_444;
    /* Chroma advances by 16 samples per 16 luma samples only in 4:4:4. */
    const int chroma_step = is_444 ? 16 : 8;
    /*
     * Unless the format is 4:4:4, the last 8 columns are left out here;
     * for widths that are not a multiple of 16 they are handled by
     * decode_speedhq_border() below.
     */
    const int x_end       = frame->width - 8 * !is_444;
    const int linesize_y  = frame->linesize[0] * line_stride;
    const int linesize_cb = frame->linesize[1] * line_stride;
    const int linesize_cr = frame->linesize[2] * line_stride;
    int linesize_a;
    int slice_offsets[5];
    GetBitContext gb;
    int ret;

    if (has_alpha)
        linesize_a = frame->linesize[3] * line_stride;

    if (end < start || end - start < 3 || end > buf_size)
        return AVERROR_INVALIDDATA;

    /* Walk the length prefixes to find where each slice begins. */
    slice_offsets[0] = start;
    slice_offsets[4] = end;
    for (int n = 1; n < 4; n++) {
        const uint32_t prev_offset = slice_offsets[n - 1];
        const uint32_t slice_len   = AV_RL24(buf + prev_offset);

        slice_offsets[n] = prev_offset + slice_len;

        if (slice_len < 3 || slice_offsets[n] > end - 3)
            return AVERROR_INVALIDDATA;
    }

    for (int n = 0; n < 4; n++) {
        const uint32_t slice_begin = slice_offsets[n];
        const uint32_t slice_end   = slice_offsets[n + 1];

        /* The payload starts after the 3-byte length prefix. */
        ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3);
        if (ret < 0)
            return ret;

        for (int y = n * 16 * line_stride; y < frame->height; y += line_stride * 64) {
            const int luma_row   = y + field_number;
            const int chroma_row = is_420 ? y / 2 + field_number : luma_row;
            int last_dc[4]       = { 1024, 1024, 1024, 1024 };
            uint8_t last_alpha[16];
            uint8_t *dest_y  = frame->data[0] + frame->linesize[0] * luma_row;
            uint8_t *dest_cb = frame->data[1] + frame->linesize[1] * chroma_row;
            uint8_t *dest_cr = frame->data[2] + frame->linesize[2] * chroma_row;
            uint8_t *dest_a;

            memset(last_alpha, 255, sizeof(last_alpha));

            if (has_alpha)
                dest_a = frame->data[3] + frame->linesize[3] * luma_row;

            for (int x = 0; x < x_end; x += 16) {
                /* Luma: top-left, top-right, bottom-left, bottom-right. */
                for (int k = 0; k < 4; k++) {
                    ret = decode_dct_block(s, &gb, last_dc, 0,
                                           dest_y + (k >> 1) * 8 * linesize_y + (k & 1) * 8,
                                           linesize_y);
                    if (ret < 0)
                        return ret;
                }

                /*
                 * First chroma pair. For 4:2:0 it is the only one; for
                 * 4:2:2 it is the top block; for 4:4:4 the top-left block.
                 */
                ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, linesize_cb);
                if (ret < 0)
                    return ret;
                ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, linesize_cr);
                if (ret < 0)
                    return ret;

                if (!is_420) {
                    /* Bottom block for 4:2:2, bottom-left block for 4:4:4. */
                    ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb);
                    if (ret < 0)
                        return ret;
                    ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr);
                    if (ret < 0)
                        return ret;

                    if (is_444) {
                        /* Right half: top-right pair, then bottom-right pair. */
                        for (int r = 0; r < 2; r++) {
                            ret = decode_dct_block(s, &gb, last_dc, 1,
                                                   dest_cb + r * 8 * linesize_cb + 8, linesize_cb);
                            if (ret < 0)
                                return ret;
                            ret = decode_dct_block(s, &gb, last_dc, 2,
                                                   dest_cr + r * 8 * linesize_cr + 8, linesize_cr);
                            if (ret < 0)
                                return ret;
                        }
                    }
                }

                dest_y  += 16;
                dest_cb += chroma_step;
                dest_cr += chroma_step;

                if (s->alpha_type == SHQ_RLE_ALPHA) {
                    /* Alpha coded using 16x8 RLE blocks: upper half, then lower. */
                    for (int r = 0; r < 2; r++) {
                        ret = decode_alpha_block(s, &gb, last_alpha,
                                                 dest_a + r * 8 * linesize_a, linesize_a);
                        if (ret < 0)
                            return ret;
                    }
                    dest_a += 16;
                } else if (s->alpha_type == SHQ_DCT_ALPHA) {
                    /* Alpha encoded exactly like luma. */
                    for (int k = 0; k < 4; k++) {
                        ret = decode_dct_block(s, &gb, last_dc, 3,
                                               dest_a + (k >> 1) * 8 * linesize_a + (k & 1) * 8,
                                               linesize_a);
                        if (ret < 0)
                            return ret;
                    }
                    dest_a += 16;
                }
            }
        }
    }

    /* The border column continues reading from the last slice's reader. */
    if (!is_444 && (frame->width & 15))
        return decode_speedhq_border(s, &gb, frame, field_number, line_stride);

    return 0;
}
485 
compute_quant_matrix(int *output, int qscale)486 static void compute_quant_matrix(int *output, int qscale)
487 {
488     int i;
489     for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale;
490 }
491 
/**
 * Decode one packet into a frame.
 *
 * Header layout as read here: byte 0 is the quality (must be below 100),
 * bytes 1..3 are the little-endian offset of the second field. Field data
 * starts at offset 4.
 *
 * @param avctx     codec context
 * @param frame     output frame, allocated via ff_get_buffer()
 * @param got_frame set to 1 when a frame was produced
 * @param avpkt     input packet
 * @return the packet size on success, a negative error code otherwise
 */
static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                                int *got_frame, AVPacket *avpkt)
{
    SHQContext * const s = avctx->priv_data;
    const uint8_t *buf   = avpkt->data;
    int buf_size         = avpkt->size;
    uint32_t second_field_offset;
    uint8_t quality;
    int single_field;
    int ret;

    /* Need the 4 header bytes and a width made of whole 8-pixel blocks. */
    if (buf_size < 4 || avctx->width < 8 || avctx->width % 8 != 0)
        return AVERROR_INVALIDDATA;
    /* Reject packets that are implausibly small for the picture size. */
    if (buf_size < avctx->width*avctx->height / 64 / 4)
        return AVERROR_INVALIDDATA;

    quality = buf[0];
    if (quality >= 100)
        return AVERROR_INVALIDDATA;

    compute_quant_matrix(s->quant_matrix, 100 - quality);

    second_field_offset = AV_RL24(buf + 1);
    if (second_field_offset >= buf_size - 3)
        return AVERROR_INVALIDDATA;

    avctx->coded_width  = FFALIGN(avctx->width, 16);
    avctx->coded_height = FFALIGN(avctx->height, 16);

    ret = ff_get_buffer(avctx, frame, 0);
    if (ret < 0)
        return ret;
    frame->key_frame = 1;

    /*
     * Overlapping first and second fields signal that only a single
     * field was encoded. "height" is ambiguous in that case: it could be
     * the height of the whole frame or of the field. The former would
     * suit legacy decoders better, but the latter matches the convention
     * of NDI, the primary user of this trick, so the data is decoded as
     * one progressive picture.
     */
    single_field = second_field_offset == 4 ||
                   second_field_offset == (buf_size-4);

    if (single_field) {
        ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1);
        if (ret < 0)
            return ret;
    } else {
        /* Two interleaved fields: even lines first, then odd lines. */
        ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2);
        if (ret < 0)
            return ret;
        ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2);
        if (ret < 0)
            return ret;
    }

    *got_frame = 1;
    return buf_size;
}
549 
550 /*
551  * Alpha VLC. Run and level are independently coded, and would be
552  * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't
553  * bother with combining them into one table.
554  */
compute_alpha_vlcs(void)555 static av_cold void compute_alpha_vlcs(void)
556 {
557     uint16_t run_code[134], level_code[266];
558     uint8_t run_bits[134], level_bits[266];
559     int16_t run_symbols[134], level_symbols[266];
560     int entry, i, sign;
561 
562     /* Initialize VLC for alpha run. */
563     entry = 0;
564 
565     /* 0 -> 0. */
566     run_code[entry] = 0;
567     run_bits[entry] = 1;
568     run_symbols[entry] = 0;
569     ++entry;
570 
571     /* 10xx -> xx plus 1. */
572     for (i = 0; i < 4; ++i) {
573         run_code[entry] = (i << 2) | 1;
574         run_bits[entry] = 4;
575         run_symbols[entry] = i + 1;
576         ++entry;
577     }
578 
579     /* 111xxxxxxx -> xxxxxxx. */
580     for (i = 0; i < 128; ++i) {
581         run_code[entry] = (i << 3) | 7;
582         run_bits[entry] = 10;
583         run_symbols[entry] = i;
584         ++entry;
585     }
586 
587     /* 110 -> EOB. */
588     run_code[entry] = 3;
589     run_bits[entry] = 3;
590     run_symbols[entry] = -1;
591     ++entry;
592 
593     av_assert0(entry == FF_ARRAY_ELEMS(run_code));
594 
595     INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_run_vlc_le, ALPHA_VLC_BITS,
596                               FF_ARRAY_ELEMS(run_code),
597                               run_bits, 1, 1,
598                               run_code, 2, 2,
599                               run_symbols, 2, 2, 160);
600 
601     /* Initialize VLC for alpha level. */
602     entry = 0;
603 
604     for (sign = 0; sign <= 1; ++sign) {
605         /* 1s -> -1 or +1 (depending on sign bit). */
606         level_code[entry] = (sign << 1) | 1;
607         level_bits[entry] = 2;
608         level_symbols[entry] = sign ? -1 : 1;
609         ++entry;
610 
611         /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
612         for (i = 0; i < 4; ++i) {
613             level_code[entry] = (i << 3) | (sign << 2) | 2;
614             level_bits[entry] = 5;
615             level_symbols[entry] = sign ? -(i + 2) : (i + 2);
616             ++entry;
617         }
618     }
619 
620     /*
621      * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes
622      * here that would better be encoded in other ways (e.g. 0 would be
623      * encoded by increasing run, and +/- 1 would be encoded with a
624      * shorter code), but it doesn't hurt to allow everything.
625      */
626     for (i = 0; i < 256; ++i) {
627         level_code[entry] = i << 2;
628         level_bits[entry] = 10;
629         level_symbols[entry] = i;
630         ++entry;
631     }
632 
633     av_assert0(entry == FF_ARRAY_ELEMS(level_code));
634 
635     INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_level_vlc_le, ALPHA_VLC_BITS,
636                               FF_ARRAY_ELEMS(level_code),
637                               level_bits, 1, 1,
638                               level_code, 2, 2,
639                               level_symbols, 2, 2, 288);
640 }
641 
speedhq_static_init(void)642 static av_cold void speedhq_static_init(void)
643 {
644     /* Exactly the same as MPEG-2, except for a little-endian reader. */
645     INIT_CUSTOM_VLC_STATIC(&dc_lum_vlc_le, DC_VLC_BITS, 12,
646                            ff_mpeg12_vlc_dc_lum_bits, 1, 1,
647                            ff_mpeg12_vlc_dc_lum_code, 2, 2,
648                            INIT_VLC_OUTPUT_LE, 512);
649     INIT_CUSTOM_VLC_STATIC(&dc_chroma_vlc_le, DC_VLC_BITS, 12,
650                            ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
651                            ff_mpeg12_vlc_dc_chroma_code, 2, 2,
652                            INIT_VLC_OUTPUT_LE, 514);
653 
654     INIT_2D_VLC_RL(ff_rl_speedhq, 674, INIT_VLC_LE);
655 
656     compute_alpha_vlcs();
657 }
658 
speedhq_decode_init(AVCodecContext *avctx)659 static av_cold int speedhq_decode_init(AVCodecContext *avctx)
660 {
661     int ret;
662     static AVOnce init_once = AV_ONCE_INIT;
663     SHQContext * const s = avctx->priv_data;
664 
665     s->avctx = avctx;
666 
667     ret = ff_thread_once(&init_once, speedhq_static_init);
668     if (ret)
669         return AVERROR_UNKNOWN;
670 
671     ff_blockdsp_init(&s->bdsp, avctx);
672     ff_idctdsp_init(&s->idsp, avctx);
673     ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
674 
675     switch (avctx->codec_tag) {
676     case MKTAG('S', 'H', 'Q', '0'):
677         s->subsampling = SHQ_SUBSAMPLING_420;
678         s->alpha_type = SHQ_NO_ALPHA;
679         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
680         break;
681     case MKTAG('S', 'H', 'Q', '1'):
682         s->subsampling = SHQ_SUBSAMPLING_420;
683         s->alpha_type = SHQ_RLE_ALPHA;
684         avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
685         break;
686     case MKTAG('S', 'H', 'Q', '2'):
687         s->subsampling = SHQ_SUBSAMPLING_422;
688         s->alpha_type = SHQ_NO_ALPHA;
689         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
690         break;
691     case MKTAG('S', 'H', 'Q', '3'):
692         s->subsampling = SHQ_SUBSAMPLING_422;
693         s->alpha_type = SHQ_RLE_ALPHA;
694         avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
695         break;
696     case MKTAG('S', 'H', 'Q', '4'):
697         s->subsampling = SHQ_SUBSAMPLING_444;
698         s->alpha_type = SHQ_NO_ALPHA;
699         avctx->pix_fmt = AV_PIX_FMT_YUV444P;
700         break;
701     case MKTAG('S', 'H', 'Q', '5'):
702         s->subsampling = SHQ_SUBSAMPLING_444;
703         s->alpha_type = SHQ_RLE_ALPHA;
704         avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
705         break;
706     case MKTAG('S', 'H', 'Q', '7'):
707         s->subsampling = SHQ_SUBSAMPLING_422;
708         s->alpha_type = SHQ_DCT_ALPHA;
709         avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
710         break;
711     case MKTAG('S', 'H', 'Q', '9'):
712         s->subsampling = SHQ_SUBSAMPLING_444;
713         s->alpha_type = SHQ_DCT_ALPHA;
714         avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
715         break;
716     default:
717         av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n",
718                avctx->codec_tag);
719         return AVERROR_INVALIDDATA;
720     }
721 
722     /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
723     avctx->colorspace = AVCOL_SPC_BT470BG;
724     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
725 
726     return 0;
727 }
728 
729 const FFCodec ff_speedhq_decoder = {
730     .p.name         = "speedhq",
731     .p.long_name    = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
732     .p.type         = AVMEDIA_TYPE_VIDEO,
733     .p.id           = AV_CODEC_ID_SPEEDHQ,
734     .priv_data_size = sizeof(SHQContext),
735     .init           = speedhq_decode_init,
736     FF_CODEC_DECODE_CB(speedhq_decode_frame),
737     .p.capabilities = AV_CODEC_CAP_DR1,
738     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
739 };
740 #endif /* CONFIG_SPEEDHQ_DECODER */
741