1 /*
2 * NewTek SpeedHQ codec
3 * Copyright 2017 Steinar H. Gunderson
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * NewTek SpeedHQ decoder.
25 */
26
/* Select the little-endian bitstream reader; keep this ahead of get_bits.h. */
#define BITSTREAM_READER_LE

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "config.h"
#include "config_components.h"
#include "libavutil/attributes.h"
#include "libavutil/mem_internal.h"
#include "libavutil/thread.h"

#include "avcodec.h"
#include "blockdsp.h"
#include "codec_internal.h"
#include "get_bits.h"
#include "idctdsp.h"
#include "internal.h"
#include "mathops.h"
#include "mpeg12dec.h"
#include "mpeg12data.h"
#include "mpeg12vlc.h"
45
/* Highest valid coefficient index inside an 8x8 block (64 coefficients). */
#define MAX_INDEX (64 - 1)

/*
 * Five bits make for very small tables, with no more than two lookups needed
 * for the longest (10-bit) codes.
 */
#define ALPHA_VLC_BITS 5
53
54 typedef struct SHQContext {
55 AVCodecContext *avctx;
56 BlockDSPContext bdsp;
57 IDCTDSPContext idsp;
58 ScanTable intra_scantable;
59 int quant_matrix[64];
60 enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 }
61 subsampling;
62 enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type;
63 } SHQContext;
64
65
/*
 * AC codes: Very similar but not identical to MPEG-2.
 *
 * Each entry is a { code, length-in-bits } pair. The first 121 entries line
 * up index-for-index with speedhq_run[] / speedhq_level[]; the last two are
 * the escape and end-of-block codes.
 */
static const uint16_t speedhq_vlc[123][2] = {
    {0x0001,  2}, {0x0003,  3}, {0x000E,  4}, {0x0007,  5},
    {0x0017,  5}, {0x0028,  6}, {0x0008,  6}, {0x006F,  7},
    {0x001F,  7}, {0x00C4,  8}, {0x0044,  8}, {0x005F,  8},
    {0x00DF,  8}, {0x007F,  8}, {0x00FF,  8}, {0x3E00, 14},
    {0x1E00, 14}, {0x2E00, 14}, {0x0E00, 14}, {0x3600, 14},
    {0x1600, 14}, {0x2600, 14}, {0x0600, 14}, {0x3A00, 14},
    {0x1A00, 14}, {0x2A00, 14}, {0x0A00, 14}, {0x3200, 14},
    {0x1200, 14}, {0x2200, 14}, {0x0200, 14}, {0x0C00, 15},
    {0x7400, 15}, {0x3400, 15}, {0x5400, 15}, {0x1400, 15},
    {0x6400, 15}, {0x2400, 15}, {0x4400, 15}, {0x0400, 15},
    {0x0002,  3}, {0x000C,  5}, {0x004F,  7}, {0x00E4,  8},
    {0x0004,  8}, {0x0D00, 13}, {0x1500, 13}, {0x7C00, 15},
    {0x3C00, 15}, {0x5C00, 15}, {0x1C00, 15}, {0x6C00, 15},
    {0x2C00, 15}, {0x4C00, 15}, {0xC800, 16}, {0x4800, 16},
    {0x8800, 16}, {0x0800, 16}, {0x0300, 13}, {0x1D00, 13},
    {0x0014,  5}, {0x0070,  7}, {0x003F,  8}, {0x00C0, 10},
    {0x0500, 13}, {0x0180, 12}, {0x0280, 12}, {0x0C80, 12},
    {0x0080, 12}, {0x0B00, 13}, {0x1300, 13}, {0x001C,  5},
    {0x0064,  8}, {0x0380, 12}, {0x1900, 13}, {0x0D80, 12},
    {0x0018,  6}, {0x00BF,  8}, {0x0480, 12}, {0x0B80, 12},
    {0x0038,  6}, {0x0040,  9}, {0x0900, 13}, {0x0030,  7},
    {0x0780, 12}, {0x2800, 16}, {0x0010,  7}, {0x0A80, 12},
    {0x0050,  7}, {0x0880, 12}, {0x000F,  7}, {0x1100, 13},
    {0x002F,  7}, {0x0100, 13}, {0x0084,  8}, {0x5800, 16},
    {0x00A4,  8}, {0x9800, 16}, {0x0024,  8}, {0x1800, 16},
    {0x0140,  9}, {0xE800, 16}, {0x01C0,  9}, {0x6800, 16},
    {0x02C0, 10}, {0xA800, 16}, {0x0F80, 12}, {0x0580, 12},
    {0x0980, 12}, {0x0E80, 12}, {0x0680, 12}, {0x1F00, 13},
    {0x0F00, 13}, {0x1700, 13}, {0x0700, 13}, {0x1B00, 13},
    {0xF800, 16}, {0x7800, 16}, {0xB800, 16}, {0x3800, 16},
    {0xD800, 16},
    {0x0020,  6}, /* escape */
    {0x0006,  4}  /* EOB */
};
102
/*
 * Level for each of the 121 run/level AC codes, in the same order as
 * speedhq_run[]. Within one run value the levels count up from 1.
 */
static const uint8_t speedhq_level[121] = {
     1,  2,  3,  4,  5,  6,  7,  8,
     9, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40,
     1,  2,  3,  4,  5,  6,  7,  8,
     9, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20,  1,  2,  3,  4,
     5,  6,  7,  8,  9, 10, 11,  1,
     2,  3,  4,  5,  1,  2,  3,  4,
     1,  2,  3,  1,  2,  3,  1,  2,
     1,  2,  1,  2,  1,  2,  1,  2,
     1,  2,  1,  2,  1,  2,  1,  2,
     1,  2,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,
     1,
};
121
/*
 * Run for each of the 121 run/level AC codes, in the same order as
 * speedhq_level[]. Runs are non-decreasing and span 0..31.
 */
static const uint8_t speedhq_run[121] = {
     0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,  0,  0,  0,
     1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  1,  1,  1,  1,
     1,  1,  1,  1,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  3,
     3,  3,  3,  3,  4,  4,  4,  4,
     5,  5,  5,  6,  6,  6,  7,  7,
     8,  8,  9,  9, 10, 10, 11, 11,
    12, 12, 13, 13, 14, 14, 15, 15,
    16, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, 26, 27, 28, 29, 30,
    31,
};
140
141 RLTable ff_rl_speedhq = {
142 121,
143 121,
144 speedhq_vlc,
145 speedhq_run,
146 speedhq_level,
147 };
148
149 #if CONFIG_SPEEDHQ_DECODER
/*
 * Base quantisation matrix in raster order; compute_quant_matrix() reorders
 * it through ff_zigzag_direct and multiplies it by the per-packet scale.
 * NOTE: The first element is always 16, unscaled.
 */
static const uint8_t unscaled_quant_matrix[64] = {
    16, 16, 19, 22, 26, 27, 29, 34,
    16, 16, 22, 24, 27, 29, 34, 37,
    19, 22, 26, 27, 29, 34, 34, 38,
    22, 22, 26, 27, 29, 34, 37, 40,
    22, 26, 27, 29, 32, 35, 40, 48,
    26, 27, 29, 32, 35, 40, 48, 58,
    26, 27, 29, 34, 38, 46, 56, 69,
    27, 29, 35, 38, 46, 56, 69, 83
};
161
162 static VLC dc_lum_vlc_le;
163 static VLC dc_chroma_vlc_le;
164 static VLC dc_alpha_run_vlc_le;
165 static VLC dc_alpha_level_vlc_le;
166
/**
 * Read one DC differential from the little-endian bitstream.
 *
 * Components 0 and 3 (luma and DCT-coded alpha) use the luma table; all
 * other components use the chroma table. The decoded VLC symbol is the
 * number of extra bits that get_xbits_le() then reads as the signed value.
 *
 * @param gb        bit reader positioned at the DC code
 * @param component plane index used to select the table
 * @return the differential, or 0 when the VLC symbol is 0
 */
static inline int decode_dc_le(GetBitContext *gb, int component)
{
    const VLC *vlc = (component == 0 || component == 3) ? &dc_lum_vlc_le
                                                        : &dc_chroma_vlc_le;
    int code = get_vlc2(gb, vlc->table, DC_VLC_BITS, 2);

    return code ? get_xbits_le(gb, code) : 0;
}
183
/**
 * Decode one 16x8 run-length coded alpha block.
 *
 * The bitstream holds (run, level) pairs until an end-of-block run symbol.
 * Each level is a per-pixel delta, and positions skipped by a run keep a
 * delta of 0. Deltas are subtracted (modulo 256) from last_alpha, which is
 * carried from row to row and left updated for the next block.
 *
 * @param s          decoder context (unused here)
 * @param gb         bit reader
 * @param last_alpha 16 running alpha predictors, updated in place
 * @param dest       top-left output pixel of the block
 * @param linesize   distance in bytes between output rows
 * @return 0 on success, AVERROR_INVALIDDATA if a run leaves the block
 */
static inline int decode_alpha_block(const SHQContext *s, GetBitContext *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize)
{
    uint8_t block[16 * 8] = { 0 };
    int pos = 0;

    {
        OPEN_READER(re, gb);

        for (;;) {
            int run, level;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(run, re, gb, dc_alpha_run_vlc_le.table, ALPHA_VLC_BITS, 2);

            /* A negative run symbol is the end-of-block marker. */
            if (run < 0)
                break;
            pos += run;
            if (pos >= 128)
                return AVERROR_INVALIDDATA;

            UPDATE_CACHE_LE(re, gb);
            GET_VLC(level, re, gb, dc_alpha_level_vlc_le.table, ALPHA_VLC_BITS, 2);
            /* Stored modulo 256; negative levels wrap on purpose. */
            block[pos++] = level;
        }

        CLOSE_READER(re, gb);
    }

    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 16; x++)
            last_alpha[x] -= block[y * 16 + x];
        memcpy(dest, last_alpha, 16);
        dest += linesize;
    }

    return 0;
}
223
decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)224 static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, int last_dc[4], int component, uint8_t *dest, int linesize)
225 {
226 const int *quant_matrix = s->quant_matrix;
227 const uint8_t *scantable = s->intra_scantable.permutated;
228 LOCAL_ALIGNED_32(int16_t, block, [64]);
229 int dc_offset;
230
231 s->bdsp.clear_block(block);
232
233 dc_offset = decode_dc_le(gb, component);
234 last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */
235 block[scantable[0]] = last_dc[component]; /* quant_matrix[0] is always 16. */
236
237 /* Read AC coefficients. */
238 {
239 int i = 0;
240 OPEN_READER(re, gb);
241 for ( ;; ) {
242 int level, run;
243 UPDATE_CACHE_LE(re, gb);
244 GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0],
245 TEX_VLC_BITS, 2, 0);
246 if (level == 127) {
247 break;
248 } else if (level) {
249 i += run;
250 if (i > MAX_INDEX)
251 return AVERROR_INVALIDDATA;
252 /* If next bit is 1, level = -level */
253 level = (level ^ SHOW_SBITS(re, gb, 1)) -
254 SHOW_SBITS(re, gb, 1);
255 LAST_SKIP_BITS(re, gb, 1);
256 } else {
257 /* Escape. */
258 #if MIN_CACHE_BITS < 6 + 6 + 12
259 #error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE
260 #endif
261 run = SHOW_UBITS(re, gb, 6) + 1;
262 SKIP_BITS(re, gb, 6);
263 level = SHOW_UBITS(re, gb, 12) - 2048;
264 LAST_SKIP_BITS(re, gb, 12);
265
266 i += run;
267 if (i > MAX_INDEX)
268 return AVERROR_INVALIDDATA;
269 }
270
271 block[scantable[i]] = (level * quant_matrix[i]) >> 4;
272 }
273 CLOSE_READER(re, gb);
274 }
275
276 s->idsp.idct_put(dest, linesize, block);
277
278 return 0;
279 }
280
/**
 * Decode the right-hand border column of a field.
 *
 * decode_speedhq_field() calls this for 4:2:0 / 4:2:2 content whose width is
 * not a multiple of 16. The macroblocks start 8 pixels left of the right
 * edge and are read from gb, continuing wherever the caller's reader
 * stopped. There is one macroblock per 16 field lines, and the DC and alpha
 * predictors are reset for each of them.
 *
 * @param s            decoder context
 * @param gb           bit reader to continue from
 * @param frame        destination frame
 * @param field_number 0 or 1: first frame line of this field
 * @param line_stride  frame lines per field line (1 progressive, 2 interlaced)
 * @return 0 on success, a negative AVERROR code on invalid data
 */
static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame *frame, int field_number, int line_stride)
{
    const int has_alpha   = s->alpha_type != SHQ_NO_ALPHA;
    const int linesize_y  = frame->linesize[0] * line_stride;
    const int linesize_cb = frame->linesize[1] * line_stride;
    const int linesize_cr = frame->linesize[2] * line_stride;
    const int linesize_a  = has_alpha ? frame->linesize[3] * line_stride : 0;
    const int x           = frame->width - 8;
    int ret;

    for (int y = 0; y < frame->height; y += 16 * line_stride) {
        int last_dc[4] = { 1024, 1024, 1024, 1024 };
        uint8_t last_alpha[16];
        uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a = NULL;
        int chroma_row;

        if (s->subsampling == SHQ_SUBSAMPLING_420) {
            chroma_row = y / 2 + field_number;
        } else {
            av_assert2(s->subsampling == SHQ_SUBSAMPLING_422);
            chroma_row = y + field_number;
        }

        dest_y  = frame->data[0] + frame->linesize[0] * (y + field_number) + x;
        dest_cb = frame->data[1] + frame->linesize[1] * chroma_row + x / 2;
        dest_cr = frame->data[2] + frame->linesize[2] * chroma_row + x / 2;
        if (has_alpha) {
            memset(last_alpha, 255, sizeof(last_alpha));
            dest_a = frame->data[3] + frame->linesize[3] * (y + field_number) + x;
        }

        /* Four luma blocks: top-left, top-right, bottom-left, bottom-right. */
        for (int k = 0; k < 4; k++) {
            uint8_t *blk = dest_y + (k >> 1) * 8 * linesize_y + (k & 1) * 8;

            if ((ret = decode_dct_block(s, gb, last_dc, 0, blk, linesize_y)) < 0)
                return ret;
        }

        if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb, linesize_cb)) < 0)
            return ret;
        if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr, linesize_cr)) < 0)
            return ret;

        if (s->subsampling != SHQ_SUBSAMPLING_420) {
            /* Outside 4:2:0 there is a second (bottom) chroma block pair. */
            if ((ret = decode_dct_block(s, gb, last_dc, 1, dest_cb + 8 * linesize_cb, linesize_cb)) < 0)
                return ret;
            if ((ret = decode_dct_block(s, gb, last_dc, 2, dest_cr + 8 * linesize_cr, linesize_cr)) < 0)
                return ret;
        }

        if (s->alpha_type == SHQ_RLE_ALPHA) {
            /* Alpha coded using 16x8 RLE blocks. */
            if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a, linesize_a)) < 0)
                return ret;
            if ((ret = decode_alpha_block(s, gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
                return ret;
        } else if (s->alpha_type == SHQ_DCT_ALPHA) {
            /* Alpha encoded exactly like luma. */
            for (int k = 0; k < 4; k++) {
                uint8_t *blk = dest_a + (k >> 1) * 8 * linesize_a + (k & 1) * 8;

                if ((ret = decode_dct_block(s, gb, last_dc, 3, blk, linesize_a)) < 0)
                    return ret;
            }
        }
    }

    return 0;
}
353
/**
 * Decode one field (or a whole progressive frame) from buf[start..end).
 *
 * The field consists of four slices. Every slice begins with a 24-bit
 * little-endian length that includes those three bytes; the last slice runs
 * to end. Slice n holds macroblock rows n, n + 4, n + 8, ... of the field.
 * DC and alpha predictors are reset at the start of every macroblock row.
 *
 * For 4:2:0 / 4:2:2 content whose width is not a multiple of 16 the
 * remaining border column is decoded afterwards by decode_speedhq_border(),
 * continuing from the last slice's bit reader.
 *
 * @param s            decoder context
 * @param buf          packet data
 * @param buf_size     packet size in bytes
 * @param frame        destination frame
 * @param field_number 0 or 1: first frame line of this field
 * @param start        offset of the field's first slice header in buf
 * @param end          offset one past the field's last byte
 * @param line_stride  frame lines per field line (1 progressive, 2 interlaced)
 * @return 0 on success, a negative AVERROR code on invalid data
 */
static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride)
{
    const int is_420      = s->subsampling == SHQ_SUBSAMPLING_420;
    const int is_444      = s->subsampling == SHQ_SUBSAMPLING_444;
    const int has_alpha   = s->alpha_type != SHQ_NO_ALPHA;
    const int linesize_y  = frame->linesize[0] * line_stride;
    const int linesize_cb = frame->linesize[1] * line_stride;
    const int linesize_cr = frame->linesize[2] * line_stride;
    const int linesize_a  = has_alpha ? frame->linesize[3] * line_stride : 0;
    /*
     * Cb/Cr block pairs per macroblock: one for 4:2:0, top + bottom for
     * 4:2:2, and a full 2x2 for 4:4:4.
     */
    const int chroma_pairs = is_420 ? 1 : is_444 ? 4 : 2;
    /* Chroma bytes covered by one 16-pixel-wide macroblock. */
    const int chroma_step  = is_444 ? 16 : 8;
    /* Subsampled formats leave a trailing half macroblock to the border pass. */
    const int x_end        = frame->width - (is_444 ? 0 : 8);
    int ret, slice_offsets[5];
    GetBitContext gb;

    if (end < start || end - start < 3 || end > buf_size)
        return AVERROR_INVALIDDATA;

    slice_offsets[0] = start;
    slice_offsets[4] = end;
    for (int slice_number = 1; slice_number < 4; slice_number++) {
        uint32_t last_offset = slice_offsets[slice_number - 1];
        uint32_t slice_len   = AV_RL24(buf + last_offset);

        slice_offsets[slice_number] = last_offset + slice_len;

        /* Every slice needs room for its own 3-byte length field. */
        if (slice_len < 3 || slice_offsets[slice_number] > end - 3)
            return AVERROR_INVALIDDATA;
    }

    for (int slice_number = 0; slice_number < 4; slice_number++) {
        uint32_t slice_begin = slice_offsets[slice_number];
        uint32_t slice_end   = slice_offsets[slice_number + 1];

        if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - slice_begin - 3)) < 0)
            return ret;

        for (int y = slice_number * 16 * line_stride; y < frame->height; y += line_stride * 64) {
            const int chroma_row = is_420 ? y / 2 + field_number : y + field_number;
            int last_dc[4] = { 1024, 1024, 1024, 1024 };
            uint8_t last_alpha[16];
            uint8_t *dest_y  = frame->data[0] + frame->linesize[0] * (y + field_number);
            uint8_t *dest_cb = frame->data[1] + frame->linesize[1] * chroma_row;
            uint8_t *dest_cr = frame->data[2] + frame->linesize[2] * chroma_row;
            uint8_t *dest_a  = has_alpha
                             ? frame->data[3] + frame->linesize[3] * (y + field_number)
                             : NULL;

            memset(last_alpha, 255, sizeof(last_alpha));

            for (int x = 0; x < x_end; x += 16) {
                /* Four luma blocks: top-left, top-right, bottom-left, bottom-right. */
                for (int k = 0; k < 4; k++) {
                    uint8_t *blk = dest_y + (k >> 1) * 8 * linesize_y + (k & 1) * 8;

                    if ((ret = decode_dct_block(s, &gb, last_dc, 0, blk, linesize_y)) < 0)
                        return ret;
                }

                /*
                 * Chroma pairs (Cb then Cr) in bitstream order: top-left,
                 * bottom-left, top-right, bottom-right. 4:2:0 only has the
                 * first pair, 4:2:2 the first two.
                 */
                for (int k = 0; k < chroma_pairs; k++) {
                    const int row = k & 1, col = k >> 1;

                    if ((ret = decode_dct_block(s, &gb, last_dc, 1,
                                                dest_cb + row * 8 * linesize_cb + col * 8,
                                                linesize_cb)) < 0)
                        return ret;
                    if ((ret = decode_dct_block(s, &gb, last_dc, 2,
                                                dest_cr + row * 8 * linesize_cr + col * 8,
                                                linesize_cr)) < 0)
                        return ret;
                }

                dest_y  += 16;
                dest_cb += chroma_step;
                dest_cr += chroma_step;

                if (s->alpha_type == SHQ_RLE_ALPHA) {
                    /* Alpha coded using 16x8 RLE blocks. */
                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a, linesize_a)) < 0)
                        return ret;
                    if ((ret = decode_alpha_block(s, &gb, last_alpha, dest_a + 8 * linesize_a, linesize_a)) < 0)
                        return ret;
                    dest_a += 16;
                } else if (s->alpha_type == SHQ_DCT_ALPHA) {
                    /* Alpha encoded exactly like luma. */
                    for (int k = 0; k < 4; k++) {
                        uint8_t *blk = dest_a + (k >> 1) * 8 * linesize_a + (k & 1) * 8;

                        if ((ret = decode_dct_block(s, &gb, last_dc, 3, blk, linesize_a)) < 0)
                            return ret;
                    }
                    dest_a += 16;
                }
            }
        }
    }

    if (!is_444 && (frame->width & 15))
        return decode_speedhq_border(s, &gb, frame, field_number, line_stride);

    return 0;
}
485
compute_quant_matrix(int *output, int qscale)486 static void compute_quant_matrix(int *output, int qscale)
487 {
488 int i;
489 for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[ff_zigzag_direct[i]] * qscale;
490 }
491
/**
 * Decode one packet into one frame.
 *
 * Packet layout as read here: byte 0 is the quality (0..99), bytes 1..3 are
 * the 24-bit little-endian offset of the second field, and the first field
 * starts at offset 4.
 *
 * @param avctx     codec context
 * @param frame     output frame; its buffers are allocated here
 * @param got_frame set to 1 when a frame was produced
 * @param avpkt     input packet
 * @return the number of bytes consumed (the whole packet) or a negative
 *         AVERROR code
 */
static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                                int *got_frame, AVPacket *avpkt)
{
    SHQContext * const s = avctx->priv_data;
    const uint8_t *buf   = avpkt->data;
    int buf_size         = avpkt->size;
    uint8_t quality;
    uint32_t second_field_offset;
    int ret;

    if (buf_size < 4 || avctx->width < 8 || avctx->width % 8 != 0)
        return AVERROR_INVALIDDATA;
    /*
     * Reject packets that are far too small for the picture before any
     * buffer is allocated. Computed in 64 bits so the product cannot
     * overflow.
     */
    if (buf_size < (int64_t)avctx->width * avctx->height / 64 / 4)
        return AVERROR_INVALIDDATA;

    quality = buf[0];
    if (quality >= 100)
        return AVERROR_INVALIDDATA;

    compute_quant_matrix(s->quant_matrix, 100 - quality);

    second_field_offset = AV_RL24(buf + 1);
    if (second_field_offset >= buf_size - 3)
        return AVERROR_INVALIDDATA;

    /* The decoder writes whole 16x16 macroblocks. */
    avctx->coded_width  = FFALIGN(avctx->width, 16);
    avctx->coded_height = FFALIGN(avctx->height, 16);

    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        return ret;
    frame->key_frame = 1;

    if (second_field_offset == 4 || second_field_offset == (buf_size-4)) {
        /*
         * Overlapping first and second fields is used to signal
         * encoding only a single field. In this case, "height"
         * is ambiguous; it could mean either the height of the
         * frame as a whole, or of the field. The former would make
         * more sense for compatibility with legacy decoders,
         * but this matches the convention used in NDI, which is
         * the primary user of this trick.
         */
        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1)) < 0)
            return ret;
    } else {
        /* Two interleaved fields: even frame lines first, then odd. */
        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2)) < 0)
            return ret;
        if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2)) < 0)
            return ret;
    }

    *got_frame = 1;
    return buf_size;
}
549
550 /*
551 * Alpha VLC. Run and level are independently coded, and would be
552 * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't
553 * bother with combining them into one table.
554 */
compute_alpha_vlcs(void)555 static av_cold void compute_alpha_vlcs(void)
556 {
557 uint16_t run_code[134], level_code[266];
558 uint8_t run_bits[134], level_bits[266];
559 int16_t run_symbols[134], level_symbols[266];
560 int entry, i, sign;
561
562 /* Initialize VLC for alpha run. */
563 entry = 0;
564
565 /* 0 -> 0. */
566 run_code[entry] = 0;
567 run_bits[entry] = 1;
568 run_symbols[entry] = 0;
569 ++entry;
570
571 /* 10xx -> xx plus 1. */
572 for (i = 0; i < 4; ++i) {
573 run_code[entry] = (i << 2) | 1;
574 run_bits[entry] = 4;
575 run_symbols[entry] = i + 1;
576 ++entry;
577 }
578
579 /* 111xxxxxxx -> xxxxxxx. */
580 for (i = 0; i < 128; ++i) {
581 run_code[entry] = (i << 3) | 7;
582 run_bits[entry] = 10;
583 run_symbols[entry] = i;
584 ++entry;
585 }
586
587 /* 110 -> EOB. */
588 run_code[entry] = 3;
589 run_bits[entry] = 3;
590 run_symbols[entry] = -1;
591 ++entry;
592
593 av_assert0(entry == FF_ARRAY_ELEMS(run_code));
594
595 INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_run_vlc_le, ALPHA_VLC_BITS,
596 FF_ARRAY_ELEMS(run_code),
597 run_bits, 1, 1,
598 run_code, 2, 2,
599 run_symbols, 2, 2, 160);
600
601 /* Initialize VLC for alpha level. */
602 entry = 0;
603
604 for (sign = 0; sign <= 1; ++sign) {
605 /* 1s -> -1 or +1 (depending on sign bit). */
606 level_code[entry] = (sign << 1) | 1;
607 level_bits[entry] = 2;
608 level_symbols[entry] = sign ? -1 : 1;
609 ++entry;
610
611 /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). */
612 for (i = 0; i < 4; ++i) {
613 level_code[entry] = (i << 3) | (sign << 2) | 2;
614 level_bits[entry] = 5;
615 level_symbols[entry] = sign ? -(i + 2) : (i + 2);
616 ++entry;
617 }
618 }
619
620 /*
621 * 00xxxxxxxx -> xxxxxxxx, in two's complement. There are many codes
622 * here that would better be encoded in other ways (e.g. 0 would be
623 * encoded by increasing run, and +/- 1 would be encoded with a
624 * shorter code), but it doesn't hurt to allow everything.
625 */
626 for (i = 0; i < 256; ++i) {
627 level_code[entry] = i << 2;
628 level_bits[entry] = 10;
629 level_symbols[entry] = i;
630 ++entry;
631 }
632
633 av_assert0(entry == FF_ARRAY_ELEMS(level_code));
634
635 INIT_LE_VLC_SPARSE_STATIC(&dc_alpha_level_vlc_le, ALPHA_VLC_BITS,
636 FF_ARRAY_ELEMS(level_code),
637 level_bits, 1, 1,
638 level_code, 2, 2,
639 level_symbols, 2, 2, 288);
640 }
641
speedhq_static_init(void)642 static av_cold void speedhq_static_init(void)
643 {
644 /* Exactly the same as MPEG-2, except for a little-endian reader. */
645 INIT_CUSTOM_VLC_STATIC(&dc_lum_vlc_le, DC_VLC_BITS, 12,
646 ff_mpeg12_vlc_dc_lum_bits, 1, 1,
647 ff_mpeg12_vlc_dc_lum_code, 2, 2,
648 INIT_VLC_OUTPUT_LE, 512);
649 INIT_CUSTOM_VLC_STATIC(&dc_chroma_vlc_le, DC_VLC_BITS, 12,
650 ff_mpeg12_vlc_dc_chroma_bits, 1, 1,
651 ff_mpeg12_vlc_dc_chroma_code, 2, 2,
652 INIT_VLC_OUTPUT_LE, 514);
653
654 INIT_2D_VLC_RL(ff_rl_speedhq, 674, INIT_VLC_LE);
655
656 compute_alpha_vlcs();
657 }
658
speedhq_decode_init(AVCodecContext *avctx)659 static av_cold int speedhq_decode_init(AVCodecContext *avctx)
660 {
661 int ret;
662 static AVOnce init_once = AV_ONCE_INIT;
663 SHQContext * const s = avctx->priv_data;
664
665 s->avctx = avctx;
666
667 ret = ff_thread_once(&init_once, speedhq_static_init);
668 if (ret)
669 return AVERROR_UNKNOWN;
670
671 ff_blockdsp_init(&s->bdsp, avctx);
672 ff_idctdsp_init(&s->idsp, avctx);
673 ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
674
675 switch (avctx->codec_tag) {
676 case MKTAG('S', 'H', 'Q', '0'):
677 s->subsampling = SHQ_SUBSAMPLING_420;
678 s->alpha_type = SHQ_NO_ALPHA;
679 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
680 break;
681 case MKTAG('S', 'H', 'Q', '1'):
682 s->subsampling = SHQ_SUBSAMPLING_420;
683 s->alpha_type = SHQ_RLE_ALPHA;
684 avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
685 break;
686 case MKTAG('S', 'H', 'Q', '2'):
687 s->subsampling = SHQ_SUBSAMPLING_422;
688 s->alpha_type = SHQ_NO_ALPHA;
689 avctx->pix_fmt = AV_PIX_FMT_YUV422P;
690 break;
691 case MKTAG('S', 'H', 'Q', '3'):
692 s->subsampling = SHQ_SUBSAMPLING_422;
693 s->alpha_type = SHQ_RLE_ALPHA;
694 avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
695 break;
696 case MKTAG('S', 'H', 'Q', '4'):
697 s->subsampling = SHQ_SUBSAMPLING_444;
698 s->alpha_type = SHQ_NO_ALPHA;
699 avctx->pix_fmt = AV_PIX_FMT_YUV444P;
700 break;
701 case MKTAG('S', 'H', 'Q', '5'):
702 s->subsampling = SHQ_SUBSAMPLING_444;
703 s->alpha_type = SHQ_RLE_ALPHA;
704 avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
705 break;
706 case MKTAG('S', 'H', 'Q', '7'):
707 s->subsampling = SHQ_SUBSAMPLING_422;
708 s->alpha_type = SHQ_DCT_ALPHA;
709 avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
710 break;
711 case MKTAG('S', 'H', 'Q', '9'):
712 s->subsampling = SHQ_SUBSAMPLING_444;
713 s->alpha_type = SHQ_DCT_ALPHA;
714 avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
715 break;
716 default:
717 av_log(avctx, AV_LOG_ERROR, "Unknown NewTek SpeedHQ FOURCC provided (%08X)\n",
718 avctx->codec_tag);
719 return AVERROR_INVALIDDATA;
720 }
721
722 /* This matches what NDI's RGB -> Y'CbCr 4:2:2 converter uses. */
723 avctx->colorspace = AVCOL_SPC_BT470BG;
724 avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
725
726 return 0;
727 }
728
729 const FFCodec ff_speedhq_decoder = {
730 .p.name = "speedhq",
731 .p.long_name = NULL_IF_CONFIG_SMALL("NewTek SpeedHQ"),
732 .p.type = AVMEDIA_TYPE_VIDEO,
733 .p.id = AV_CODEC_ID_SPEEDHQ,
734 .priv_data_size = sizeof(SHQContext),
735 .init = speedhq_decode_init,
736 FF_CODEC_DECODE_CB(speedhq_decode_frame),
737 .p.capabilities = AV_CODEC_CAP_DR1,
738 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
739 };
740 #endif /* CONFIG_SPEEDHQ_DECODER */
741