xref: /third_party/ffmpeg/libavcodec/4xm.c (revision cabdff1a)
1/*
2 * 4XM codec
3 * Copyright (c) 2003 Michael Niedermayer
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * 4XM codec.
25 */
26
27#include <inttypes.h>
28
29#include "libavutil/avassert.h"
30#include "libavutil/frame.h"
31#include "libavutil/imgutils.h"
32#include "libavutil/intreadwrite.h"
33#include "libavutil/mem_internal.h"
34#include "libavutil/thread.h"
35#include "avcodec.h"
36#include "blockdsp.h"
37#include "bswapdsp.h"
38#include "bytestream.h"
39#include "codec_internal.h"
40#include "get_bits.h"
41#include "internal.h"
42
43
/* Lookup width, in bits, used with init_vlc()/get_vlc2() for the block-type
 * VLCs; init_vlcs() backs each of them with a 32-entry (1 << 5) table. */
#define BLOCK_TYPE_VLC_BITS 5
/* Lookup width, in bits, used with init_vlc()/get_vlc2() for the per-frame
 * AC/DC prefix VLC (FourXContext.pre_vlc). */
#define ACDC_VLC_BITS 9

/* Number of CFrameBuffer slots held in FourXContext.cfrm[]. */
#define CFRAME_BUFFER_COUNT 100
48
/**
 * Source tables for the P-frame block-type VLCs.
 *
 * Indexed as [table set][size class][block type][column]. init_vlcs() builds
 * one VLC per [table set][size class] from the first 7 block types, handing
 * column 1 to init_vlc() ahead of column 0 (presumably code lengths and code
 * values respectively - confirm against the init_vlc() parameter order).
 * decode_p_block() selects the table set with 1 - (version > 1) and the size
 * class with size2index[log2h][log2w]; the per-class comments below name the
 * block dimensions each class covers.
 *
 * NOTE(review): the { 0, 0 } pairs look like block types that are unavailable
 * for that size class (zero-length code) - confirm.
 */
static const uint8_t block_type_tab[2][4][8][2] = {
    {
        {    // { 8, 4, 2 } x { 8, 4, 2}
            { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 31, 5 }, { 0, 0 }
        }, { // { 8, 4 } x 1
            { 0, 1 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
        }, { // 1 x { 8, 4 }
            { 0, 1 }, { 2, 2 }, { 0, 0 }, { 6, 3 }, { 14, 4 }, { 15, 4 }, { 0, 0 }
        }, { // 1 x 2, 2 x 1
            { 0, 1 }, { 0, 0 }, { 0, 0 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 }
        }
    }, {
        {   // { 8, 4, 2 } x { 8, 4, 2}
            { 1, 2 }, { 4, 3 }, { 5, 3 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// { 8, 4 } x 1
            { 1, 2 }, { 0, 0 }, { 2, 2 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// 1 x { 8, 4 }
            { 1, 2 }, { 2, 2 }, { 0, 0 }, { 0, 2 }, { 6, 3 }, { 7, 3 }, { 0, 0 }
        }, {// 1 x 2, 2 x 1
            { 1, 2 }, { 0, 0 }, { 0, 0 }, { 0, 2 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
      }
    }
};
72
/**
 * Maps a block size, given as [log2h][log2w], to the size class that selects
 * a block_type_vlc table in decode_p_block().
 *
 * The [0][0] slot (a 1x1 block) has no table and holds the sentinel -1. The
 * element type is signed so that the sentinel survives the load into an int
 * and the av_assert0(index >= 0) guard in decode_p_block() can reject it;
 * stored in a uint8_t it would read back as 255 and slip past that guard.
 */
static const int8_t size2index[4][4] = {
    { -1, 3, 1, 1 },
    {  3, 0, 0, 0 },
    {  2, 0, 0, 0 },
    {  2, 0, 0, 0 },
};
79
/**
 * Motion vector table, indexed by the byte read from the bytestream in
 * decode_p_block().
 *
 * init_mv() uses it only when version > 1, turning each pair into a single
 * buffer offset: element [0] is added as-is and element [1] is multiplied by
 * linesize / 2, i.e. presumably { dx, dy } in pixels and rows.
 */
static const int8_t mv[256][2] = {
    {   0,   0 }, {   0,  -1 }, {  -1,   0 }, {   1,   0 }, {   0,   1 }, {  -1,  -1 }, {   1,  -1 }, {  -1,   1 },
    {   1,   1 }, {   0,  -2 }, {  -2,   0 }, {   2,   0 }, {   0,   2 }, {  -1,  -2 }, {   1,  -2 }, {  -2,  -1 },
    {   2,  -1 }, {  -2,   1 }, {   2,   1 }, {  -1,   2 }, {   1,   2 }, {  -2,  -2 }, {   2,  -2 }, {  -2,   2 },
    {   2,   2 }, {   0,  -3 }, {  -3,   0 }, {   3,   0 }, {   0,   3 }, {  -1,  -3 }, {   1,  -3 }, {  -3,  -1 },
    {   3,  -1 }, {  -3,   1 }, {   3,   1 }, {  -1,   3 }, {   1,   3 }, {  -2,  -3 }, {   2,  -3 }, {  -3,  -2 },
    {   3,  -2 }, {  -3,   2 }, {   3,   2 }, {  -2,   3 }, {   2,   3 }, {   0,  -4 }, {  -4,   0 }, {   4,   0 },
    {   0,   4 }, {  -1,  -4 }, {   1,  -4 }, {  -4,  -1 }, {   4,  -1 }, {   4,   1 }, {  -1,   4 }, {   1,   4 },
    {  -3,  -3 }, {  -3,   3 }, {   3,   3 }, {  -2,  -4 }, {  -4,  -2 }, {   4,  -2 }, {  -4,   2 }, {  -2,   4 },
    {   2,   4 }, {  -3,  -4 }, {   3,  -4 }, {   4,  -3 }, {  -5,   0 }, {  -4,   3 }, {  -3,   4 }, {   3,   4 },
    {  -1,  -5 }, {  -5,  -1 }, {  -5,   1 }, {  -1,   5 }, {  -2,  -5 }, {   2,  -5 }, {   5,  -2 }, {   5,   2 },
    {  -4,  -4 }, {  -4,   4 }, {  -3,  -5 }, {  -5,  -3 }, {  -5,   3 }, {   3,   5 }, {  -6,   0 }, {   0,   6 },
    {  -6,  -1 }, {  -6,   1 }, {   1,   6 }, {   2,  -6 }, {  -6,   2 }, {   2,   6 }, {  -5,  -4 }, {   5,   4 },
    {   4,   5 }, {  -6,  -3 }, {   6,   3 }, {  -7,   0 }, {  -1,  -7 }, {   5,  -5 }, {  -7,   1 }, {  -1,   7 },
    {   4,  -6 }, {   6,   4 }, {  -2,  -7 }, {  -7,   2 }, {  -3,  -7 }, {   7,  -3 }, {   3,   7 }, {   6,  -5 },
    {   0,  -8 }, {  -1,  -8 }, {  -7,  -4 }, {  -8,   1 }, {   4,   7 }, {   2,  -8 }, {  -2,   8 }, {   6,   6 },
    {  -8,   3 }, {   5,  -7 }, {  -5,   7 }, {   8,  -4 }, {   0,  -9 }, {  -9,  -1 }, {   1,   9 }, {   7,  -6 },
    {  -7,   6 }, {  -5,  -8 }, {  -5,   8 }, {  -9,   3 }, {   9,  -4 }, {   7,  -7 }, {   8,  -6 }, {   6,   8 },
    {  10,   1 }, { -10,   2 }, {   9,  -5 }, {  10,  -3 }, {  -8,  -7 }, { -10,  -4 }, {   6,  -9 }, { -11,   0 },
    {  11,   1 }, { -11,  -2 }, {  -2,  11 }, {   7,  -9 }, {  -7,   9 }, {  10,   6 }, {  -4,  11 }, {   8,  -9 },
    {   8,   9 }, {   5,  11 }, {   7, -10 }, {  12,  -3 }, {  11,   6 }, {  -9,  -9 }, {   8,  10 }, {   5,  12 },
    { -11,   7 }, {  13,   2 }, {   6, -12 }, {  10,   9 }, { -11,   8 }, {  -7,  12 }, {   0,  14 }, {  14,  -2 },
    {  -9,  11 }, {  -6,  13 }, { -14,  -4 }, {  -5, -14 }, {   5,  14 }, { -15,  -1 }, { -14,  -6 }, {   3, -15 },
    {  11, -11 }, {  -7,  14 }, {  -5,  15 }, {   8, -14 }, {  15,   6 }, {   3,  16 }, {   7, -15 }, { -16,   5 },
    {   0,  17 }, { -16,  -6 }, { -10,  14 }, { -16,   7 }, {  12,  13 }, { -16,   8 }, { -17,   6 }, { -18,   3 },
    {  -7,  17 }, {  15,  11 }, {  16,  10 }, {   2, -19 }, {   3, -19 }, { -11, -16 }, { -18,   8 }, { -19,  -6 },
    {   2, -20 }, { -17, -11 }, { -10, -18 }, {   8,  19 }, { -21,  -1 }, { -20,   7 }, {  -4,  21 }, {  21,   5 },
    {  15,  16 }, {   2, -22 }, { -10, -20 }, { -22,   5 }, {  20, -11 }, {  -7, -22 }, { -12,  20 }, {  23,  -5 },
    {  13, -20 }, {  24,  -2 }, { -15,  19 }, { -11,  22 }, {  16,  19 }, {  23, -10 }, { -18, -18 }, {  -9, -24 },
    {  24, -10 }, {  -3,  26 }, { -23,  13 }, { -18, -20 }, {  17,  21 }, {  -4,  27 }, {  27,   6 }, {   1, -28 },
    { -11,  26 }, { -17, -23 }, {   7,  28 }, {  11, -27 }, {  29,   5 }, { -23, -19 }, { -28, -11 }, { -21,  22 },
    { -30,   7 }, { -17,  26 }, { -27,  16 }, {  13,  29 }, {  19, -26 }, {  10, -31 }, { -14, -30 }, {  20, -27 },
    { -29,  18 }, { -16, -31 }, { -28, -22 }, {  21, -30 }, { -25,  28 }, {  26, -29 }, {  25, -32 }, { -32, -32 }
};
114
/* This is simply the scaled down elementwise product of the standard JPEG
 * quantizer table and the AAN premul table.
 *
 * decode_i_block() multiplies the DC value by entry 0 and every AC level by
 * the entry at its de-zigzagged position (ff_zigzag_direct[i]). */
static const uint8_t dequant_table[64] = {
    16, 15, 13, 19, 24, 31, 28, 17,
    17, 23, 25, 31, 36, 63, 45, 21,
    18, 24, 27, 37, 52, 59, 49, 20,
    16, 28, 34, 40, 60, 80, 51, 20,
    18, 31, 48, 66, 68, 86, 56, 21,
    19, 38, 56, 59, 64, 64, 48, 20,
    27, 48, 55, 55, 56, 51, 35, 15,
    20, 35, 34, 32, 31, 22, 15,  8,
};
127
/* Block-type VLCs, indexed [table set][size class] in the same way as
 * block_type_tab; populated by init_vlcs() and read by decode_p_block(). */
static VLC block_type_vlc[2][4];
129
130
/**
 * Reassembly slot for a frame that arrives split across several "cfrm"
 * packets; decode_frame() appends each packet's payload to the slot whose id
 * matches.
 */
typedef struct CFrameBuffer {
    unsigned int allocated_size; // capacity of data, maintained by av_fast_realloc()
    unsigned int size;           // payload bytes collected so far; 0 marks the slot as free
    int id;                      // frame id read from the packet header; reset to 0 once the frame is complete
    uint8_t *data;               // collected payload
} CFrameBuffer;
137
/**
 * Decoder state shared by the frame decoding functions in this file.
 */
typedef struct FourXContext {
    AVCodecContext *avctx;         // owning codec context (dimensions, flags, logging)
    BlockDSPContext bdsp;          // provides clear_blocks() for the coefficient blocks
    BswapDSPContext bbdsp;         // provides bswap_buf() for 32-bit word swapping of bitstreams
    uint16_t *frame_buffer;        // frame being decoded: 16-bit pixels, avctx->width pixels per row
    uint16_t *last_frame_buffer;   // reference frame read by the P-frame decoder
    GetBitContext pre_gb;          ///< ac/dc prefix
    GetBitContext gb;              // P-frames: block-type codes; I-frames: coefficient value bits
    GetByteContext g;              // P-frame "bytestream": motion vector indices
    GetByteContext g2;             // P-frame "wordstream": 16-bit DC values and raw pixels
    int mv[256];                   // motion vector index -> pixel offset, rebuilt by init_mv()
    VLC pre_vlc;                   // AC/DC prefix VLC built by read_huffman_tables()
    int last_dc;                   // DC predictor carried from block to block within an I-frame
    DECLARE_ALIGNED(32, int16_t, block)[6][64]; // one macroblock: blocks 0-3 luma, 4 and 5 chroma
    void *bitstream_buffer;        // scratch buffer holding the word-swapped bitstream
    unsigned int bitstream_buffer_size;
    int version;                   // format version; selects P-frame header layout and VLC/mv tables
    CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; // reassembly slots for split frames
} FourXContext;
157
158
/* Rotation constants in 16.16 fixed point (value * 65536). */
#define FIX_1_082392200  70936
#define FIX_1_414213562  92682
#define FIX_1_847759065 121095
#define FIX_2_613125930 171254

/* Fixed-point multiply: the product is formed in unsigned arithmetic, then
 * converted back to int and shifted down by the 16 fractional bits. */
#define MULTIPLY(var, const) ((int)((var) * (unsigned)(const)) >> 16)

/**
 * Run one 8-point pass of the inverse transform in place on v[0..7].
 * No output scaling happens here; idct() applies the final shift.
 */
static inline void idct_pass(int v[8])
{
    /* even half: inputs 0, 2, 4, 6 */
    const int s04 = v[0] + v[4];
    const int d04 = v[0] - v[4];
    const int s26 = v[2] + v[6];
    const int r26 = MULTIPLY(v[2] - v[6], FIX_1_414213562) - s26;

    const int e0 = s04 + s26;
    const int e3 = s04 - s26;
    const int e1 = d04 + r26;
    const int e2 = d04 - r26;

    /* odd half: inputs 1, 3, 5, 7 */
    const int z13 = v[5] + v[3];
    const int z10 = v[5] - v[3];
    const int z11 = v[1] + v[7];
    const int z12 = v[1] - v[7];

    const int o7  = z11 + z13;
    const int r11 = MULTIPLY(z11 - z13, FIX_1_414213562);

    const int z5  = MULTIPLY(z10 + z12, FIX_1_847759065);
    const int t10 = MULTIPLY(z12,  FIX_1_082392200) - z5;
    const int t12 = MULTIPLY(z10, -FIX_2_613125930) + z5;

    const int o6 = t12 - o7;
    const int o5 = r11 - o6;
    const int o4 = t10 + o5;

    /* final butterflies */
    v[0] = e0 + o7;
    v[7] = e0 - o7;
    v[1] = e1 + o6;
    v[6] = e1 - o6;
    v[2] = e2 + o5;
    v[5] = e2 - o5;
    v[4] = e3 + o4;
    v[3] = e3 - o4;
}

/**
 * In-place 8x8 inverse DCT.
 *
 * The columns are transformed first into a full-precision scratch array, then
 * the rows; each final sample is shifted right by 6 before being stored back.
 *
 * @param block 64 coefficients in row-major order, replaced by the samples
 */
static void idct(int16_t block[64])
{
    int temp[64];
    int lane[8];
    int pos, k;

    /* column pass: block -> temp */
    for (pos = 0; pos < 8; pos++) {
        for (k = 0; k < 8; k++)
            lane[k] = block[8 * k + pos];
        idct_pass(lane);
        for (k = 0; k < 8; k++)
            temp[8 * k + pos] = lane[k];
    }

    /* row pass: temp -> block, with the final down-shift */
    for (pos = 0; pos < 8 * 8; pos += 8) {
        for (k = 0; k < 8; k++)
            lane[k] = temp[pos + k];
        idct_pass(lane);
        for (k = 0; k < 8; k++)
            block[pos + k] = lane[k] >> 6;
    }
}
250
251static av_cold void init_vlcs(void)
252{
253    static VLCElem table[2][4][32];
254    int i, j;
255
256    for (i = 0; i < 2; i++) {
257        for (j = 0; j < 4; j++) {
258            block_type_vlc[i][j].table           = table[i][j];
259            block_type_vlc[i][j].table_allocated = 32;
260            init_vlc(&block_type_vlc[i][j], BLOCK_TYPE_VLC_BITS, 7,
261                     &block_type_tab[i][j][0][1], 2, 1,
262                     &block_type_tab[i][j][0][0], 2, 1,
263                     INIT_VLC_USE_NEW_STATIC);
264        }
265    }
266}
267
268static void init_mv(FourXContext *f, int linesize)
269{
270    int i;
271
272    for (i = 0; i < 256; i++) {
273        if (f->version > 1)
274            f->mv[i] = mv[i][0] + mv[i][1] * linesize / 2;
275        else
276            f->mv[i] = (i & 15) - 8 + ((i >> 4) - 8) * linesize / 2;
277    }
278}
279
/*
 * LE_CENTRIC_MUL(dst, src, scale, dc): load one 32-bit word from src, compute
 * word * scale + dc in unsigned arithmetic and store the result at dst
 * (aligned store). On big-endian hosts the two 16-bit halves are swapped
 * before and after the arithmetic, so the computation sees the halves in the
 * same order as on a little-endian host.
 */
#if HAVE_BIGENDIAN
#define LE_CENTRIC_MUL(dst, src, scale, dc)                     \
    do {                                                        \
        unsigned word_ = AV_RN32(src);                          \
        word_ = (word_ << 16) | (word_ >> 16);                  \
        word_ = word_ * (scale) + (dc);                         \
        word_ = (word_ << 16) | (word_ >> 16);                  \
        AV_WN32A(dst, word_);                                   \
    } while (0)
#else
#define LE_CENTRIC_MUL(dst, src, scale, dc)                     \
    do {                                                        \
        unsigned word_ = AV_RN32(src) * (scale) + (dc);         \
        AV_WN32A(dst, word_);                                   \
    } while (0)
#endif
296
/**
 * Write a block of (1 << log2w) x h pixels as dst = scale * src + dc.
 *
 * Widths of 2, 4 and 8 pixels are processed two pixels (one 32-bit word) at a
 * time, with dc replicated into both 16-bit halves. When scale is 0 the
 * source pointer is never advanced.
 *
 * @param log2w  log2 of the block width; must be 0..3
 * @param h      block height in rows
 * @param stride row pitch of dst and src, in pixels
 */
static inline void mcdc(uint16_t *dst, const uint16_t *src, int log2w,
                        int h, int stride, int scale, unsigned dc)
{
    const int src_step = scale ? stride : 0;
    int row, px;

    dc *= 0x10001;

    switch (log2w) {
    case 0:
        for (row = 0; row < h; row++) {
            dst[0] = scale * src[0] + dc;
            src   += src_step;
            dst   += stride;
        }
        break;
    case 1:
    case 2:
    case 3:
        for (row = 0; row < h; row++) {
            for (px = 0; px < (1 << log2w); px += 2) {
                LE_CENTRIC_MUL(dst + px, src + px, scale, dc);
            }
            src += src_step;
            dst += stride;
        }
        break;
    default:
        av_assert0(0);
    }
}
344
345static int decode_p_block(FourXContext *f, uint16_t *dst, const uint16_t *src,
346                          int log2w, int log2h, int stride)
347{
348    int index, h, code, ret, scale = 1;
349    uint16_t *start, *end;
350    unsigned dc = 0;
351
352    av_assert0(log2w >= 0 && log2h >= 0);
353
354    index = size2index[log2h][log2w];
355    av_assert0(index >= 0);
356
357    if (get_bits_left(&f->gb) < 1)
358        return AVERROR_INVALIDDATA;
359    h     = 1 << log2h;
360    code  = get_vlc2(&f->gb, block_type_vlc[1 - (f->version > 1)][index].table,
361                     BLOCK_TYPE_VLC_BITS, 1);
362    av_assert0(code >= 0 && code <= 6);
363
364    start = f->last_frame_buffer;
365    end   = start + stride * (f->avctx->height - h + 1) - (1 << log2w);
366
367    if (code == 1) {
368        log2h--;
369        if ((ret = decode_p_block(f, dst, src, log2w, log2h, stride)) < 0)
370            return ret;
371        return decode_p_block(f, dst + (stride << log2h),
372                              src + (stride << log2h),
373                              log2w, log2h, stride);
374    } else if (code == 2) {
375        log2w--;
376        if ((ret = decode_p_block(f, dst , src, log2w, log2h, stride)) < 0)
377            return ret;
378        return decode_p_block(f, dst + (1 << log2w),
379                              src + (1 << log2w),
380                              log2w, log2h, stride);
381    } else if (code == 6) {
382        if (bytestream2_get_bytes_left(&f->g2) < 4) {
383            av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
384            return AVERROR_INVALIDDATA;
385        }
386        if (log2w) {
387            dst[0]      = bytestream2_get_le16u(&f->g2);
388            dst[1]      = bytestream2_get_le16u(&f->g2);
389        } else {
390            dst[0]      = bytestream2_get_le16u(&f->g2);
391            dst[stride] = bytestream2_get_le16u(&f->g2);
392        }
393        return 0;
394    }
395
396    if ((code&3)==0 && bytestream2_get_bytes_left(&f->g) < 1) {
397        av_log(f->avctx, AV_LOG_ERROR, "bytestream overread\n");
398        return AVERROR_INVALIDDATA;
399    }
400
401    if (code == 0) {
402        src  += f->mv[bytestream2_get_byte(&f->g)];
403    } else if (code == 3 && f->version >= 2) {
404        return 0;
405    } else if (code == 4) {
406        src  += f->mv[bytestream2_get_byte(&f->g)];
407        if (bytestream2_get_bytes_left(&f->g2) < 2){
408            av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
409            return AVERROR_INVALIDDATA;
410        }
411        dc    = bytestream2_get_le16(&f->g2);
412    } else if (code == 5) {
413        if (bytestream2_get_bytes_left(&f->g2) < 2){
414            av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
415            return AVERROR_INVALIDDATA;
416        }
417        av_assert0(start <= src && src <= end);
418        scale = 0;
419        dc    = bytestream2_get_le16(&f->g2);
420    }
421
422    if (start > src || src > end) {
423        av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
424        return AVERROR_INVALIDDATA;
425    }
426
427    mcdc(dst, src, log2w, h, stride, scale, dc);
428
429    return 0;
430}
431
432static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
433{
434    int x, y;
435    const int width  = f->avctx->width;
436    const int height = f->avctx->height;
437    uint16_t *dst    = f->frame_buffer;
438    uint16_t *src;
439    unsigned int bitstream_size, bytestream_size, wordstream_size, extra,
440                 bytestream_offset, wordstream_offset;
441    int ret;
442
443    src = f->last_frame_buffer;
444
445    if (f->version > 1) {
446        extra           = 20;
447        if (length < extra)
448            return AVERROR_INVALIDDATA;
449        bitstream_size  = AV_RL32(buf + 8);
450        wordstream_size = AV_RL32(buf + 12);
451        bytestream_size = AV_RL32(buf + 16);
452    } else {
453        extra           = 0;
454        bitstream_size  = AV_RL16(buf - 4);
455        wordstream_size = AV_RL16(buf - 2);
456        bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
457    }
458
459    if (bitstream_size > length || bitstream_size >= INT_MAX/8 ||
460        bytestream_size > length - bitstream_size ||
461        wordstream_size > length - bytestream_size - bitstream_size ||
462        extra > length - bytestream_size - bitstream_size - wordstream_size) {
463        av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
464        bitstream_size+ bytestream_size+ wordstream_size - length);
465        return AVERROR_INVALIDDATA;
466    }
467
468    av_fast_padded_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
469                          bitstream_size);
470    if (!f->bitstream_buffer)
471        return AVERROR(ENOMEM);
472    f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
473                       bitstream_size / 4);
474    init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
475
476    wordstream_offset = extra + bitstream_size;
477    bytestream_offset = extra + bitstream_size + wordstream_size;
478    bytestream2_init(&f->g2, buf + wordstream_offset,
479                     length - wordstream_offset);
480    bytestream2_init(&f->g, buf + bytestream_offset,
481                     length - bytestream_offset);
482
483    init_mv(f, width * 2);
484
485    for (y = 0; y < height; y += 8) {
486        for (x = 0; x < width; x += 8)
487            if ((ret = decode_p_block(f, dst + x, src + x, 3, 3, width)) < 0)
488                return ret;
489        src += 8 * width;
490        dst += 8 * width;
491    }
492
493    return 0;
494}
495
496/**
497 * decode block and dequantize.
498 * Note this is almost identical to MJPEG.
499 */
500static int decode_i_block(FourXContext *f, int16_t *block)
501{
502    int code, i, j, level, val;
503
504    if (get_bits_left(&f->pre_gb) < 2) {
505        av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->pre_gb));
506        return AVERROR_INVALIDDATA;
507    }
508
509    /* DC coef */
510    val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
511    if (val >> 4) {
512        av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
513        return AVERROR_INVALIDDATA;
514    }
515
516    if (val)
517        val = get_xbits(&f->gb, val);
518
519    val        = val * dequant_table[0] + f->last_dc;
520    f->last_dc = block[0] = val;
521    /* AC coefs */
522    i = 1;
523    for (;;) {
524        code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
525
526        /* EOB */
527        if (code == 0)
528            break;
529        if (code == 0xf0) {
530            i += 16;
531            if (i >= 64) {
532                av_log(f->avctx, AV_LOG_ERROR, "run %d overflow\n", i);
533                return 0;
534            }
535        } else {
536            if (code & 0xf) {
537                level = get_xbits(&f->gb, code & 0xf);
538            } else {
539                av_log(f->avctx, AV_LOG_ERROR, "0 coeff\n");
540                return AVERROR_INVALIDDATA;
541            }
542            i    += code >> 4;
543            if (i >= 64) {
544                av_log(f->avctx, AV_LOG_ERROR, "run %d overflow\n", i);
545                return 0;
546            }
547
548            j = ff_zigzag_direct[i];
549            block[j] = level * dequant_table[j];
550            i++;
551            if (i >= 64)
552                break;
553        }
554    }
555
556    return 0;
557}
558
559static inline void idct_put(FourXContext *f, int x, int y)
560{
561    int16_t (*block)[64] = f->block;
562    int stride           = f->avctx->width;
563    int i;
564    uint16_t *dst = f->frame_buffer + y * stride + x;
565
566    for (i = 0; i < 4; i++) {
567        block[i][0] += 0x80 * 8 * 8;
568        idct(block[i]);
569    }
570
571    if (!(f->avctx->flags & AV_CODEC_FLAG_GRAY)) {
572        for (i = 4; i < 6; i++)
573            idct(block[i]);
574    }
575
576    /* Note transform is:
577     * y  = ( 1b + 4g + 2r) / 14
578     * cb = ( 3b - 2g - 1r) / 14
579     * cr = (-1b - 4g + 5r) / 14 */
580    for (y = 0; y < 8; y++) {
581        for (x = 0; x < 8; x++) {
582            int16_t *temp = block[(x >> 2) + 2 * (y >> 2)] +
583                            2 * (x & 3) + 2 * 8 * (y & 3); // FIXME optimize
584            int cb = block[4][x + 8 * y];
585            int cr = block[5][x + 8 * y];
586            int cg = (cb + cr) >> 1;
587            int y;
588
589            cb += cb;
590
591            y               = temp[0];
592            dst[0]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
593            y               = temp[1];
594            dst[1]          = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
595            y               = temp[8];
596            dst[stride]     = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
597            y               = temp[9];
598            dst[1 + stride] = ((y + cb) >> 3) + (((y - cg) & 0xFC) << 3) + (((y + cr) & 0xF8) << 8);
599            dst            += 2;
600        }
601        dst += 2 * stride - 2 * 8;
602    }
603}
604
605static int decode_i_mb(FourXContext *f)
606{
607    int ret;
608    int i;
609
610    f->bdsp.clear_blocks(f->block[0]);
611
612    for (i = 0; i < 6; i++)
613        if ((ret = decode_i_block(f, f->block[i])) < 0)
614            return ret;
615
616    return 0;
617}
618
/**
 * Parse the symbol frequency table at the start of an I-frame prestream and
 * build f->pre_vlc from it.
 *
 * The table is a sequence of runs: a start symbol, an end symbol, then one
 * frequency byte for each symbol in [start, end]. After a run comes the start
 * symbol of the next run; a start of 0 terminates the table. Symbol 256 is
 * added with frequency 1.
 *
 * @param buf      start of the prestream
 * @param buf_size number of bytes available at buf
 * @return pointer to the first byte after the table, rounded up to a multiple
 *         of 4 bytes from buf, or NULL if the data is invalid or the VLC
 *         could not be built
 */
static const uint8_t *read_huffman_tables(FourXContext *f,
                                          const uint8_t * const buf,
                                          int buf_size)
{
    /* nodes 0..256 are the symbols, 257..511 the internal tree nodes */
    int frequency[512] = { 0 };
    uint8_t flag[512];      // code bit contributed by a node; set whenever up[] is set
    int up[512];            // parent of each node, -1 while it has none
    uint8_t len_tab[257];
    int bits_tab[257];
    int start, end;
    const uint8_t *ptr = buf;
    const uint8_t *ptr_end = buf + buf_size;
    int j;

    memset(up, -1, sizeof(up));

    start = *ptr++;
    end   = *ptr++;
    for (;;) {
        int i;

        /* the run's frequency bytes and the following start byte must fit */
        if (ptr_end - ptr < FFMAX(end - start + 1, 0) + 1) {
            av_log(f->avctx, AV_LOG_ERROR, "invalid data in read_huffman_tables\n");
            return NULL;
        }

        for (i = start; i <= end; i++)
            frequency[i] = *ptr++;
        start = *ptr++;
        if (start == 0)
            break;

        end = *ptr++;
    }
    frequency[256] = 1;

    while ((ptr - buf) & 3)
        ptr++; // 4byte align

    if (ptr > ptr_end) {
        av_log(f->avctx, AV_LOG_ERROR, "ptr overflow in read_huffman_tables\n");
        return NULL;
    }

    /* build the tree: repeatedly join the two live nodes with the smallest
     * frequencies under a new internal node j */
    for (j = 257; j < 512; j++) {
        int min_freq[2] = { 256 * 256, 256 * 256 };
        int smallest[2] = { 0, 0 };
        int i;
        for (i = 0; i < j; i++) {
            if (frequency[i] == 0)
                continue;
            if (frequency[i] < min_freq[1]) {
                if (frequency[i] < min_freq[0]) {
                    min_freq[1] = min_freq[0];
                    smallest[1] = smallest[0];
                    min_freq[0] = frequency[i];
                    smallest[0] = i;
                } else {
                    min_freq[1] = frequency[i];
                    smallest[1] = i;
                }
            }
        }
        /* no second candidate was found: the tree is complete */
        if (min_freq[1] == 256 * 256)
            break;

        frequency[j]           = min_freq[0] + min_freq[1];
        flag[smallest[0]]      = 0;
        flag[smallest[1]]      = 1;
        up[smallest[0]]        =
        up[smallest[1]]        = j;
        /* a joined node is retired by zeroing its frequency */
        frequency[smallest[0]] = frequency[smallest[1]] = 0;
    }

    /* derive each symbol's code by walking from its leaf up to the root; the
     * leaf's own flag becomes the least significant bit */
    for (j = 0; j < 257; j++) {
        int node, len = 0, bits = 0;

        for (node = j; up[node] != -1; node = up[node]) {
            bits += flag[node] << len;
            len++;
            if (len > 31)
                // can this happen at all ?
                av_log(f->avctx, AV_LOG_ERROR,
                       "vlc length overflow\n");
        }

        bits_tab[j] = bits;
        len_tab[j]  = len;
    }

    /* replace the VLC of the previous frame */
    ff_free_vlc(&f->pre_vlc);
    if (init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab, 1, 1,
                 bits_tab, 4, 4, 0))
        return NULL;

    return ptr;
}
716
/**
 * Blend two 15-bit colours (5 bits per channel) per channel as
 * (2 * c0 + c1) / 3 and repack the result.
 *
 * Blue is taken from bits 0-4 and green from bits 5-9; red is everything
 * from bit 10 upwards.
 */
static int mix(int c0, int c1)
{
    const int b = ((c0 & 0x001F) * 2 + (c1 & 0x001F)) / 3;
    const int g = ((((c0 & 0x03E0) * 2) + (c1 & 0x03E0)) >> 5) / 3;
    const int r = ((c0 >> 10) * 2 + (c1 >> 10)) / 3;

    return (r * 1024) + (g * 32) + b;
}
724
725static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
726{
727    int x, y, x2, y2;
728    const int width  = f->avctx->width;
729    const int height = f->avctx->height;
730    const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
731    uint16_t *dst    = f->frame_buffer;
732    const uint8_t *buf_end = buf + length;
733    GetByteContext g3;
734
735    if (length < mbs * 8) {
736        av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
737        return AVERROR_INVALIDDATA;
738    }
739    bytestream2_init(&g3, buf, length);
740
741    for (y = 0; y < height; y += 16) {
742        for (x = 0; x < width; x += 16) {
743            unsigned int color[4] = { 0 }, bits;
744            if (buf_end - buf < 8)
745                return AVERROR_INVALIDDATA;
746            // warning following is purely guessed ...
747            color[0] = bytestream2_get_le16u(&g3);
748            color[1] = bytestream2_get_le16u(&g3);
749
750            if (color[0] & 0x8000)
751                av_log(f->avctx, AV_LOG_ERROR, "unk bit 1\n");
752            if (color[1] & 0x8000)
753                av_log(f->avctx, AV_LOG_ERROR, "unk bit 2\n");
754
755            color[2] = mix(color[0], color[1]);
756            color[3] = mix(color[1], color[0]);
757
758            bits = bytestream2_get_le32u(&g3);
759            for (y2 = 0; y2 < 16; y2++) {
760                for (x2 = 0; x2 < 16; x2++) {
761                    int index = 2 * (x2 >> 2) + 8 * (y2 >> 2);
762                    dst[y2 * width + x2] = color[(bits >> index) & 3];
763                }
764            }
765            dst += 16;
766        }
767        dst += 16 * width - x;
768    }
769
770    return 0;
771}
772
773static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
774{
775    int x, y, ret;
776    const int width  = f->avctx->width;
777    const int height = f->avctx->height;
778    const unsigned int bitstream_size = AV_RL32(buf);
779    unsigned int prestream_size;
780    const uint8_t *prestream;
781
782    if (bitstream_size > (1 << 26))
783        return AVERROR_INVALIDDATA;
784
785    if (length < bitstream_size + 12) {
786        av_log(f->avctx, AV_LOG_ERROR, "packet size too small\n");
787        return AVERROR_INVALIDDATA;
788    }
789
790    prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
791    prestream      =             buf + bitstream_size + 12;
792
793    if (prestream_size + bitstream_size + 12 != length
794        || prestream_size > (1 << 26)) {
795        av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n",
796               prestream_size, bitstream_size, length);
797        return AVERROR_INVALIDDATA;
798    }
799
800    prestream = read_huffman_tables(f, prestream, prestream_size);
801    if (!prestream) {
802        av_log(f->avctx, AV_LOG_ERROR, "Error reading Huffman tables.\n");
803        return AVERROR_INVALIDDATA;
804    }
805
806    av_assert0(prestream <= buf + length);
807
808    init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
809
810    prestream_size = length + buf - prestream;
811
812    av_fast_padded_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
813                          prestream_size);
814    if (!f->bitstream_buffer)
815        return AVERROR(ENOMEM);
816    f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
817                       prestream_size / 4);
818    init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
819
820    f->last_dc = 0 * 128 * 8 * 8;
821
822    for (y = 0; y < height; y += 16) {
823        for (x = 0; x < width; x += 16) {
824            if ((ret = decode_i_mb(f)) < 0)
825                return ret;
826
827            idct_put(f, x, y);
828        }
829    }
830
831    if (get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
832        av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
833
834    return 0;
835}
836
837static int decode_frame(AVCodecContext *avctx, AVFrame *picture,
838                        int *got_frame, AVPacket *avpkt)
839{
840    const uint8_t *buf    = avpkt->data;
841    int buf_size          = avpkt->size;
842    FourXContext *const f = avctx->priv_data;
843    int i, frame_4cc, frame_size, ret;
844
845    if (buf_size < 20)
846        return AVERROR_INVALIDDATA;
847
848    av_assert0(avctx->width % 16 == 0 && avctx->height % 16 == 0);
849
850    if (buf_size < AV_RL32(buf + 4) + 8) {
851        av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %"PRIu32"\n",
852               buf_size, AV_RL32(buf + 4));
853        return AVERROR_INVALIDDATA;
854    }
855
856    frame_4cc = AV_RL32(buf);
857
858    if (frame_4cc == AV_RL32("cfrm")) {
859        int free_index       = -1;
860        int id, whole_size;
861        const int data_size  = buf_size - 20;
862        CFrameBuffer *cfrm;
863
864        if (f->version <= 1) {
865            av_log(f->avctx, AV_LOG_ERROR, "cfrm in version %d\n", f->version);
866            return AVERROR_INVALIDDATA;
867        }
868
869        id         = AV_RL32(buf + 12);
870        whole_size = AV_RL32(buf + 16);
871
872        if (data_size < 0 || whole_size < 0) {
873            av_log(f->avctx, AV_LOG_ERROR, "sizes invalid\n");
874            return AVERROR_INVALIDDATA;
875        }
876
877        for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
878            if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
879                av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
880                       f->cfrm[i].id);
881
882        for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
883            if (f->cfrm[i].id == id)
884                break;
885            if (f->cfrm[i].size == 0)
886                free_index = i;
887        }
888
889        if (i >= CFRAME_BUFFER_COUNT) {
890            if (free_index < 0)
891                return AVERROR_INVALIDDATA;
892            i             = free_index;
893            f->cfrm[i].id = id;
894        }
895        cfrm = &f->cfrm[i];
896
897        if (data_size > UINT_MAX -  cfrm->size - AV_INPUT_BUFFER_PADDING_SIZE)
898            return AVERROR_INVALIDDATA;
899
900        cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
901                                     cfrm->size + data_size + AV_INPUT_BUFFER_PADDING_SIZE);
902        // explicit check needed as memcpy below might not catch a NULL
903        if (!cfrm->data) {
904            av_log(f->avctx, AV_LOG_ERROR, "realloc failure\n");
905            return AVERROR(ENOMEM);
906        }
907
908        memcpy(cfrm->data + cfrm->size, buf + 20, data_size);
909        cfrm->size += data_size;
910
911        if (cfrm->size >= whole_size) {
912            buf        = cfrm->data;
913            frame_size = cfrm->size;
914
915            if (id != avctx->frame_number)
916                av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n",
917                       id, avctx->frame_number);
918
919            if (f->version <= 1)
920                return AVERROR_INVALIDDATA;
921
922            cfrm->size = cfrm->id = 0;
923            frame_4cc  = AV_RL32("pfrm");
924        } else
925            return buf_size;
926    } else {
927        buf        = buf      + 12;
928        frame_size = buf_size - 12;
929    }
930
931    if ((ret = ff_get_buffer(avctx, picture, 0)) < 0)
932        return ret;
933
934    if (frame_4cc == AV_RL32("ifr2")) {
935        picture->pict_type = AV_PICTURE_TYPE_I;
936        if ((ret = decode_i2_frame(f, buf - 4, frame_size + 4)) < 0) {
937            av_log(f->avctx, AV_LOG_ERROR, "decode i2 frame failed\n");
938            return ret;
939        }
940    } else if (frame_4cc == AV_RL32("ifrm")) {
941        picture->pict_type = AV_PICTURE_TYPE_I;
942        if ((ret = decode_i_frame(f, buf, frame_size)) < 0) {
943            av_log(f->avctx, AV_LOG_ERROR, "decode i frame failed\n");
944            return ret;
945        }
946    } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
947        picture->pict_type = AV_PICTURE_TYPE_P;
948        if ((ret = decode_p_frame(f, buf, frame_size)) < 0) {
949            av_log(f->avctx, AV_LOG_ERROR, "decode p frame failed\n");
950            return ret;
951        }
952    } else if (frame_4cc == AV_RL32("snd_")) {
953        av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
954               buf_size);
955    } else {
956        av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n",
957               buf_size);
958    }
959
960    picture->key_frame = picture->pict_type == AV_PICTURE_TYPE_I;
961
962    av_image_copy_plane(picture->data[0], picture->linesize[0],
963                        (const uint8_t*)f->frame_buffer,  avctx->width * 2,
964                        avctx->width * 2, avctx->height);
965    FFSWAP(uint16_t *, f->frame_buffer, f->last_frame_buffer);
966
967    *got_frame = 1;
968
969    emms_c();
970
971    return buf_size;
972}
973
974static av_cold int decode_end(AVCodecContext *avctx)
975{
976    FourXContext * const f = avctx->priv_data;
977    int i;
978
979    av_freep(&f->frame_buffer);
980    av_freep(&f->last_frame_buffer);
981    av_freep(&f->bitstream_buffer);
982    f->bitstream_buffer_size = 0;
983    for (i = 0; i < CFRAME_BUFFER_COUNT; i++) {
984        av_freep(&f->cfrm[i].data);
985        f->cfrm[i].allocated_size = 0;
986    }
987    ff_free_vlc(&f->pre_vlc);
988
989    return 0;
990}
991
992static av_cold int decode_init(AVCodecContext *avctx)
993{
994    static AVOnce init_static_once = AV_ONCE_INIT;
995    FourXContext * const f = avctx->priv_data;
996    int ret;
997
998    if (avctx->extradata_size != 4 || !avctx->extradata) {
999        av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
1000        return AVERROR_INVALIDDATA;
1001    }
1002    if((avctx->width % 16) || (avctx->height % 16)) {
1003        av_log(avctx, AV_LOG_ERROR, "unsupported width/height\n");
1004        return AVERROR_INVALIDDATA;
1005    }
1006
1007    ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
1008    if (ret < 0)
1009        return ret;
1010
1011    f->frame_buffer      = av_mallocz(avctx->width * avctx->height * 2);
1012    f->last_frame_buffer = av_mallocz(avctx->width * avctx->height * 2);
1013    if (!f->frame_buffer || !f->last_frame_buffer)
1014        return AVERROR(ENOMEM);
1015
1016    f->version = AV_RL32(avctx->extradata) >> 16;
1017    ff_blockdsp_init(&f->bdsp, avctx);
1018    ff_bswapdsp_init(&f->bbdsp);
1019    f->avctx = avctx;
1020
1021    if (f->version > 2)
1022        avctx->pix_fmt = AV_PIX_FMT_RGB565;
1023    else
1024        avctx->pix_fmt = AV_PIX_FMT_BGR555;
1025
1026    ff_thread_once(&init_static_once, init_vlcs);
1027
1028    return 0;
1029}
1030
1031const FFCodec ff_fourxm_decoder = {
1032    .p.name         = "4xm",
1033    .p.long_name    = NULL_IF_CONFIG_SMALL("4X Movie"),
1034    .p.type         = AVMEDIA_TYPE_VIDEO,
1035    .p.id           = AV_CODEC_ID_4XM,
1036    .priv_data_size = sizeof(FourXContext),
1037    .init           = decode_init,
1038    .close          = decode_end,
1039    FF_CODEC_DECODE_CB(decode_frame),
1040    .p.capabilities = AV_CODEC_CAP_DR1,
1041    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1042};
1043