xref: /third_party/ffmpeg/libavcodec/svq3.c (revision cabdff1a)
1/*
2 * Copyright (c) 2003 The FFmpeg Project
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21/*
22 * How to use this decoder:
23 * SVQ3 data is transported within Apple Quicktime files. Quicktime files
24 * have stsd atoms to describe media trak properties. A stsd atom for a
25 * video trak contains 1 or more ImageDescription atoms. These atoms begin
26 * with the 4-byte length of the atom followed by the codec fourcc. Some
27 * decoders need information in this atom to operate correctly. Such
28 * is the case with SVQ3. In order to get the best use out of this decoder,
29 * the calling app must make the SVQ3 ImageDescription atom available
30 * via the AVCodecContext's extradata[_size] field:
31 *
32 * AVCodecContext.extradata = pointer to ImageDescription, first characters
33 * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
34 * AVCodecContext.extradata_size = size of ImageDescription atom memory
35 * buffer (which will be the same as the ImageDescription atom size field
36 * from the QT file, minus 4 bytes since the length is missing)
37 *
38 * You will know you have these parameters passed correctly when the decoder
39 * correctly decodes this file:
40 *  http://samples.mplayerhq.hu/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
41 */
42
43#include <inttypes.h>
44
45#include "libavutil/attributes.h"
46#include "libavutil/crc.h"
47#include "libavutil/mem_internal.h"
48
49#include "codec_internal.h"
50#include "internal.h"
51#include "avcodec.h"
52#include "mpegutils.h"
53#include "h264data.h"
54#include "h264dsp.h"
55#include "h264pred.h"
56#include "h264_parse.h"
57#include "golomb.h"
58#include "hpeldsp.h"
59#include "mathops.h"
60#include "rectangle.h"
61#include "tpeldsp.h"
62#include "videodsp.h"
63
64#if CONFIG_ZLIB
65#include <zlib.h>
66#endif
67
68#include "svq1.h"
69
70/**
71 * @file
72 * svq3 decoder.
73 */
74
75typedef struct SVQ3Frame {
76    AVFrame *f;
77
78    int16_t (*motion_val_buf[2])[2];
79    int16_t (*motion_val[2])[2];
80
81    uint32_t *mb_type_buf, *mb_type;
82} SVQ3Frame;
83
84typedef struct SVQ3Context {
85    AVCodecContext *avctx;
86
87    H264DSPContext  h264dsp;
88    H264PredContext hpc;
89    HpelDSPContext hdsp;
90    TpelDSPContext tdsp;
91    VideoDSPContext vdsp;
92
93    SVQ3Frame *cur_pic;
94    SVQ3Frame *next_pic;
95    SVQ3Frame *last_pic;
96    GetBitContext gb;
97    GetBitContext gb_slice;
98    uint8_t *slice_buf;
99    unsigned slice_buf_size;
100    int halfpel_flag;
101    int thirdpel_flag;
102    int has_watermark;
103    uint32_t watermark_key;
104    int adaptive_quant;
105    int h_edge_pos;
106    int v_edge_pos;
107    int last_frame_output;
108    int slice_num;
109    int qscale;
110    int cbp;
111    int frame_num;
112    int frame_num_offset;
113    int prev_frame_num_offset;
114    int prev_frame_num;
115
116    enum AVPictureType pict_type;
117    enum AVPictureType slice_type;
118    int low_delay;
119
120    int mb_x, mb_y;
121    int mb_xy;
122    int mb_width, mb_height;
123    int mb_stride, mb_num;
124    int b_stride;
125
126    uint32_t *mb2br_xy;
127
128    int chroma_pred_mode;
129    int intra16x16_pred_mode;
130
131    int8_t   intra4x4_pred_mode_cache[5 * 8];
132    int8_t (*intra4x4_pred_mode);
133
134    unsigned int top_samples_available;
135    unsigned int left_samples_available;
136
137    uint8_t *edge_emu_buffer;
138
139    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
140    DECLARE_ALIGNED(8,  int8_t, ref_cache)[2][5 * 8];
141    DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
142    DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
143    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
144    uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
145    int block_offset[2 * (16 * 3)];
146    SVQ3Frame frames[3];
147} SVQ3Context;
148
/* Motion-compensation modes passed as the `mode` argument of svq3_mc_dir(). */
enum {
    FULLPEL_MODE  = 1,
    HALFPEL_MODE  = 2,
    THIRDPEL_MODE = 3,
    PREDICT_MODE  = 4,
};
153
/* dual scan (from some older H.264 draft)
 * o-->o-->o   o
 *         |  /|
 * o   o   o / o
 * | / |   |/  |
 * o   o   o   o
 *   /
 * o-->o-->o-->o
 *
 * Entry n is the position (x + 4 * y) inside a 4x4 block of the n-th
 * coefficient in scan order.
 */
#define SVQ3_SCAN_POS(x, y) ((x) + (y) * 4)
static const uint8_t svq3_scan[16] = {
    SVQ3_SCAN_POS(0, 0), SVQ3_SCAN_POS(1, 0),
    SVQ3_SCAN_POS(2, 0), SVQ3_SCAN_POS(2, 1),

    SVQ3_SCAN_POS(2, 2), SVQ3_SCAN_POS(3, 0),
    SVQ3_SCAN_POS(3, 1), SVQ3_SCAN_POS(3, 2),

    SVQ3_SCAN_POS(0, 1), SVQ3_SCAN_POS(0, 2),
    SVQ3_SCAN_POS(1, 1), SVQ3_SCAN_POS(1, 2),

    SVQ3_SCAN_POS(0, 3), SVQ3_SCAN_POS(1, 3),
    SVQ3_SCAN_POS(2, 3), SVQ3_SCAN_POS(3, 3),
};
#undef SVQ3_SCAN_POS
169
/* Scan order for the luma DC coefficients: entry n is the offset
 * (16 * a + 64 * b) at which the n-th decoded coefficient is stored. */
#define LUMA_DC_POS(a, b) ((a) * 16 + (b) * 64)
static const uint8_t luma_dc_zigzag_scan[16] = {
    LUMA_DC_POS(0, 0), LUMA_DC_POS(1, 0),
    LUMA_DC_POS(2, 0), LUMA_DC_POS(0, 2),

    LUMA_DC_POS(3, 0), LUMA_DC_POS(0, 1),
    LUMA_DC_POS(1, 1), LUMA_DC_POS(2, 1),

    LUMA_DC_POS(1, 2), LUMA_DC_POS(2, 2),
    LUMA_DC_POS(3, 2), LUMA_DC_POS(0, 3),

    LUMA_DC_POS(3, 1), LUMA_DC_POS(1, 3),
    LUMA_DC_POS(2, 3), LUMA_DC_POS(3, 3),
};
#undef LUMA_DC_POS
176
/* 25 pairs with both members in 0..4, ordered by increasing sum of the
 * two members. NOTE(review): the lookup that consumes this table is
 * outside the visible part of the file. */
static const uint8_t svq3_pred_0[25][2] = {
    /*  0      */ { 0, 0 },
    /*  1 -  2 */ { 1, 0 }, { 0, 1 },
    /*  3 -  5 */ { 0, 2 }, { 1, 1 }, { 2, 0 },
    /*  6 -  9 */ { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
    /* 10 - 14 */ { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
    /* 15 - 18 */ { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
    /* 19 - 21 */ { 2, 4 }, { 3, 3 }, { 4, 2 },
    /* 22 - 23 */ { 4, 3 }, { 3, 4 },
    /* 24      */ { 4, 4 },
};
188
/* 6 x 6 table of 5-entry lists; -1 marks an unused slot.
 * NOTE(review): the lookup that consumes this table is outside the
 * visible part of the file. */
static const int8_t svq3_pred_1[6][6][5] = {
    {   /* [0] */
        { 2, -1, -1, -1, -1 },
        { 2,  1, -1, -1, -1 },
        { 1,  2, -1, -1, -1 },
        { 2,  1, -1, -1, -1 },
        { 1,  2, -1, -1, -1 },
        { 1,  2, -1, -1, -1 },
    },
    {   /* [1] */
        { 0,  2, -1, -1, -1 },
        { 0,  2,  1,  4,  3 },
        { 0,  1,  2,  4,  3 },
        { 0,  2,  1,  4,  3 },
        { 2,  0,  1,  3,  4 },
        { 0,  4,  2,  1,  3 },
    },
    {   /* [2] */
        { 2,  0, -1, -1, -1 },
        { 2,  1,  0,  4,  3 },
        { 1,  2,  4,  0,  3 },
        { 2,  1,  0,  4,  3 },
        { 2,  1,  4,  3,  0 },
        { 1,  2,  4,  0,  3 },
    },
    {   /* [3] */
        { 2,  0, -1, -1, -1 },
        { 2,  0,  1,  4,  3 },
        { 1,  2,  0,  4,  3 },
        { 2,  1,  0,  4,  3 },
        { 2,  1,  3,  4,  0 },
        { 2,  4,  1,  0,  3 },
    },
    {   /* [4] */
        { 0,  2, -1, -1, -1 },
        { 0,  2,  1,  3,  4 },
        { 1,  2,  3,  0,  4 },
        { 2,  0,  1,  3,  4 },
        { 2,  1,  3,  0,  4 },
        { 2,  0,  4,  3,  1 },
    },
    {   /* [5] */
        { 0,  2, -1, -1, -1 },
        { 0,  2,  4,  1,  3 },
        { 1,  4,  2,  0,  3 },
        { 4,  2,  0,  1,  3 },
        { 2,  0,  1,  4,  3 },
        { 4,  2,  1,  0,  3 },
    },
};
203
/* (run, level) pairs for the 16 smallest coefficient codes.
 * The first index is the `intra` class computed in svq3_decode_block(),
 * the second is the code value (0..15). */
static const struct {
    uint8_t run;
    uint8_t level;
} svq3_dct_tables[2][16] = {
    {   /* class 0 */
        /*  0 -  3 */ { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 },
        /*  4 -  7 */ { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
        /*  8 - 11 */ { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 },
        /* 12 - 15 */ { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 },
    },
    {   /* class 1 */
        /*  0 -  3 */ { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 },
        /*  4 -  7 */ { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
        /*  8 - 11 */ { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 },
        /* 12 - 15 */ { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 },
    },
};
213
/* Dequantization multipliers, indexed by the quantizer (qp, 0..31). */
static const uint32_t svq3_dequant_coeff[32] = {
    /*  0 -  3 */   3881,   4351,   4890,   5481,
    /*  4 -  7 */   6154,   6914,   7761,   8718,
    /*  8 - 11 */   9781,  10987,  12339,  13828,
    /* 12 - 15 */  15523,  17435,  19561,  21873,
    /* 16 - 19 */  24552,  27656,  30847,  34870,
    /* 20 - 23 */  38807,  43747,  49103,  54683,
    /* 24 - 27 */  61694,  68745,  77615,  89113,
    /* 28 - 31 */ 100253, 109366, 126635, 141533,
};
220
221static void svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
222{
223    const unsigned qmul = svq3_dequant_coeff[qp];
224#define stride 16
225    int i;
226    int temp[16];
227    static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
228
229    for (i = 0; i < 4; i++) {
230        const int z0 = 13 * (input[4 * i + 0] +      input[4 * i + 2]);
231        const int z1 = 13 * (input[4 * i + 0] -      input[4 * i + 2]);
232        const int z2 =  7 *  input[4 * i + 1] - 17 * input[4 * i + 3];
233        const int z3 = 17 *  input[4 * i + 1] +  7 * input[4 * i + 3];
234
235        temp[4 * i + 0] = z0 + z3;
236        temp[4 * i + 1] = z1 + z2;
237        temp[4 * i + 2] = z1 - z2;
238        temp[4 * i + 3] = z0 - z3;
239    }
240
241    for (i = 0; i < 4; i++) {
242        const int offset = x_offset[i];
243        const int z0     = 13 * (temp[4 * 0 + i] +      temp[4 * 2 + i]);
244        const int z1     = 13 * (temp[4 * 0 + i] -      temp[4 * 2 + i]);
245        const int z2     =  7 *  temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
246        const int z3     = 17 *  temp[4 * 1 + i] +  7 * temp[4 * 3 + i];
247
248        output[stride *  0 + offset] = (int)((z0 + z3) * qmul + 0x80000) >> 20;
249        output[stride *  2 + offset] = (int)((z1 + z2) * qmul + 0x80000) >> 20;
250        output[stride *  8 + offset] = (int)((z1 - z2) * qmul + 0x80000) >> 20;
251        output[stride * 10 + offset] = (int)((z0 - z3) * qmul + 0x80000) >> 20;
252    }
253}
254#undef stride
255
256static void svq3_add_idct_c(uint8_t *dst, int16_t *block,
257                            int stride, int qp, int dc)
258{
259    const int qmul = svq3_dequant_coeff[qp];
260    int i;
261
262    if (dc) {
263        dc       = 13 * 13 * (dc == 1 ? 1538U* block[0]
264                                      : qmul * (block[0] >> 3) / 2);
265        block[0] = 0;
266    }
267
268    for (i = 0; i < 4; i++) {
269        const int z0 = 13 * (block[0 + 4 * i] +      block[2 + 4 * i]);
270        const int z1 = 13 * (block[0 + 4 * i] -      block[2 + 4 * i]);
271        const int z2 =  7 *  block[1 + 4 * i] - 17 * block[3 + 4 * i];
272        const int z3 = 17 *  block[1 + 4 * i] +  7 * block[3 + 4 * i];
273
274        block[0 + 4 * i] = z0 + z3;
275        block[1 + 4 * i] = z1 + z2;
276        block[2 + 4 * i] = z1 - z2;
277        block[3 + 4 * i] = z0 - z3;
278    }
279
280    for (i = 0; i < 4; i++) {
281        const unsigned z0 = 13 * (block[i + 4 * 0] +      block[i + 4 * 2]);
282        const unsigned z1 = 13 * (block[i + 4 * 0] -      block[i + 4 * 2]);
283        const unsigned z2 =  7 *  block[i + 4 * 1] - 17 * block[i + 4 * 3];
284        const unsigned z3 = 17 *  block[i + 4 * 1] +  7 * block[i + 4 * 3];
285        const int rr = (dc + 0x80000u);
286
287        dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((int)((z0 + z3) * qmul + rr) >> 20));
288        dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((int)((z1 + z2) * qmul + rr) >> 20));
289        dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((int)((z1 - z2) * qmul + rr) >> 20));
290        dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((int)((z0 - z3) * qmul + rr) >> 20));
291    }
292
293    memset(block, 0, 16 * sizeof(int16_t));
294}
295
296static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
297                                    int index, const int type)
298{
299    static const uint8_t *const scan_patterns[4] = {
300        luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
301    };
302
303    int run, level, sign, limit;
304    unsigned vlc;
305    const int intra           = 3 * type >> 2;
306    const uint8_t *const scan = scan_patterns[type];
307
308    for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
309        for (; (vlc = get_interleaved_ue_golomb(gb)) != 0; index++) {
310            if ((int32_t)vlc < 0)
311                return -1;
312
313            sign     = (vlc & 1) ? 0 : -1;
314            vlc      = vlc + 1 >> 1;
315
316            if (type == 3) {
317                if (vlc < 3) {
318                    run   = 0;
319                    level = vlc;
320                } else if (vlc < 4) {
321                    run   = 1;
322                    level = 1;
323                } else {
324                    run   = vlc & 0x3;
325                    level = (vlc + 9 >> 2) - run;
326                }
327            } else {
328                if (vlc < 16U) {
329                    run   = svq3_dct_tables[intra][vlc].run;
330                    level = svq3_dct_tables[intra][vlc].level;
331                } else if (intra) {
332                    run   = vlc & 0x7;
333                    level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
334                } else {
335                    run   = vlc & 0xF;
336                    level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
337                }
338            }
339
340
341            if ((index += run) >= limit)
342                return -1;
343
344            block[scan[index]] = (level ^ sign) - sign;
345        }
346
347        if (type != 2) {
348            break;
349        }
350    }
351
352    return 0;
353}
354
355static av_always_inline int
356svq3_fetch_diagonal_mv(const SVQ3Context *s, const int16_t **C,
357                       int i, int list, int part_width)
358{
359    const int topright_ref = s->ref_cache[list][i - 8 + part_width];
360
361    if (topright_ref != PART_NOT_AVAILABLE) {
362        *C = s->mv_cache[list][i - 8 + part_width];
363        return topright_ref;
364    } else {
365        *C = s->mv_cache[list][i - 8 - 1];
366        return s->ref_cache[list][i - 8 - 1];
367    }
368}
369
370/**
371 * Get the predicted MV.
372 * @param n the block index
373 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
374 * @param mx the x component of the predicted motion vector
375 * @param my the y component of the predicted motion vector
376 */
377static av_always_inline void svq3_pred_motion(const SVQ3Context *s, int n,
378                                              int part_width, int list,
379                                              int ref, int *const mx, int *const my)
380{
381    const int index8       = scan8[n];
382    const int top_ref      = s->ref_cache[list][index8 - 8];
383    const int left_ref     = s->ref_cache[list][index8 - 1];
384    const int16_t *const A = s->mv_cache[list][index8 - 1];
385    const int16_t *const B = s->mv_cache[list][index8 - 8];
386    const int16_t *C;
387    int diagonal_ref, match_count;
388
389/* mv_cache
390 * B . . A T T T T
391 * U . . L . . , .
392 * U . . L . . . .
393 * U . . L . . , .
394 * . . . L . . . .
395 */
396
397    diagonal_ref = svq3_fetch_diagonal_mv(s, &C, index8, list, part_width);
398    match_count  = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
399    if (match_count > 1) { //most common
400        *mx = mid_pred(A[0], B[0], C[0]);
401        *my = mid_pred(A[1], B[1], C[1]);
402    } else if (match_count == 1) {
403        if (left_ref == ref) {
404            *mx = A[0];
405            *my = A[1];
406        } else if (top_ref == ref) {
407            *mx = B[0];
408            *my = B[1];
409        } else {
410            *mx = C[0];
411            *my = C[1];
412        }
413    } else {
414        if (top_ref      == PART_NOT_AVAILABLE &&
415            diagonal_ref == PART_NOT_AVAILABLE &&
416            left_ref     != PART_NOT_AVAILABLE) {
417            *mx = A[0];
418            *my = A[1];
419        } else {
420            *mx = mid_pred(A[0], B[0], C[0]);
421            *my = mid_pred(A[1], B[1], C[1]);
422        }
423    }
424}
425
426static inline void svq3_mc_dir_part(SVQ3Context *s,
427                                    int x, int y, int width, int height,
428                                    int mx, int my, int dxy,
429                                    int thirdpel, int dir, int avg)
430{
431    const SVQ3Frame *pic = (dir == 0) ? s->last_pic : s->next_pic;
432    uint8_t *src, *dest;
433    int i, emu = 0;
434    int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
435    int linesize   = s->cur_pic->f->linesize[0];
436    int uvlinesize = s->cur_pic->f->linesize[1];
437
438    mx += x;
439    my += y;
440
441    if (mx < 0 || mx >= s->h_edge_pos - width  - 1 ||
442        my < 0 || my >= s->v_edge_pos - height - 1) {
443        emu = 1;
444        mx = av_clip(mx, -16, s->h_edge_pos - width  + 15);
445        my = av_clip(my, -16, s->v_edge_pos - height + 15);
446    }
447
448    /* form component predictions */
449    dest = s->cur_pic->f->data[0] + x + y * linesize;
450    src  = pic->f->data[0] + mx + my * linesize;
451
452    if (emu) {
453        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
454                                 linesize, linesize,
455                                 width + 1, height + 1,
456                                 mx, my, s->h_edge_pos, s->v_edge_pos);
457        src = s->edge_emu_buffer;
458    }
459    if (thirdpel)
460        (avg ? s->tdsp.avg_tpel_pixels_tab
461             : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
462                                                 width, height);
463    else
464        (avg ? s->hdsp.avg_pixels_tab
465             : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
466                                                       height);
467
468    if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
469        mx     = mx + (mx < (int) x) >> 1;
470        my     = my + (my < (int) y) >> 1;
471        width  = width  >> 1;
472        height = height >> 1;
473        blocksize++;
474
475        for (i = 1; i < 3; i++) {
476            dest = s->cur_pic->f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
477            src  = pic->f->data[i] + mx + my * uvlinesize;
478
479            if (emu) {
480                s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
481                                         uvlinesize, uvlinesize,
482                                         width + 1, height + 1,
483                                         mx, my, (s->h_edge_pos >> 1),
484                                         s->v_edge_pos >> 1);
485                src = s->edge_emu_buffer;
486            }
487            if (thirdpel)
488                (avg ? s->tdsp.avg_tpel_pixels_tab
489                     : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
490                                                         uvlinesize,
491                                                         width, height);
492            else
493                (avg ? s->hdsp.avg_pixels_tab
494                     : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
495                                                               uvlinesize,
496                                                               height);
497        }
498    }
499}
500
501static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
502                              int dir, int avg)
503{
504    int i, j, k, mx, my, dx, dy, x, y;
505    const int part_width    = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
506    const int part_height   = 16 >> ((unsigned)(size + 1) / 3);
507    const int extra_width   = (mode == PREDICT_MODE) ? -16 * 6 : 0;
508    const int h_edge_pos    = 6 * (s->h_edge_pos - part_width)  - extra_width;
509    const int v_edge_pos    = 6 * (s->v_edge_pos - part_height) - extra_width;
510
511    for (i = 0; i < 16; i += part_height)
512        for (j = 0; j < 16; j += part_width) {
513            const int b_xy = (4 * s->mb_x + (j >> 2)) +
514                             (4 * s->mb_y + (i >> 2)) * s->b_stride;
515            int dxy;
516            x = 16 * s->mb_x + j;
517            y = 16 * s->mb_y + i;
518            k = (j >> 2 & 1) + (i >> 1 & 2) +
519                (j >> 1 & 4) + (i      & 8);
520
521            if (mode != PREDICT_MODE) {
522                svq3_pred_motion(s, k, part_width >> 2, dir, 1, &mx, &my);
523            } else {
524                mx = s->next_pic->motion_val[0][b_xy][0] * 2;
525                my = s->next_pic->motion_val[0][b_xy][1] * 2;
526
527                if (dir == 0) {
528                    mx = mx * s->frame_num_offset /
529                         s->prev_frame_num_offset + 1 >> 1;
530                    my = my * s->frame_num_offset /
531                         s->prev_frame_num_offset + 1 >> 1;
532                } else {
533                    mx = mx * (s->frame_num_offset - s->prev_frame_num_offset) /
534                         s->prev_frame_num_offset + 1 >> 1;
535                    my = my * (s->frame_num_offset - s->prev_frame_num_offset) /
536                         s->prev_frame_num_offset + 1 >> 1;
537                }
538            }
539
540            /* clip motion vector prediction to frame border */
541            mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
542            my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
543
544            /* get (optional) motion vector differential */
545            if (mode == PREDICT_MODE) {
546                dx = dy = 0;
547            } else {
548                dy = get_interleaved_se_golomb(&s->gb_slice);
549                dx = get_interleaved_se_golomb(&s->gb_slice);
550
551                if (dx != (int16_t)dx || dy != (int16_t)dy) {
552                    av_log(s->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
553                    return -1;
554                }
555            }
556
557            /* compute motion vector */
558            if (mode == THIRDPEL_MODE) {
559                int fx, fy;
560                mx  = (mx + 1 >> 1) + dx;
561                my  = (my + 1 >> 1) + dy;
562                fx  = (unsigned)(mx + 0x30000) / 3 - 0x10000;
563                fy  = (unsigned)(my + 0x30000) / 3 - 0x10000;
564                dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
565
566                svq3_mc_dir_part(s, x, y, part_width, part_height,
567                                 fx, fy, dxy, 1, dir, avg);
568                mx += mx;
569                my += my;
570            } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
571                mx  = (unsigned)(mx + 1 + 0x30000) / 3 + dx - 0x10000;
572                my  = (unsigned)(my + 1 + 0x30000) / 3 + dy - 0x10000;
573                dxy = (mx & 1) + 2 * (my & 1);
574
575                svq3_mc_dir_part(s, x, y, part_width, part_height,
576                                 mx >> 1, my >> 1, dxy, 0, dir, avg);
577                mx *= 3;
578                my *= 3;
579            } else {
580                mx = (unsigned)(mx + 3 + 0x60000) / 6 + dx - 0x10000;
581                my = (unsigned)(my + 3 + 0x60000) / 6 + dy - 0x10000;
582
583                svq3_mc_dir_part(s, x, y, part_width, part_height,
584                                 mx, my, 0, 0, dir, avg);
585                mx *= 6;
586                my *= 6;
587            }
588
589            /* update mv_cache */
590            if (mode != PREDICT_MODE) {
591                int32_t mv = pack16to32(mx, my);
592
593                if (part_height == 8 && i < 8) {
594                    AV_WN32A(s->mv_cache[dir][scan8[k] + 1 * 8], mv);
595
596                    if (part_width == 8 && j < 8)
597                        AV_WN32A(s->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
598                }
599                if (part_width == 8 && j < 8)
600                    AV_WN32A(s->mv_cache[dir][scan8[k] + 1], mv);
601                if (part_width == 4 || part_height == 4)
602                    AV_WN32A(s->mv_cache[dir][scan8[k]], mv);
603            }
604
605            /* write back motion vectors */
606            fill_rectangle(s->cur_pic->motion_val[dir][b_xy],
607                           part_width >> 2, part_height >> 2, s->b_stride,
608                           pack16to32(mx, my), 4);
609        }
610
611    return 0;
612}
613
614static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
615                                                    int mb_type, const int *block_offset,
616                                                    int linesize, uint8_t *dest_y)
617{
618    int i;
619    if (!IS_INTRA4x4(mb_type)) {
620        for (i = 0; i < 16; i++)
621            if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
622                uint8_t *const ptr = dest_y + block_offset[i];
623                svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
624                                s->qscale, IS_INTRA(mb_type) ? 1 : 0);
625            }
626    }
627}
628
629static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
630                                                       int mb_type,
631                                                       const int *block_offset,
632                                                       int linesize,
633                                                       uint8_t *dest_y)
634{
635    int i;
636    int qscale = s->qscale;
637
638    if (IS_INTRA4x4(mb_type)) {
639        for (i = 0; i < 16; i++) {
640            uint8_t *const ptr = dest_y + block_offset[i];
641            const int dir      = s->intra4x4_pred_mode_cache[scan8[i]];
642
643            uint8_t *topright;
644            int nnz;
645            if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
646                av_assert2(s->mb_y || linesize <= block_offset[i]);
647                topright = ptr + 4 - linesize;
648            } else
649                topright = NULL;
650
651            s->hpc.pred4x4[dir](ptr, topright, linesize);
652            nnz = s->non_zero_count_cache[scan8[i]];
653            if (nnz) {
654                svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
655            }
656        }
657    } else {
658        s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
659        svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
660    }
661}
662
663static void hl_decode_mb(SVQ3Context *s)
664{
665    const int mb_x    = s->mb_x;
666    const int mb_y    = s->mb_y;
667    const int mb_xy   = s->mb_xy;
668    const int mb_type = s->cur_pic->mb_type[mb_xy];
669    uint8_t *dest_y, *dest_cb, *dest_cr;
670    int linesize, uvlinesize;
671    int i, j;
672    const int *block_offset = &s->block_offset[0];
673    const int block_h   = 16 >> 1;
674
675    linesize   = s->cur_pic->f->linesize[0];
676    uvlinesize = s->cur_pic->f->linesize[1];
677
678    dest_y  = s->cur_pic->f->data[0] + (mb_x     + mb_y * linesize)  * 16;
679    dest_cb = s->cur_pic->f->data[1] +  mb_x * 8 + mb_y * uvlinesize * block_h;
680    dest_cr = s->cur_pic->f->data[2] +  mb_x * 8 + mb_y * uvlinesize * block_h;
681
682    s->vdsp.prefetch(dest_y  + (s->mb_x & 3) * 4 * linesize   + 64, linesize,      4);
683    s->vdsp.prefetch(dest_cb + (s->mb_x & 7)     * uvlinesize + 64, dest_cr - dest_cb, 2);
684
685    if (IS_INTRA(mb_type)) {
686        s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
687        s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
688
689        hl_decode_mb_predict_luma(s, mb_type, block_offset, linesize, dest_y);
690    }
691
692    hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
693
694    if (s->cbp & 0x30) {
695        uint8_t *dest[2] = { dest_cb, dest_cr };
696        s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
697                                               s->dequant4_coeff[4][0]);
698        s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
699                                               s->dequant4_coeff[4][0]);
700        for (j = 1; j < 3; j++) {
701            for (i = j * 16; i < j * 16 + 4; i++)
702                if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
703                    uint8_t *const ptr = dest[j - 1] + block_offset[i];
704                    svq3_add_idct_c(ptr, s->mb + i * 16,
705                                    uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
706                }
707        }
708    }
709}
710
711static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
712{
713    int i, j, k, m, dir, mode;
714    int cbp = 0;
715    uint32_t vlc;
716    int8_t *top, *left;
717    const int mb_xy = s->mb_xy;
718    const int b_xy  = 4 * s->mb_x + 4 * s->mb_y * s->b_stride;
719
720    s->top_samples_available      = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
721    s->left_samples_available     = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
722
723    if (mb_type == 0) {           /* SKIP */
724        if (s->pict_type == AV_PICTURE_TYPE_P ||
725            s->next_pic->mb_type[mb_xy] == -1) {
726            svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
727                             0, 0, 0, 0, 0, 0);
728
729            if (s->pict_type == AV_PICTURE_TYPE_B)
730                svq3_mc_dir_part(s, 16 * s->mb_x, 16 * s->mb_y, 16, 16,
731                                 0, 0, 0, 0, 1, 1);
732
733            mb_type = MB_TYPE_SKIP;
734        } else {
735            mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
736            if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
737                return -1;
738            if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
739                return -1;
740
741            mb_type = MB_TYPE_16x16;
742        }
743    } else if (mb_type < 8) {     /* INTER */
744        if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&s->gb_slice))
745            mode = THIRDPEL_MODE;
746        else if (s->halfpel_flag &&
747                 s->thirdpel_flag == !get_bits1(&s->gb_slice))
748            mode = HALFPEL_MODE;
749        else
750            mode = FULLPEL_MODE;
751
752        /* fill caches */
753        /* note ref_cache should contain here:
754         *  ????????
755         *  ???11111
756         *  N??11111
757         *  N??11111
758         *  N??11111
759         */
760
761        for (m = 0; m < 2; m++) {
762            if (s->mb_x > 0 && s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6] != -1) {
763                for (i = 0; i < 4; i++)
764                    AV_COPY32(s->mv_cache[m][scan8[0] - 1 + i * 8],
765                              s->cur_pic->motion_val[m][b_xy - 1 + i * s->b_stride]);
766            } else {
767                for (i = 0; i < 4; i++)
768                    AV_ZERO32(s->mv_cache[m][scan8[0] - 1 + i * 8]);
769            }
770            if (s->mb_y > 0) {
771                memcpy(s->mv_cache[m][scan8[0] - 1 * 8],
772                       s->cur_pic->motion_val[m][b_xy - s->b_stride],
773                       4 * 2 * sizeof(int16_t));
774                memset(&s->ref_cache[m][scan8[0] - 1 * 8],
775                       (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
776
777                if (s->mb_x < s->mb_width - 1) {
778                    AV_COPY32(s->mv_cache[m][scan8[0] + 4 - 1 * 8],
779                              s->cur_pic->motion_val[m][b_xy - s->b_stride + 4]);
780                    s->ref_cache[m][scan8[0] + 4 - 1 * 8] =
781                        (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride + 1] + 6] == -1 ||
782                         s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
783                } else
784                    s->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
785                if (s->mb_x > 0) {
786                    AV_COPY32(s->mv_cache[m][scan8[0] - 1 - 1 * 8],
787                              s->cur_pic->motion_val[m][b_xy - s->b_stride - 1]);
788                    s->ref_cache[m][scan8[0] - 1 - 1 * 8] =
789                        (s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
790                } else
791                    s->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
792            } else
793                memset(&s->ref_cache[m][scan8[0] - 1 * 8 - 1],
794                       PART_NOT_AVAILABLE, 8);
795
796            if (s->pict_type != AV_PICTURE_TYPE_B)
797                break;
798        }
799
800        /* decode motion vector(s) and form prediction(s) */
801        if (s->pict_type == AV_PICTURE_TYPE_P) {
802            if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
803                return -1;
804        } else {        /* AV_PICTURE_TYPE_B */
805            if (mb_type != 2) {
806                if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
807                    return -1;
808            } else {
809                for (i = 0; i < 4; i++)
810                    memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
811                           0, 4 * 2 * sizeof(int16_t));
812            }
813            if (mb_type != 1) {
814                if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
815                    return -1;
816            } else {
817                for (i = 0; i < 4; i++)
818                    memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
819                           0, 4 * 2 * sizeof(int16_t));
820            }
821        }
822
823        mb_type = MB_TYPE_16x16;
824    } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
825        int8_t *i4x4       = s->intra4x4_pred_mode + s->mb2br_xy[s->mb_xy];
826        int8_t *i4x4_cache = s->intra4x4_pred_mode_cache;
827
828        memset(s->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
829
830        if (mb_type == 8) {
831            if (s->mb_x > 0) {
832                for (i = 0; i < 4; i++)
833                    s->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - 1] + 6 - i];
834                if (s->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
835                    s->left_samples_available = 0x5F5F;
836            }
837            if (s->mb_y > 0) {
838                s->intra4x4_pred_mode_cache[4 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 0];
839                s->intra4x4_pred_mode_cache[5 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 1];
840                s->intra4x4_pred_mode_cache[6 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 2];
841                s->intra4x4_pred_mode_cache[7 + 8 * 0] = s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride] + 3];
842
843                if (s->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
844                    s->top_samples_available = 0x33FF;
845            }
846
847            /* decode prediction codes for luma blocks */
848            for (i = 0; i < 16; i += 2) {
849                vlc = get_interleaved_ue_golomb(&s->gb_slice);
850
851                if (vlc >= 25U) {
852                    av_log(s->avctx, AV_LOG_ERROR,
853                           "luma prediction:%"PRIu32"\n", vlc);
854                    return -1;
855                }
856
857                left = &s->intra4x4_pred_mode_cache[scan8[i] - 1];
858                top  = &s->intra4x4_pred_mode_cache[scan8[i] - 8];
859
860                left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
861                left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
862
863                if (left[1] == -1 || left[2] == -1) {
864                    av_log(s->avctx, AV_LOG_ERROR, "weird prediction\n");
865                    return -1;
866                }
867            }
868        } else {    /* mb_type == 33, DC_128_PRED block type */
869            for (i = 0; i < 4; i++)
870                memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
871        }
872
873        AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
874        i4x4[4] = i4x4_cache[7 + 8 * 3];
875        i4x4[5] = i4x4_cache[7 + 8 * 2];
876        i4x4[6] = i4x4_cache[7 + 8 * 1];
877
878        if (mb_type == 8) {
879            ff_h264_check_intra4x4_pred_mode(s->intra4x4_pred_mode_cache,
880                                             s->avctx, s->top_samples_available,
881                                             s->left_samples_available);
882
883            s->top_samples_available  = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
884            s->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
885        } else {
886            for (i = 0; i < 4; i++)
887                memset(&s->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
888
889            s->top_samples_available  = 0x33FF;
890            s->left_samples_available = 0x5F5F;
891        }
892
893        mb_type = MB_TYPE_INTRA4x4;
894    } else {                      /* INTRA16x16 */
895        dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
896        dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
897
898        if ((s->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
899                                                                     s->left_samples_available, dir, 0)) < 0) {
900            av_log(s->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
901            return s->intra16x16_pred_mode;
902        }
903
904        cbp     = ff_h264_i_mb_type_info[mb_type - 8].cbp;
905        mb_type = MB_TYPE_INTRA16x16;
906    }
907
908    if (!IS_INTER(mb_type) && s->pict_type != AV_PICTURE_TYPE_I) {
909        for (i = 0; i < 4; i++)
910            memset(s->cur_pic->motion_val[0][b_xy + i * s->b_stride],
911                   0, 4 * 2 * sizeof(int16_t));
912        if (s->pict_type == AV_PICTURE_TYPE_B) {
913            for (i = 0; i < 4; i++)
914                memset(s->cur_pic->motion_val[1][b_xy + i * s->b_stride],
915                       0, 4 * 2 * sizeof(int16_t));
916        }
917    }
918    if (!IS_INTRA4x4(mb_type)) {
919        memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
920    }
921    if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
922        memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
923    }
924
925    if (!IS_INTRA16x16(mb_type) &&
926        (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B)) {
927        if ((vlc = get_interleaved_ue_golomb(&s->gb_slice)) >= 48U){
928            av_log(s->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
929            return -1;
930        }
931
932        cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
933                                : ff_h264_golomb_to_inter_cbp[vlc];
934    }
935    if (IS_INTRA16x16(mb_type) ||
936        (s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
937        s->qscale += get_interleaved_se_golomb(&s->gb_slice);
938
939        if (s->qscale > 31u) {
940            av_log(s->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
941            return -1;
942        }
943    }
944    if (IS_INTRA16x16(mb_type)) {
945        AV_ZERO128(s->mb_luma_dc[0] + 0);
946        AV_ZERO128(s->mb_luma_dc[0] + 8);
947        if (svq3_decode_block(&s->gb_slice, s->mb_luma_dc[0], 0, 1)) {
948            av_log(s->avctx, AV_LOG_ERROR,
949                   "error while decoding intra luma dc\n");
950            return -1;
951        }
952    }
953
954    if (cbp) {
955        const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
956        const int type  = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
957
958        for (i = 0; i < 4; i++)
959            if ((cbp & (1 << i))) {
960                for (j = 0; j < 4; j++) {
961                    k = index ? (1 * (j & 1) + 2 * (i & 1) +
962                                 2 * (j & 2) + 4 * (i & 2))
963                              : (4 * i + j);
964                    s->non_zero_count_cache[scan8[k]] = 1;
965
966                    if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], index, type)) {
967                        av_log(s->avctx, AV_LOG_ERROR,
968                               "error while decoding block\n");
969                        return -1;
970                    }
971                }
972            }
973
974        if ((cbp & 0x30)) {
975            for (i = 1; i < 3; ++i)
976                if (svq3_decode_block(&s->gb_slice, &s->mb[16 * 16 * i], 0, 3)) {
977                    av_log(s->avctx, AV_LOG_ERROR,
978                           "error while decoding chroma dc block\n");
979                    return -1;
980                }
981
982            if ((cbp & 0x20)) {
983                for (i = 1; i < 3; i++) {
984                    for (j = 0; j < 4; j++) {
985                        k                                 = 16 * i + j;
986                        s->non_zero_count_cache[scan8[k]] = 1;
987
988                        if (svq3_decode_block(&s->gb_slice, &s->mb[16 * k], 1, 1)) {
989                            av_log(s->avctx, AV_LOG_ERROR,
990                                   "error while decoding chroma ac block\n");
991                            return -1;
992                        }
993                    }
994                }
995            }
996        }
997    }
998
999    s->cbp                     = cbp;
1000    s->cur_pic->mb_type[mb_xy] = mb_type;
1001
1002    if (IS_INTRA(mb_type))
1003        s->chroma_pred_mode = ff_h264_check_intra_pred_mode(s->avctx, s->top_samples_available,
1004                                                            s->left_samples_available, DC_PRED8x8, 1);
1005
1006    return 0;
1007}
1008
1009static int svq3_decode_slice_header(AVCodecContext *avctx)
1010{
1011    SVQ3Context *s = avctx->priv_data;
1012    const int mb_xy   = s->mb_xy;
1013    int i, header;
1014    unsigned slice_id;
1015
1016    header = get_bits(&s->gb, 8);
1017
1018    if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
1019        /* TODO: what? */
1020        av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
1021        return -1;
1022    } else {
1023        int slice_bits, slice_bytes, slice_length;
1024        int length = header >> 5 & 3;
1025
1026        slice_length = show_bits(&s->gb, 8 * length);
1027        slice_bits   = slice_length * 8;
1028        slice_bytes  = slice_length + length - 1;
1029
1030        skip_bits(&s->gb, 8);
1031
1032        av_fast_padded_malloc(&s->slice_buf, &s->slice_buf_size, slice_bytes);
1033        if (!s->slice_buf)
1034            return AVERROR(ENOMEM);
1035
1036        if (slice_bytes * 8LL > get_bits_left(&s->gb)) {
1037            av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
1038            return AVERROR_INVALIDDATA;
1039        }
1040        memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
1041
1042        if (s->watermark_key) {
1043            uint32_t header = AV_RL32(&s->slice_buf[1]);
1044            AV_WL32(&s->slice_buf[1], header ^ s->watermark_key);
1045        }
1046        init_get_bits(&s->gb_slice, s->slice_buf, slice_bits);
1047
1048        if (length > 0) {
1049            memmove(s->slice_buf, &s->slice_buf[slice_length], length - 1);
1050        }
1051        skip_bits_long(&s->gb, slice_bytes * 8);
1052    }
1053
1054    if ((slice_id = get_interleaved_ue_golomb(&s->gb_slice)) >= 3) {
1055        av_log(s->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
1056        return -1;
1057    }
1058
1059    s->slice_type = ff_h264_golomb_to_pict_type[slice_id];
1060
1061    if ((header & 0x9F) == 2) {
1062        i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
1063        get_bits(&s->gb_slice, i);
1064    } else if (get_bits1(&s->gb_slice)) {
1065        avpriv_report_missing_feature(s->avctx, "Media key encryption");
1066        return AVERROR_PATCHWELCOME;
1067    }
1068
1069    s->slice_num      = get_bits(&s->gb_slice, 8);
1070    s->qscale         = get_bits(&s->gb_slice, 5);
1071    s->adaptive_quant = get_bits1(&s->gb_slice);
1072
1073    /* unknown fields */
1074    skip_bits1(&s->gb_slice);
1075
1076    if (s->has_watermark)
1077        skip_bits1(&s->gb_slice);
1078
1079    skip_bits1(&s->gb_slice);
1080    skip_bits(&s->gb_slice, 2);
1081
1082    if (skip_1stop_8data_bits(&s->gb_slice) < 0)
1083        return AVERROR_INVALIDDATA;
1084
1085    /* reset intra predictors and invalidate motion vector references */
1086    if (s->mb_x > 0) {
1087        memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - 1] + 3,
1088               -1, 4 * sizeof(int8_t));
1089        memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_x],
1090               -1, 8 * sizeof(int8_t) * s->mb_x);
1091    }
1092    if (s->mb_y > 0) {
1093        memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy - s->mb_stride],
1094               -1, 8 * sizeof(int8_t) * (s->mb_width - s->mb_x));
1095
1096        if (s->mb_x > 0)
1097            s->intra4x4_pred_mode[s->mb2br_xy[mb_xy - s->mb_stride - 1] + 3] = -1;
1098    }
1099
1100    return 0;
1101}
1102
1103static void init_dequant4_coeff_table(SVQ3Context *s)
1104{
1105    int q, x;
1106    const int max_qp = 51;
1107
1108    for (q = 0; q < max_qp + 1; q++) {
1109        int shift = ff_h264_quant_div6[q] + 2;
1110        int idx   = ff_h264_quant_rem6[q];
1111        for (x = 0; x < 16; x++)
1112            s->dequant4_coeff[q][(x >> 2) | ((x << 2) & 0xF)] =
1113                ((uint32_t)ff_h264_dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] * 16) << shift;
1114    }
1115}
1116
1117static av_cold int svq3_decode_init(AVCodecContext *avctx)
1118{
1119    SVQ3Context *s = avctx->priv_data;
1120    int m, x, y;
1121    unsigned char *extradata;
1122    unsigned char *extradata_end;
1123    unsigned int size;
1124    int marker_found = 0;
1125    int ret;
1126
1127    s->cur_pic  = &s->frames[0];
1128    s->last_pic = &s->frames[1];
1129    s->next_pic = &s->frames[2];
1130
1131    s->cur_pic->f  = av_frame_alloc();
1132    s->last_pic->f = av_frame_alloc();
1133    s->next_pic->f = av_frame_alloc();
1134    if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1135        return AVERROR(ENOMEM);
1136
1137    ff_h264dsp_init(&s->h264dsp, 8, 1);
1138    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1139    ff_videodsp_init(&s->vdsp, 8);
1140
1141
1142    avctx->bits_per_raw_sample = 8;
1143
1144    ff_hpeldsp_init(&s->hdsp, avctx->flags);
1145    ff_tpeldsp_init(&s->tdsp);
1146
1147    avctx->pix_fmt     = AV_PIX_FMT_YUVJ420P;
1148    avctx->color_range = AVCOL_RANGE_JPEG;
1149
1150    s->avctx         = avctx;
1151    s->halfpel_flag  = 1;
1152    s->thirdpel_flag = 1;
1153    s->has_watermark = 0;
1154
1155    /* prowl for the "SEQH" marker in the extradata */
1156    extradata     = (unsigned char *)avctx->extradata;
1157    extradata_end = avctx->extradata + avctx->extradata_size;
1158    if (extradata) {
1159        for (m = 0; m + 8 < avctx->extradata_size; m++) {
1160            if (!memcmp(extradata, "SEQH", 4)) {
1161                marker_found = 1;
1162                break;
1163            }
1164            extradata++;
1165        }
1166    }
1167
1168    /* if a match was found, parse the extra data */
1169    if (marker_found) {
1170        GetBitContext gb;
1171        int frame_size_code;
1172        int unk0, unk1, unk2, unk3, unk4;
1173        int w,h;
1174
1175        size = AV_RB32(&extradata[4]);
1176        if (size > extradata_end - extradata - 8)
1177            return AVERROR_INVALIDDATA;
1178        init_get_bits(&gb, extradata + 8, size * 8);
1179
1180        /* 'frame size code' and optional 'width, height' */
1181        frame_size_code = get_bits(&gb, 3);
1182        switch (frame_size_code) {
1183        case 0:
1184            w = 160;
1185            h = 120;
1186            break;
1187        case 1:
1188            w = 128;
1189            h =  96;
1190            break;
1191        case 2:
1192            w = 176;
1193            h = 144;
1194            break;
1195        case 3:
1196            w = 352;
1197            h = 288;
1198            break;
1199        case 4:
1200            w = 704;
1201            h = 576;
1202            break;
1203        case 5:
1204            w = 240;
1205            h = 180;
1206            break;
1207        case 6:
1208            w = 320;
1209            h = 240;
1210            break;
1211        case 7:
1212            w = get_bits(&gb, 12);
1213            h = get_bits(&gb, 12);
1214            break;
1215        }
1216        ret = ff_set_dimensions(avctx, w, h);
1217        if (ret < 0)
1218            return ret;
1219
1220        s->halfpel_flag  = get_bits1(&gb);
1221        s->thirdpel_flag = get_bits1(&gb);
1222
1223        /* unknown fields */
1224        unk0 = get_bits1(&gb);
1225        unk1 = get_bits1(&gb);
1226        unk2 = get_bits1(&gb);
1227        unk3 = get_bits1(&gb);
1228
1229        s->low_delay = get_bits1(&gb);
1230
1231        /* unknown field */
1232        unk4 = get_bits1(&gb);
1233
1234        av_log(avctx, AV_LOG_DEBUG, "Unknown fields %d %d %d %d %d\n",
1235               unk0, unk1, unk2, unk3, unk4);
1236
1237        if (skip_1stop_8data_bits(&gb) < 0)
1238            return AVERROR_INVALIDDATA;
1239
1240        s->has_watermark  = get_bits1(&gb);
1241        avctx->has_b_frames = !s->low_delay;
1242        if (s->has_watermark) {
1243#if CONFIG_ZLIB
1244            unsigned watermark_width  = get_interleaved_ue_golomb(&gb);
1245            unsigned watermark_height = get_interleaved_ue_golomb(&gb);
1246            int u1                    = get_interleaved_ue_golomb(&gb);
1247            int u2                    = get_bits(&gb, 8);
1248            int u3                    = get_bits(&gb, 2);
1249            int u4                    = get_interleaved_ue_golomb(&gb);
1250            unsigned long buf_len     = watermark_width *
1251                                        watermark_height * 4;
1252            int offset                = get_bits_count(&gb) + 7 >> 3;
1253            uint8_t *buf;
1254
1255            if (watermark_height <= 0 ||
1256                (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1257                return AVERROR_INVALIDDATA;
1258
1259            buf = av_malloc(buf_len);
1260            if (!buf)
1261                return AVERROR(ENOMEM);
1262
1263            av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1264                   watermark_width, watermark_height);
1265            av_log(avctx, AV_LOG_DEBUG,
1266                   "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1267                   u1, u2, u3, u4, offset);
1268            if (uncompress(buf, &buf_len, extradata + 8 + offset,
1269                           size - offset) != Z_OK) {
1270                av_log(avctx, AV_LOG_ERROR,
1271                       "could not uncompress watermark logo\n");
1272                av_free(buf);
1273                return -1;
1274            }
1275            s->watermark_key = av_bswap16(av_crc(av_crc_get_table(AV_CRC_16_CCITT), 0, buf, buf_len));
1276
1277            s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1278            av_log(avctx, AV_LOG_DEBUG,
1279                   "watermark key %#"PRIx32"\n", s->watermark_key);
1280            av_free(buf);
1281#else
1282            av_log(avctx, AV_LOG_ERROR,
1283                   "this svq3 file contains watermark which need zlib support compiled in\n");
1284            return AVERROR(ENOSYS);
1285#endif
1286        }
1287    }
1288
1289    s->mb_width   = (avctx->width + 15) / 16;
1290    s->mb_height  = (avctx->height + 15) / 16;
1291    s->mb_stride  = s->mb_width + 1;
1292    s->mb_num     = s->mb_width * s->mb_height;
1293    s->b_stride   = 4 * s->mb_width;
1294    s->h_edge_pos = s->mb_width * 16;
1295    s->v_edge_pos = s->mb_height * 16;
1296
1297    s->intra4x4_pred_mode = av_mallocz(s->mb_stride * 2 * 8);
1298    if (!s->intra4x4_pred_mode)
1299        return AVERROR(ENOMEM);
1300
1301    s->mb2br_xy = av_mallocz(s->mb_stride * (s->mb_height + 1) *
1302                             sizeof(*s->mb2br_xy));
1303    if (!s->mb2br_xy)
1304        return AVERROR(ENOMEM);
1305
1306    for (y = 0; y < s->mb_height; y++)
1307        for (x = 0; x < s->mb_width; x++) {
1308            const int mb_xy = x + y * s->mb_stride;
1309
1310            s->mb2br_xy[mb_xy] = 8 * (mb_xy % (2 * s->mb_stride));
1311        }
1312
1313    init_dequant4_coeff_table(s);
1314
1315    return 0;
1316}
1317
1318static void free_picture(SVQ3Frame *pic)
1319{
1320    int i;
1321    for (i = 0; i < 2; i++) {
1322        av_freep(&pic->motion_val_buf[i]);
1323    }
1324    av_freep(&pic->mb_type_buf);
1325
1326    av_frame_unref(pic->f);
1327}
1328
1329static int get_buffer(AVCodecContext *avctx, SVQ3Frame *pic)
1330{
1331    SVQ3Context *s = avctx->priv_data;
1332    const int big_mb_num    = s->mb_stride * (s->mb_height + 1) + 1;
1333    const int b4_stride     = s->mb_width * 4 + 1;
1334    const int b4_array_size = b4_stride * s->mb_height * 4;
1335    int ret;
1336
1337    if (!pic->motion_val_buf[0]) {
1338        int i;
1339
1340        pic->mb_type_buf = av_calloc(big_mb_num + s->mb_stride, sizeof(uint32_t));
1341        if (!pic->mb_type_buf)
1342            return AVERROR(ENOMEM);
1343        pic->mb_type = pic->mb_type_buf + 2 * s->mb_stride + 1;
1344
1345        for (i = 0; i < 2; i++) {
1346            pic->motion_val_buf[i] = av_calloc(b4_array_size + 4, 2 * sizeof(int16_t));
1347            if (!pic->motion_val_buf[i]) {
1348                ret = AVERROR(ENOMEM);
1349                goto fail;
1350            }
1351
1352            pic->motion_val[i] = pic->motion_val_buf[i] + 4;
1353        }
1354    }
1355
1356    ret = ff_get_buffer(avctx, pic->f,
1357                        (s->pict_type != AV_PICTURE_TYPE_B) ?
1358                         AV_GET_BUFFER_FLAG_REF : 0);
1359    if (ret < 0)
1360        goto fail;
1361
1362    if (!s->edge_emu_buffer) {
1363        s->edge_emu_buffer = av_calloc(pic->f->linesize[0], 17);
1364        if (!s->edge_emu_buffer)
1365            return AVERROR(ENOMEM);
1366    }
1367
1368    return 0;
1369fail:
1370    free_picture(pic);
1371    return ret;
1372}
1373
1374static int svq3_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
1375                             int *got_frame, AVPacket *avpkt)
1376{
1377    SVQ3Context *s     = avctx->priv_data;
1378    int buf_size       = avpkt->size;
1379    int left;
1380    int ret, m, i;
1381
1382    /* special case for last picture */
1383    if (buf_size == 0) {
1384        if (s->next_pic->f->data[0] && !s->low_delay && !s->last_frame_output) {
1385            ret = av_frame_ref(rframe, s->next_pic->f);
1386            if (ret < 0)
1387                return ret;
1388            s->last_frame_output = 1;
1389            *got_frame          = 1;
1390        }
1391        return 0;
1392    }
1393
1394    s->mb_x = s->mb_y = s->mb_xy = 0;
1395
1396    ret = init_get_bits8(&s->gb, avpkt->data, avpkt->size);
1397    if (ret < 0)
1398        return ret;
1399
1400    if (svq3_decode_slice_header(avctx))
1401        return -1;
1402
1403    s->pict_type = s->slice_type;
1404
1405    if (s->pict_type != AV_PICTURE_TYPE_B)
1406        FFSWAP(SVQ3Frame*, s->next_pic, s->last_pic);
1407
1408    av_frame_unref(s->cur_pic->f);
1409
1410    /* for skipping the frame */
1411    s->cur_pic->f->pict_type = s->pict_type;
1412    s->cur_pic->f->key_frame = (s->pict_type == AV_PICTURE_TYPE_I);
1413
1414    ret = get_buffer(avctx, s->cur_pic);
1415    if (ret < 0)
1416        return ret;
1417
1418    for (i = 0; i < 16; i++) {
1419        s->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1420        s->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
1421    }
1422    for (i = 0; i < 16; i++) {
1423        s->block_offset[16 + i]      =
1424        s->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1425        s->block_offset[48 + 16 + i] =
1426        s->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
1427    }
1428
1429    if (s->pict_type != AV_PICTURE_TYPE_I) {
1430        if (!s->last_pic->f->data[0]) {
1431            av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1432            av_frame_unref(s->last_pic->f);
1433            ret = get_buffer(avctx, s->last_pic);
1434            if (ret < 0)
1435                return ret;
1436            memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1437            memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1438                   s->last_pic->f->linesize[1]);
1439            memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1440                   s->last_pic->f->linesize[2]);
1441        }
1442
1443        if (s->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1444            av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1445            av_frame_unref(s->next_pic->f);
1446            ret = get_buffer(avctx, s->next_pic);
1447            if (ret < 0)
1448                return ret;
1449            memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1450            memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1451                   s->next_pic->f->linesize[1]);
1452            memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1453                   s->next_pic->f->linesize[2]);
1454        }
1455    }
1456
1457    if (avctx->debug & FF_DEBUG_PICT_INFO)
1458        av_log(s->avctx, AV_LOG_DEBUG,
1459               "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1460               av_get_picture_type_char(s->pict_type),
1461               s->halfpel_flag, s->thirdpel_flag,
1462               s->adaptive_quant, s->qscale, s->slice_num);
1463
1464    if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
1465        avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
1466        avctx->skip_frame >= AVDISCARD_ALL)
1467        return 0;
1468
1469    if (s->pict_type == AV_PICTURE_TYPE_B) {
1470        s->frame_num_offset = s->slice_num - s->prev_frame_num;
1471
1472        if (s->frame_num_offset < 0)
1473            s->frame_num_offset += 256;
1474        if (s->frame_num_offset == 0 ||
1475            s->frame_num_offset >= s->prev_frame_num_offset) {
1476            av_log(s->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1477            return -1;
1478        }
1479    } else {
1480        s->prev_frame_num        = s->frame_num;
1481        s->frame_num             = s->slice_num;
1482        s->prev_frame_num_offset = s->frame_num - s->prev_frame_num;
1483
1484        if (s->prev_frame_num_offset < 0)
1485            s->prev_frame_num_offset += 256;
1486    }
1487
1488    for (m = 0; m < 2; m++) {
1489        int i;
1490        for (i = 0; i < 4; i++) {
1491            int j;
1492            for (j = -1; j < 4; j++)
1493                s->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1494            if (i < 3)
1495                s->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1496        }
1497    }
1498
1499    for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
1500        for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
1501            unsigned mb_type;
1502            s->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
1503
1504            if ((get_bits_left(&s->gb_slice)) <= 7) {
1505                if (((get_bits_count(&s->gb_slice) & 7) == 0 ||
1506                    show_bits(&s->gb_slice, get_bits_left(&s->gb_slice) & 7) == 0)) {
1507
1508                    if (svq3_decode_slice_header(avctx))
1509                        return -1;
1510                }
1511                if (s->slice_type != s->pict_type) {
1512                    avpriv_request_sample(avctx, "non constant slice type");
1513                }
1514                /* TODO: support s->mb_skip_run */
1515            }
1516
1517            mb_type = get_interleaved_ue_golomb(&s->gb_slice);
1518
1519            if (s->pict_type == AV_PICTURE_TYPE_I)
1520                mb_type += 8;
1521            else if (s->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1522                mb_type += 4;
1523            if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1524                av_log(s->avctx, AV_LOG_ERROR,
1525                       "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
1526                return -1;
1527            }
1528
1529            if (mb_type != 0 || s->cbp)
1530                hl_decode_mb(s);
1531
1532            if (s->pict_type != AV_PICTURE_TYPE_B && !s->low_delay)
1533                s->cur_pic->mb_type[s->mb_x + s->mb_y * s->mb_stride] =
1534                    (s->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1535        }
1536
1537        ff_draw_horiz_band(avctx, s->cur_pic->f,
1538                           s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1539                           16 * s->mb_y, 16, PICT_FRAME, 0,
1540                           s->low_delay);
1541    }
1542
1543    left = buf_size*8 - get_bits_count(&s->gb_slice);
1544
1545    if (s->mb_y != s->mb_height || s->mb_x != s->mb_width) {
1546        av_log(avctx, AV_LOG_INFO, "frame num %d incomplete pic x %d y %d left %d\n", avctx->frame_number, s->mb_y, s->mb_x, left);
1547        //av_hex_dump(stderr, buf+buf_size-8, 8);
1548    }
1549
1550    if (left < 0) {
1551        av_log(avctx, AV_LOG_ERROR, "frame num %d left %d\n", avctx->frame_number, left);
1552        return -1;
1553    }
1554
1555    if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay)
1556        ret = av_frame_ref(rframe, s->cur_pic->f);
1557    else if (s->last_pic->f->data[0])
1558        ret = av_frame_ref(rframe, s->last_pic->f);
1559    if (ret < 0)
1560        return ret;
1561
1562    /* Do not output the last pic after seeking. */
1563    if (s->last_pic->f->data[0] || s->low_delay)
1564        *got_frame = 1;
1565
1566    if (s->pict_type != AV_PICTURE_TYPE_B) {
1567        FFSWAP(SVQ3Frame*, s->cur_pic, s->next_pic);
1568    } else {
1569        av_frame_unref(s->cur_pic->f);
1570    }
1571
1572    return buf_size;
1573}
1574
1575static av_cold int svq3_decode_end(AVCodecContext *avctx)
1576{
1577    SVQ3Context *s = avctx->priv_data;
1578
1579    for (int i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
1580        free_picture(&s->frames[i]);
1581        av_frame_free(&s->frames[i].f);
1582    }
1583    av_freep(&s->slice_buf);
1584    av_freep(&s->intra4x4_pred_mode);
1585    av_freep(&s->edge_emu_buffer);
1586    av_freep(&s->mb2br_xy);
1587
1588    return 0;
1589}
1590
1591const FFCodec ff_svq3_decoder = {
1592    .p.name         = "svq3",
1593    .p.long_name    = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1594    .p.type         = AVMEDIA_TYPE_VIDEO,
1595    .p.id           = AV_CODEC_ID_SVQ3,
1596    .priv_data_size = sizeof(SVQ3Context),
1597    .init           = svq3_decode_init,
1598    .close          = svq3_decode_end,
1599    FF_CODEC_DECODE_CB(svq3_decode_frame),
1600    .p.capabilities = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1601                      AV_CODEC_CAP_DR1             |
1602                      AV_CODEC_CAP_DELAY,
1603    .p.pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1604                                                     AV_PIX_FMT_NONE},
1605    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1606};
1607