1/*
2 * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * H.264 / AVC / MPEG-4 part10 cavlc bitstream decoding.
25 * @author Michael Niedermayer <michaelni@gmx.at>
26 */
27
28#define CABAC(h) 0
29#define UNCHECKED_BITSTREAM_READER 1
30
31#include "h264dec.h"
32#include "h264_mvpred.h"
33#include "h264data.h"
34#include "golomb.h"
35#include "mpegutils.h"
36#include "libavutil/avassert.h"
37
38
/*
 * Two 16-entry remapping tables, each a permutation of the values 0..15.
 * They are not referenced in the portion of the file visible here; judging
 * by their names they presumably translate a Golomb-coded
 * coded_block_pattern index into a luma CBP for inter and intra4x4
 * macroblocks of gray (no chroma) streams -- TODO confirm against the
 * macroblock decoder.
 */
static const uint8_t golomb_to_inter_cbp_gray[16]={
 0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};

static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
46
/*
 * coeff_token code tables for chroma DC blocks.
 *
 * Each table is laid out as rows of four entries; decode_residual() splits
 * the decoded symbol as total_coeff = symbol >> 2 and
 * trailing_ones = symbol & 3, so row r / column c describes
 * total_coeff == r with c trailing ones. The *_len arrays are handed to
 * init_vlc() together with the matching *_bits arrays in
 * ff_h264_decode_init_vlc(); a length of 0 marks a combination that has no
 * code (trailing_ones > total_coeff).
 *
 * The 4*5 tables are used when max_coeff == 4, the 4*9 ("422") tables for
 * the other max_coeff <= 8 case.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5]={
 2, 0, 0, 0,
 6, 1, 0, 0,
 6, 6, 3, 0,
 6, 7, 7, 6,
 6, 8, 8, 7,
};

static const uint8_t chroma_dc_coeff_token_bits[4*5]={
 1, 0, 0, 0,
 7, 1, 0, 0,
 4, 6, 1, 0,
 3, 3, 2, 5,
 2, 3, 2, 0,
};

static const uint8_t chroma422_dc_coeff_token_len[4*9]={
  1,  0,  0,  0,
  7,  2,  0,  0,
  7,  7,  3,  0,
  9,  7,  7,  5,
  9,  9,  7,  6,
 10, 10,  9,  7,
 11, 11, 10,  7,
 12, 12, 11, 10,
 13, 12, 12, 11,
};

static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
  1,   0,  0, 0,
 15,   1,  0, 0,
 14,  13,  1, 0,
  7,  12, 11, 1,
  6,   5, 10, 1,
  7,   6,  4, 9,
  7,   6,  5, 8,
  7,   6,  5, 4,
  7,   5,  4, 4,
};
86
/*
 * coeff_token code tables for luma / chroma AC blocks.
 *
 * There are four alternative code sets; decode_residual() picks one via
 * coeff_token_table_index[] from the predicted non-zero count. Inside a set
 * the 4*17 entries are grouped in fours: entry 4*total_coeff + trailing_ones
 * (decode_residual() recovers both fields with >> 2 and & 3). A length of 0
 * marks a combination that has no code.
 *
 * coeff_token_len holds the code lengths and coeff_token_bits the matching
 * code words, as passed to init_vlc() in ff_h264_decode_init_vlc().
 */
static const uint8_t coeff_token_len[4][4*17]={
{
     1, 0, 0, 0,
     6, 2, 0, 0,     8, 6, 3, 0,     9, 8, 7, 5,    10, 9, 8, 6,
    11,10, 9, 7,    13,11,10, 8,    13,13,11, 9,    13,13,13,10,
    14,14,13,11,    14,14,14,13,    15,15,14,14,    15,15,15,14,
    16,15,15,15,    16,16,16,15,    16,16,16,16,    16,16,16,16,
},
{
     2, 0, 0, 0,
     6, 2, 0, 0,     6, 5, 3, 0,     7, 6, 6, 4,     8, 6, 6, 4,
     8, 7, 7, 5,     9, 8, 8, 6,    11, 9, 9, 6,    11,11,11, 7,
    12,11,11, 9,    12,12,12,11,    12,12,12,11,    13,13,13,12,
    13,13,13,13,    13,14,13,13,    14,14,14,13,    14,14,14,14,
},
{
     4, 0, 0, 0,
     6, 4, 0, 0,     6, 5, 4, 0,     6, 5, 5, 4,     7, 5, 5, 4,
     7, 5, 5, 4,     7, 6, 6, 4,     7, 6, 6, 4,     8, 7, 7, 5,
     8, 8, 7, 6,     9, 8, 8, 7,     9, 9, 8, 8,     9, 9, 9, 8,
    10, 9, 9, 9,    10,10,10,10,    10,10,10,10,    10,10,10,10,
},
{
     6, 0, 0, 0,
     6, 6, 0, 0,     6, 6, 6, 0,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,     6, 6, 6, 6,
}
};

static const uint8_t coeff_token_bits[4][4*17]={
{
     1, 0, 0, 0,
     5, 1, 0, 0,     7, 4, 1, 0,     7, 6, 5, 3,     7, 6, 5, 3,
     7, 6, 5, 4,    15, 6, 5, 4,    11,14, 5, 4,     8,10,13, 4,
    15,14, 9, 4,    11,10,13,12,    15,14, 9,12,    11,10,13, 8,
    15, 1, 9,12,    11,14,13, 8,     7,10, 9,12,     4, 6, 5, 8,
},
{
     3, 0, 0, 0,
    11, 2, 0, 0,     7, 7, 3, 0,     7,10, 9, 5,     7, 6, 5, 4,
     4, 6, 5, 6,     7, 6, 5, 8,    15, 6, 5, 4,    11,14,13, 4,
    15,10, 9, 4,    11,14,13,12,     8,10, 9, 8,    15,14,13,12,
    11,10, 9,12,     7,11, 6, 8,     9, 8,10, 1,     7, 6, 5, 4,
},
{
    15, 0, 0, 0,
    15,14, 0, 0,    11,15,13, 0,     8,12,14,12,    15,10,11,11,
    11, 8, 9,10,     9,14,13, 9,     8,10, 9, 8,    15,14,13,13,
    11,14,10,12,    15,10,13,12,    11,14, 9,12,     8,10,13, 8,
    13, 7, 9,12,     9,12,11,10,     5, 8, 7, 6,     1, 4, 3, 2,
},
{
     3, 0, 0, 0,
     0, 1, 0, 0,     4, 5, 6, 0,     8, 9,10,11,    12,13,14,15,
    16,17,18,19,    20,21,22,23,    24,25,26,27,    28,29,30,31,
    32,33,34,35,    36,37,38,39,    40,41,42,43,    44,45,46,47,
    48,49,50,51,    52,53,54,55,    56,57,58,59,    60,61,62,63,
}
};
148
/*
 * total_zeros code tables for blocks with more than 8 coefficients.
 *
 * Row i feeds total_zeros_vlc[i + 1] in ff_h264_decode_init_vlc(), and
 * decode_residual() indexes that array with total_coeff, so row i belongs to
 * total_coeff == i + 1. Only 15 rows are initialised; the remaining row and
 * the unlisted tail of every shorter row are implicitly zero (length 0,
 * i.e. no code).
 */
static const uint8_t total_zeros_len[16][16]= {
    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
    {5,3,4,4,3,3,3,4,3,4,5,5,5},
    {4,4,4,3,3,3,3,3,4,5,4,5},
    {6,5,3,3,3,3,3,3,4,3,6},
    {6,5,3,3,3,2,3,4,3,6},
    {6,4,5,3,2,2,3,3,6},
    {6,6,4,2,2,3,2,5},
    {5,5,3,2,2,2,4},
    {4,4,3,3,1,3},
    {4,4,2,1,3},
    {3,3,1,2},
    {2,2,1},
    {1,1},
};

static const uint8_t total_zeros_bits[16][16]= {
    {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
    {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
    {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
    {3,7,5,4,6,5,4,3,3,2,2,1,0},
    {5,4,3,7,6,5,4,3,2,1,1,0},
    {1,1,7,6,5,4,3,2,1,1,0},
    {1,1,5,4,3,3,2,1,1,0},
    {1,1,1,3,3,2,2,1,0},
    {1,0,1,3,2,1,1,1},
    {1,0,1,3,2,1,1},
    {0,1,1,2,1,3},
    {0,1,1,1,1},
    {0,1,1,1},
    {0,1,1},
    {0,1},
};
184
/*
 * total_zeros code tables for chroma DC blocks.
 *
 * Row i feeds chroma_dc_total_zeros_vlc[i + 1] (resp.
 * chroma422_dc_total_zeros_vlc[i + 1]) in ff_h264_decode_init_vlc();
 * decode_residual() indexes those arrays with total_coeff. The [3][4]
 * tables serve max_coeff == 4, the [7][8] tables the other
 * max_coeff <= 8 case. Entries with length 0 (explicit or implicit) have
 * no code.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
    { 1, 2, 3, 3,},
    { 1, 2, 2, 0,},
    { 1, 1, 0, 0,},
};

static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
    { 1, 1, 1, 0,},
    { 1, 1, 0, 0,},
    { 1, 0, 0, 0,},
};

static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
216
/*
 * run_before code tables.
 *
 * Rows 0..5 feed run_vlc[1..6] and row 6 feeds run7_vlc in
 * ff_h264_decode_init_vlc(). decode_residual() selects run_vlc[zeros_left]
 * while zeros_left < 7 and run7_vlc otherwise. Unlisted trailing entries
 * are implicitly zero (length 0, i.e. no code).
 */
static const uint8_t run_len[7][16]={
    {1,1},
    {1,2,2},
    {2,2,2,2},
    {2,2,2,3,3},
    {2,2,3,3,3,3},
    {2,3,3,3,3,3,3},
    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};

static const uint8_t run_bits[7][16]={
    {1,0},
    {1,1,0},
    {3,2,1,0},
    {3,2,1,1,0},
    {3,2,3,2,1,0},
    {3,0,1,3,2,5,4},
    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
236
237static VLC coeff_token_vlc[4];
238static VLCElem coeff_token_vlc_tables[520+332+280+256];
239static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
240
241static VLC chroma_dc_coeff_token_vlc;
242static VLCElem chroma_dc_coeff_token_vlc_table[256];
243static const int chroma_dc_coeff_token_vlc_table_size = 256;
244
245static VLC chroma422_dc_coeff_token_vlc;
246static VLCElem chroma422_dc_coeff_token_vlc_table[8192];
247static const int chroma422_dc_coeff_token_vlc_table_size = 8192;
248
249static VLC total_zeros_vlc[15+1];
250static VLCElem total_zeros_vlc_tables[15][512];
251static const int total_zeros_vlc_tables_size = 512;
252
253static VLC chroma_dc_total_zeros_vlc[3+1];
254static VLCElem chroma_dc_total_zeros_vlc_tables[3][8];
255static const int chroma_dc_total_zeros_vlc_tables_size = 8;
256
257static VLC chroma422_dc_total_zeros_vlc[7+1];
258static VLCElem chroma422_dc_total_zeros_vlc_tables[7][32];
259static const int chroma422_dc_total_zeros_vlc_tables_size = 32;
260
261static VLC run_vlc[6+1];
262static VLCElem run_vlc_tables[6][8];
263static const int run_vlc_tables_size = 8;
264
265static VLC run7_vlc;
266static VLCElem run7_vlc_table[96];
267static const int run7_vlc_table_size = 96;
268
269#define LEVEL_TAB_BITS 8
270static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
271
272#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
273#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
274#define COEFF_TOKEN_VLC_BITS           8
275#define TOTAL_ZEROS_VLC_BITS           9
276#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
277#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
278#define RUN_VLC_BITS                   3
279#define RUN7_VLC_BITS                  6
280
281/**
282 * Get the predicted number of non-zero coefficients.
283 * @param n block index
284 */
285static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
286{
287    const int index8= scan8[n];
288    const int left = sl->non_zero_count_cache[index8 - 1];
289    const int top  = sl->non_zero_count_cache[index8 - 8];
290    int i= left + top;
291
292    if(i<64) i= (i+1)>>1;
293
294    ff_tlog(h->avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
295
296    return i&31;
297}
298
299static av_cold void init_cavlc_level_tab(void){
300    int suffix_length;
301    unsigned int i;
302
303    for(suffix_length=0; suffix_length<7; suffix_length++){
304        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
305            int prefix= LEVEL_TAB_BITS - av_log2(2*i);
306
307            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
308                int level_code = (prefix << suffix_length) +
309                    (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
310                int mask = -(level_code&1);
311                level_code = (((2 + level_code) >> 1) ^ mask) - mask;
312                cavlc_level_tab[suffix_length][i][0]= level_code;
313                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
314            }else if(prefix + 1 <= LEVEL_TAB_BITS){
315                cavlc_level_tab[suffix_length][i][0]= prefix+100;
316                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
317            }else{
318                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
319                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
320            }
321        }
322    }
323}
324
325av_cold void ff_h264_decode_init_vlc(void)
326{
327    int offset;
328
329    chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
330    chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
331    init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
332             &chroma_dc_coeff_token_len [0], 1, 1,
333             &chroma_dc_coeff_token_bits[0], 1, 1,
334             INIT_VLC_USE_NEW_STATIC);
335
336    chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
337    chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
338    init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
339             &chroma422_dc_coeff_token_len [0], 1, 1,
340             &chroma422_dc_coeff_token_bits[0], 1, 1,
341             INIT_VLC_USE_NEW_STATIC);
342
343    offset = 0;
344    for (int i = 0; i < 4; i++) {
345        coeff_token_vlc[i].table = coeff_token_vlc_tables + offset;
346        coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
347        init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
348                 &coeff_token_len [i][0], 1, 1,
349                 &coeff_token_bits[i][0], 1, 1,
350                 INIT_VLC_USE_NEW_STATIC);
351        offset += coeff_token_vlc_tables_size[i];
352    }
353    /*
354     * This is a one time safety check to make sure that
355     * the packed static coeff_token_vlc table sizes
356     * were initialized correctly.
357     */
358    av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
359
360    for (int i = 0; i < 3; i++) {
361        chroma_dc_total_zeros_vlc[i + 1].table = chroma_dc_total_zeros_vlc_tables[i];
362        chroma_dc_total_zeros_vlc[i + 1].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
363        init_vlc(&chroma_dc_total_zeros_vlc[i + 1],
364                 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
365                 &chroma_dc_total_zeros_len [i][0], 1, 1,
366                 &chroma_dc_total_zeros_bits[i][0], 1, 1,
367                 INIT_VLC_USE_NEW_STATIC);
368    }
369
370    for (int i = 0; i < 7; i++) {
371        chroma422_dc_total_zeros_vlc[i + 1].table = chroma422_dc_total_zeros_vlc_tables[i];
372        chroma422_dc_total_zeros_vlc[i + 1].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
373        init_vlc(&chroma422_dc_total_zeros_vlc[i + 1],
374                 CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8,
375                 &chroma422_dc_total_zeros_len [i][0], 1, 1,
376                 &chroma422_dc_total_zeros_bits[i][0], 1, 1,
377                 INIT_VLC_USE_NEW_STATIC);
378    }
379
380    for (int i = 0; i < 15; i++) {
381        total_zeros_vlc[i + 1].table = total_zeros_vlc_tables[i];
382        total_zeros_vlc[i + 1].table_allocated = total_zeros_vlc_tables_size;
383        init_vlc(&total_zeros_vlc[i + 1],
384                 TOTAL_ZEROS_VLC_BITS, 16,
385                 &total_zeros_len [i][0], 1, 1,
386                 &total_zeros_bits[i][0], 1, 1,
387                 INIT_VLC_USE_NEW_STATIC);
388    }
389
390    for (int i = 0; i < 6; i++) {
391        run_vlc[i + 1].table = run_vlc_tables[i];
392        run_vlc[i + 1].table_allocated = run_vlc_tables_size;
393        init_vlc(&run_vlc[i + 1],
394                 RUN_VLC_BITS, 7,
395                 &run_len [i][0], 1, 1,
396                 &run_bits[i][0], 1, 1,
397                 INIT_VLC_USE_NEW_STATIC);
398    }
399    run7_vlc.table = run7_vlc_table;
400    run7_vlc.table_allocated = run7_vlc_table_size;
401    init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
402             &run_len [6][0], 1, 1,
403             &run_bits[6][0], 1, 1,
404             INIT_VLC_USE_NEW_STATIC);
405
406    init_cavlc_level_tab();
407}
408
409static inline int get_level_prefix(GetBitContext *gb){
410    unsigned int buf;
411    int log;
412
413    OPEN_READER(re, gb);
414    UPDATE_CACHE(re, gb);
415    buf=GET_CACHE(re, gb);
416
417    log= 32 - av_log2(buf);
418
419    LAST_SKIP_BITS(re, gb, log);
420    CLOSE_READER(re, gb);
421
422    return log-1;
423}
424
425/**
426 * Decode a residual block.
427 * @param n block index
428 * @param scantable scantable
429 * @param max_coeff number of coefficients in the block
430 * @return <0 if an error occurred
431 */
432static int decode_residual(const H264Context *h, H264SliceContext *sl,
433                           GetBitContext *gb, int16_t *block, int n,
434                           const uint8_t *scantable, const uint32_t *qmul,
435                           int max_coeff)
436{
437    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
438    int level[16];
439    int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
440
441    //FIXME put trailing_onex into the context
442
443    if(max_coeff <= 8){
444        if (max_coeff == 4)
445            coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
446        else
447            coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
448        total_coeff= coeff_token>>2;
449    }else{
450        if(n >= LUMA_DC_BLOCK_INDEX){
451            total_coeff= pred_non_zero_count(h, sl, (n - LUMA_DC_BLOCK_INDEX)*16);
452            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
453            total_coeff= coeff_token>>2;
454        }else{
455            total_coeff= pred_non_zero_count(h, sl, n);
456            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
457            total_coeff= coeff_token>>2;
458        }
459    }
460    sl->non_zero_count_cache[scan8[n]] = total_coeff;
461
462    //FIXME set last_non_zero?
463
464    if(total_coeff==0)
465        return 0;
466    if(total_coeff > (unsigned)max_coeff) {
467        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
468        return -1;
469    }
470
471    trailing_ones= coeff_token&3;
472    ff_tlog(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
473    av_assert2(total_coeff<=16);
474
475    i = show_bits(gb, 3);
476    skip_bits(gb, trailing_ones);
477    level[0] = 1-((i&4)>>1);
478    level[1] = 1-((i&2)   );
479    level[2] = 1-((i&1)<<1);
480
481    if(trailing_ones<total_coeff) {
482        int mask, prefix;
483        int suffix_length = total_coeff > 10 & trailing_ones < 3;
484        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
485        int level_code= cavlc_level_tab[suffix_length][bitsi][0];
486
487        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
488        if(level_code >= 100){
489            prefix= level_code - 100;
490            if(prefix == LEVEL_TAB_BITS)
491                prefix += get_level_prefix(gb);
492
493            //first coefficient has suffix_length equal to 0 or 1
494            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
495                if(suffix_length)
496                    level_code= (prefix<<1) + get_bits1(gb); //part
497                else
498                    level_code= prefix; //part
499            }else if(prefix==14){
500                if(suffix_length)
501                    level_code= (prefix<<1) + get_bits1(gb); //part
502                else
503                    level_code= prefix + get_bits(gb, 4); //part
504            }else{
505                level_code= 30;
506                if(prefix>=16){
507                    if(prefix > 25+3){
508                        av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
509                        return -1;
510                    }
511                    level_code += (1<<(prefix-3))-4096;
512                }
513                level_code += get_bits(gb, prefix-3); //part
514            }
515
516            if(trailing_ones < 3) level_code += 2;
517
518            suffix_length = 2;
519            mask= -(level_code&1);
520            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
521        }else{
522            level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
523
524            suffix_length = 1 + (level_code + 3U > 6U);
525            level[trailing_ones]= level_code;
526        }
527
528        //remaining coefficients have suffix_length > 0
529        for(i=trailing_ones+1;i<total_coeff;i++) {
530            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
531            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
532            level_code= cavlc_level_tab[suffix_length][bitsi][0];
533
534            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
535            if(level_code >= 100){
536                prefix= level_code - 100;
537                if(prefix == LEVEL_TAB_BITS){
538                    prefix += get_level_prefix(gb);
539                }
540                if(prefix<15){
541                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
542                }else{
543                    level_code = 15<<suffix_length;
544                    if (prefix>=16) {
545                        if(prefix > 25+3){
546                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
547                            return AVERROR_INVALIDDATA;
548                        }
549                        level_code += (1<<(prefix-3))-4096;
550                    }
551                    level_code += get_bits(gb, prefix-3);
552                }
553                mask= -(level_code&1);
554                level_code= (((2+level_code)>>1) ^ mask) - mask;
555            }
556            level[i]= level_code;
557            suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
558        }
559    }
560
561    if(total_coeff == max_coeff)
562        zeros_left=0;
563    else{
564        if (max_coeff <= 8) {
565            if (max_coeff == 4)
566                zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff].table,
567                                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
568            else
569                zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff].table,
570                                      CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
571        } else {
572            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
573        }
574    }
575
576#define STORE_BLOCK(type) \
577    scantable += zeros_left + total_coeff - 1; \
578    if(n >= LUMA_DC_BLOCK_INDEX){ \
579        ((type*)block)[*scantable] = level[0]; \
580        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
581            if(zeros_left < 7) \
582                run_before= get_vlc2(gb, run_vlc[zeros_left].table, RUN_VLC_BITS, 1); \
583            else \
584                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
585            zeros_left -= run_before; \
586            scantable -= 1 + run_before; \
587            ((type*)block)[*scantable]= level[i]; \
588        } \
589        for(;i<total_coeff;i++) { \
590            scantable--; \
591            ((type*)block)[*scantable]= level[i]; \
592        } \
593    }else{ \
594        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
595        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
596            if(zeros_left < 7) \
597                run_before= get_vlc2(gb, run_vlc[zeros_left].table, RUN_VLC_BITS, 1); \
598            else \
599                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
600            zeros_left -= run_before; \
601            scantable -= 1 + run_before; \
602            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
603        } \
604        for(;i<total_coeff;i++) { \
605            scantable--; \
606            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
607        } \
608    }
609
610    if (h->pixel_shift) {
611        STORE_BLOCK(int32_t)
612    } else {
613        STORE_BLOCK(int16_t)
614    }
615
616    if(zeros_left<0){
617        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
618        return -1;
619    }
620
621    return 0;
622}
623
624static av_always_inline
625int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
626                         GetBitContext *gb, const uint8_t *scan,
627                         const uint8_t *scan8x8, int pixel_shift,
628                         int mb_type, int cbp, int p)
629{
630    int i4x4, i8x8;
631    int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
632    if(IS_INTRA16x16(mb_type)){
633        AV_ZERO128(sl->mb_luma_dc[p]+0);
634        AV_ZERO128(sl->mb_luma_dc[p]+8);
635        AV_ZERO128(sl->mb_luma_dc[p]+16);
636        AV_ZERO128(sl->mb_luma_dc[p]+24);
637        if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
638            return -1; //FIXME continue if partitioned and other return -1 too
639        }
640
641        av_assert2((cbp&15) == 0 || (cbp&15) == 15);
642
643        if(cbp&15){
644            for(i8x8=0; i8x8<4; i8x8++){
645                for(i4x4=0; i4x4<4; i4x4++){
646                    const int index= i4x4 + 4*i8x8 + p*16;
647                    if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
648                        index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
649                        return -1;
650                    }
651                }
652            }
653            return 0xf;
654        }else{
655            fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
656            return 0;
657        }
658    }else{
659        int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
660        /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
661        int new_cbp = 0;
662        for(i8x8=0; i8x8<4; i8x8++){
663            if(cbp & (1<<i8x8)){
664                if(IS_8x8DCT(mb_type)){
665                    int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
666                    uint8_t *nnz;
667                    for(i4x4=0; i4x4<4; i4x4++){
668                        const int index= i4x4 + 4*i8x8 + p*16;
669                        if( decode_residual(h, sl, gb, buf, index, scan8x8+16*i4x4,
670                                            h->ps.pps->dequant8_coeff[cqm][qscale], 16) < 0 )
671                            return -1;
672                    }
673                    nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
674                    nnz[0] += nnz[1] + nnz[8] + nnz[9];
675                    new_cbp |= !!nnz[0] << i8x8;
676                }else{
677                    for(i4x4=0; i4x4<4; i4x4++){
678                        const int index= i4x4 + 4*i8x8 + p*16;
679                        if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
680                                            scan, h->ps.pps->dequant4_coeff[cqm][qscale], 16) < 0 ){
681                            return -1;
682                        }
683                        new_cbp |= sl->non_zero_count_cache[scan8[index]] << i8x8;
684                    }
685                }
686            }else{
687                uint8_t * const nnz = &sl->non_zero_count_cache[scan8[4 * i8x8 + p * 16]];
688                nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
689            }
690        }
691        return new_cbp;
692    }
693}
694
695int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
696{
697    int mb_xy;
698    int partition_count;
699    unsigned int mb_type, cbp;
700    int dct8x8_allowed = h->ps.pps->transform_8x8_mode;
701    const int decode_chroma = h->ps.sps->chroma_format_idc == 1 || h->ps.sps->chroma_format_idc == 2;
702    const int pixel_shift = h->pixel_shift;
703
704    mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
705
706    ff_tlog(h->avctx, "pic:%d mb:%d/%d\n", h->poc.frame_num, sl->mb_x, sl->mb_y);
707    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
708                down the code */
709    if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
710        if (sl->mb_skip_run == -1) {
711            unsigned mb_skip_run = get_ue_golomb_long(&sl->gb);
712            if (mb_skip_run > h->mb_num) {
713                av_log(h->avctx, AV_LOG_ERROR, "mb_skip_run %d is invalid\n", mb_skip_run);
714                return AVERROR_INVALIDDATA;
715            }
716            sl->mb_skip_run = mb_skip_run;
717        }
718
719        if (sl->mb_skip_run--) {
720            if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
721                if (sl->mb_skip_run == 0)
722                    sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
723            }
724            decode_mb_skip(h, sl);
725            return 0;
726        }
727    }
728    if (FRAME_MBAFF(h)) {
729        if ((sl->mb_y & 1) == 0)
730            sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
731    }
732
733    sl->prev_mb_skipped = 0;
734
735    mb_type= get_ue_golomb(&sl->gb);
736    if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
737        if(mb_type < 23){
738            partition_count = ff_h264_b_mb_type_info[mb_type].partition_count;
739            mb_type         = ff_h264_b_mb_type_info[mb_type].type;
740        }else{
741            mb_type -= 23;
742            goto decode_intra_mb;
743        }
744    } else if (sl->slice_type_nos == AV_PICTURE_TYPE_P) {
745        if(mb_type < 5){
746            partition_count = ff_h264_p_mb_type_info[mb_type].partition_count;
747            mb_type         = ff_h264_p_mb_type_info[mb_type].type;
748        }else{
749            mb_type -= 5;
750            goto decode_intra_mb;
751        }
752    }else{
753       av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
754        if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
755            mb_type--;
756decode_intra_mb:
757        if(mb_type > 25){
758            av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
759            return -1;
760        }
761        partition_count=0;
762        cbp                      = ff_h264_i_mb_type_info[mb_type].cbp;
763        sl->intra16x16_pred_mode = ff_h264_i_mb_type_info[mb_type].pred_mode;
764        mb_type                  = ff_h264_i_mb_type_info[mb_type].type;
765    }
766
767    if (MB_FIELD(sl))
768        mb_type |= MB_TYPE_INTERLACED;
769
770    h->slice_table[mb_xy] = sl->slice_num;
771
772    if(IS_INTRA_PCM(mb_type)){
773        const int mb_size = ff_h264_mb_sizes[h->ps.sps->chroma_format_idc] *
774                            h->ps.sps->bit_depth_luma;
775
776        // We assume these blocks are very rare so we do not optimize it.
777        sl->intra_pcm_ptr = align_get_bits(&sl->gb);
778        if (get_bits_left(&sl->gb) < mb_size) {
779            av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
780            return AVERROR_INVALIDDATA;
781        }
782        skip_bits_long(&sl->gb, mb_size);
783
784        // In deblocking, the quantizer is 0
785        h->cur_pic.qscale_table[mb_xy] = 0;
786        // All coeffs are present
787        memset(h->non_zero_count[mb_xy], 16, 48);
788
789        h->cur_pic.mb_type[mb_xy] = mb_type;
790        return 0;
791    }
792
793    fill_decode_neighbors(h, sl, mb_type);
794    fill_decode_caches(h, sl, mb_type);
795
796    //mb_pred
797    if(IS_INTRA(mb_type)){
798        int pred_mode;
799//            init_top_left_availability(h);
800        if(IS_INTRA4x4(mb_type)){
801            int i;
802            int di = 1;
803            if(dct8x8_allowed && get_bits1(&sl->gb)){
804                mb_type |= MB_TYPE_8x8DCT;
805                di = 4;
806            }
807
808//                fill_intra4x4_pred_table(h);
809            for(i=0; i<16; i+=di){
810                int mode = pred_intra_mode(h, sl, i);
811
812                if(!get_bits1(&sl->gb)){
813                    const int rem_mode= get_bits(&sl->gb, 3);
814                    mode = rem_mode + (rem_mode >= mode);
815                }
816
817                if(di==4)
818                    fill_rectangle(&sl->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1);
819                else
820                    sl->intra4x4_pred_mode_cache[scan8[i]] = mode;
821            }
822            write_back_intra_pred_mode(h, sl);
823            if (ff_h264_check_intra4x4_pred_mode(sl->intra4x4_pred_mode_cache, h->avctx,
824                                                 sl->top_samples_available, sl->left_samples_available) < 0)
825                return -1;
826        }else{
827            sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
828                                                                     sl->left_samples_available, sl->intra16x16_pred_mode, 0);
829            if (sl->intra16x16_pred_mode < 0)
830                return -1;
831        }
832        if(decode_chroma){
833            pred_mode= ff_h264_check_intra_pred_mode(h->avctx, sl->top_samples_available,
834                                                     sl->left_samples_available, get_ue_golomb_31(&sl->gb), 1);
835            if(pred_mode < 0)
836                return -1;
837            sl->chroma_pred_mode = pred_mode;
838        } else {
839            sl->chroma_pred_mode = DC_128_PRED8x8;
840        }
841    }else if(partition_count==4){
842        int i, j, sub_partition_count[4], list, ref[2][4];
843
844        if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
845            for(i=0; i<4; i++){
846                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
847                if(sl->sub_mb_type[i] >=13){
848                    av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
849                    return -1;
850                }
851                sub_partition_count[i] = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
852                sl->sub_mb_type[i]     = ff_h264_b_sub_mb_type_info[sl->sub_mb_type[i]].type;
853            }
854            if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
855                ff_h264_pred_direct_motion(h, sl, &mb_type);
856                sl->ref_cache[0][scan8[4]] =
857                sl->ref_cache[1][scan8[4]] =
858                sl->ref_cache[0][scan8[12]] =
859                sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
860            }
861        }else{
862            av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
863            for(i=0; i<4; i++){
864                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
865                if(sl->sub_mb_type[i] >=4){
866                    av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
867                    return -1;
868                }
869                sub_partition_count[i] = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].partition_count;
870                sl->sub_mb_type[i]     = ff_h264_p_sub_mb_type_info[sl->sub_mb_type[i]].type;
871            }
872        }
873
874        for (list = 0; list < sl->list_count; list++) {
875            int ref_count = IS_REF0(mb_type) ? 1 : sl->ref_count[list] << MB_MBAFF(sl);
876            for(i=0; i<4; i++){
877                if(IS_DIRECT(sl->sub_mb_type[i])) continue;
878                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
879                    unsigned int tmp;
880                    if(ref_count == 1){
881                        tmp= 0;
882                    }else if(ref_count == 2){
883                        tmp= get_bits1(&sl->gb)^1;
884                    }else{
885                        tmp= get_ue_golomb_31(&sl->gb);
886                        if(tmp>=ref_count){
887                            av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
888                            return -1;
889                        }
890                    }
891                    ref[list][i]= tmp;
892                }else{
893                 //FIXME
894                    ref[list][i] = -1;
895                }
896            }
897        }
898
899        if(dct8x8_allowed)
900            dct8x8_allowed = get_dct8x8_allowed(h, sl);
901
902        for (list = 0; list < sl->list_count; list++) {
903            for(i=0; i<4; i++){
904                if(IS_DIRECT(sl->sub_mb_type[i])) {
905                    sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
906                    continue;
907                }
908                sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
909                sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
910
911                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
912                    const int sub_mb_type= sl->sub_mb_type[i];
913                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
914                    for(j=0; j<sub_partition_count[i]; j++){
915                        int mx, my;
916                        const int index= 4*i + block_width*j;
917                        int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
918                        pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
919                        mx += (unsigned)get_se_golomb(&sl->gb);
920                        my += (unsigned)get_se_golomb(&sl->gb);
921                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
922
923                        if(IS_SUB_8X8(sub_mb_type)){
924                            mv_cache[ 1 ][0]=
925                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
926                            mv_cache[ 1 ][1]=
927                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
928                        }else if(IS_SUB_8X4(sub_mb_type)){
929                            mv_cache[ 1 ][0]= mx;
930                            mv_cache[ 1 ][1]= my;
931                        }else if(IS_SUB_4X8(sub_mb_type)){
932                            mv_cache[ 8 ][0]= mx;
933                            mv_cache[ 8 ][1]= my;
934                        }
935                        mv_cache[ 0 ][0]= mx;
936                        mv_cache[ 0 ][1]= my;
937                    }
938                }else{
939                    uint32_t *p= (uint32_t *)&sl->mv_cache[list][ scan8[4*i] ][0];
940                    p[0] = p[1]=
941                    p[8] = p[9]= 0;
942                }
943            }
944        }
945    }else if(IS_DIRECT(mb_type)){
946        ff_h264_pred_direct_motion(h, sl, &mb_type);
947        dct8x8_allowed &= h->ps.sps->direct_8x8_inference_flag;
948    }else{
949        int list, mx, my, i;
950         //FIXME we should set ref_idx_l? to 0 if we use that later ...
951        if(IS_16X16(mb_type)){
952            for (list = 0; list < sl->list_count; list++) {
953                    unsigned int val;
954                    if(IS_DIR(mb_type, 0, list)){
955                        unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
956                        if (rc == 1) {
957                            val= 0;
958                        } else if (rc == 2) {
959                            val= get_bits1(&sl->gb)^1;
960                        }else{
961                            val= get_ue_golomb_31(&sl->gb);
962                            if (val >= rc) {
963                                av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
964                                return -1;
965                            }
966                        }
967                    fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
968                    }
969            }
970            for (list = 0; list < sl->list_count; list++) {
971                if(IS_DIR(mb_type, 0, list)){
972                    pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
973                    mx += (unsigned)get_se_golomb(&sl->gb);
974                    my += (unsigned)get_se_golomb(&sl->gb);
975                    ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
976
977                    fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
978                }
979            }
980        }
981        else if(IS_16X8(mb_type)){
982            for (list = 0; list < sl->list_count; list++) {
983                    for(i=0; i<2; i++){
984                        unsigned int val;
985                        if(IS_DIR(mb_type, i, list)){
986                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
987                            if (rc == 1) {
988                                val= 0;
989                            } else if (rc == 2) {
990                                val= get_bits1(&sl->gb)^1;
991                            }else{
992                                val= get_ue_golomb_31(&sl->gb);
993                                if (val >= rc) {
994                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
995                                    return -1;
996                                }
997                            }
998                        }else
999                            val= LIST_NOT_USED&0xFF;
1000                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
1001                    }
1002            }
1003            for (list = 0; list < sl->list_count; list++) {
1004                for(i=0; i<2; i++){
1005                    unsigned int val;
1006                    if(IS_DIR(mb_type, i, list)){
1007                        pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1008                        mx += (unsigned)get_se_golomb(&sl->gb);
1009                        my += (unsigned)get_se_golomb(&sl->gb);
1010                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1011
1012                        val= pack16to32(mx,my);
1013                    }else
1014                        val=0;
1015                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1016                }
1017            }
1018        }else{
1019            av_assert2(IS_8X16(mb_type));
1020            for (list = 0; list < sl->list_count; list++) {
1021                    for(i=0; i<2; i++){
1022                        unsigned int val;
1023                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1024                            unsigned rc = sl->ref_count[list] << MB_MBAFF(sl);
1025                            if (rc == 1) {
1026                                val= 0;
1027                            } else if (rc == 2) {
1028                                val= get_bits1(&sl->gb)^1;
1029                            }else{
1030                                val= get_ue_golomb_31(&sl->gb);
1031                                if (val >= rc) {
1032                                    av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1033                                    return -1;
1034                                }
1035                            }
1036                        }else
1037                            val= LIST_NOT_USED&0xFF;
1038                        fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1039                    }
1040            }
1041            for (list = 0; list < sl->list_count; list++) {
1042                for(i=0; i<2; i++){
1043                    unsigned int val;
1044                    if(IS_DIR(mb_type, i, list)){
1045                        pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1046                        mx += (unsigned)get_se_golomb(&sl->gb);
1047                        my += (unsigned)get_se_golomb(&sl->gb);
1048                        ff_tlog(h->avctx, "final mv:%d %d\n", mx, my);
1049
1050                        val= pack16to32(mx,my);
1051                    }else
1052                        val=0;
1053                    fill_rectangle(sl->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1054                }
1055            }
1056        }
1057    }
1058
1059    if(IS_INTER(mb_type))
1060        write_back_motion(h, sl, mb_type);
1061
1062    if(!IS_INTRA16x16(mb_type)){
1063        cbp= get_ue_golomb(&sl->gb);
1064
1065        if(decode_chroma){
1066            if(cbp > 47){
1067                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1068                return -1;
1069            }
1070            if (IS_INTRA4x4(mb_type))
1071                cbp = ff_h264_golomb_to_intra4x4_cbp[cbp];
1072            else
1073                cbp = ff_h264_golomb_to_inter_cbp[cbp];
1074        }else{
1075            if(cbp > 15){
1076                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
1077                return -1;
1078            }
1079            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1080            else                     cbp= golomb_to_inter_cbp_gray[cbp];
1081        }
1082    } else {
1083        if (!decode_chroma && cbp>15) {
1084            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
1085            return AVERROR_INVALIDDATA;
1086        }
1087    }
1088
1089    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1090        mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
1091    }
1092    sl->cbp=
1093    h->cbp_table[mb_xy]= cbp;
1094    h->cur_pic.mb_type[mb_xy] = mb_type;
1095
1096    if(cbp || IS_INTRA16x16(mb_type)){
1097        int i4x4, i8x8, chroma_idx;
1098        int dquant;
1099        int ret;
1100        GetBitContext *gb = &sl->gb;
1101        const uint8_t *scan, *scan8x8;
1102        const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8);
1103
1104        dquant= get_se_golomb(&sl->gb);
1105
1106        sl->qscale += (unsigned)dquant;
1107
1108        if (((unsigned)sl->qscale) > max_qp){
1109            if (sl->qscale < 0) sl->qscale += max_qp + 1;
1110            else                sl->qscale -= max_qp+1;
1111            if (((unsigned)sl->qscale) > max_qp){
1112                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
1113                sl->qscale = max_qp;
1114                return -1;
1115            }
1116        }
1117
1118        sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale);
1119        sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale);
1120
1121        if(IS_INTERLACED(mb_type)){
1122            scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1123            scan    = sl->qscale ? h->field_scan : h->field_scan_q0;
1124        }else{
1125            scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1126            scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1127        }
1128
1129        if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) {
1130            return -1;
1131        }
1132        h->cbp_table[mb_xy] |= ret << 12;
1133        if (CHROMA444(h)) {
1134            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ) {
1135                return -1;
1136            }
1137            if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
1138                return -1;
1139            }
1140        } else {
1141            const int num_c8x8 = h->ps.sps->chroma_format_idc;
1142
1143            if(cbp&0x30){
1144                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1145                    if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1146                                        CHROMA_DC_BLOCK_INDEX + chroma_idx,
1147                                        CHROMA422(h) ? ff_h264_chroma422_dc_scan : ff_h264_chroma_dc_scan,
1148                                        NULL, 4 * num_c8x8) < 0) {
1149                        return -1;
1150                    }
1151            }
1152
1153            if(cbp&0x20){
1154                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1155                    const uint32_t *qmul = h->ps.pps->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
1156                    int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1157                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
1158                        for (i4x4 = 0; i4x4 < 4; i4x4++) {
1159                            const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1160                            if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
1161                                return -1;
1162                            mb += 16 << pixel_shift;
1163                        }
1164                    }
1165                }
1166            }else{
1167                fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1168                fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1169            }
1170        }
1171    }else{
1172        fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1173        fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1174        fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1175    }
1176    h->cur_pic.qscale_table[mb_xy] = sl->qscale;
1177    write_back_non_zero_count(h, sl);
1178
1179    return 0;
1180}
1181