1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * VP9 compatible video decoder
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5cabdff1aSopenharmony_ci * Copyright (C) 2013 Clément Bœsch <u pkh me>
6cabdff1aSopenharmony_ci *
7cabdff1aSopenharmony_ci * This file is part of FFmpeg.
8cabdff1aSopenharmony_ci *
9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
13cabdff1aSopenharmony_ci *
14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17cabdff1aSopenharmony_ci * Lesser General Public License for more details.
18cabdff1aSopenharmony_ci *
19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22cabdff1aSopenharmony_ci */
23cabdff1aSopenharmony_ci
24cabdff1aSopenharmony_ci#include "libavutil/avassert.h"
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci#include "threadframe.h"
27cabdff1aSopenharmony_ci#include "vp56.h"
28cabdff1aSopenharmony_ci#include "vp9.h"
29cabdff1aSopenharmony_ci#include "vp9data.h"
30cabdff1aSopenharmony_ci#include "vp9dec.h"
31cabdff1aSopenharmony_ci
32cabdff1aSopenharmony_cistatic av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
33cabdff1aSopenharmony_ci                                       ptrdiff_t stride, int v)
34cabdff1aSopenharmony_ci{
35cabdff1aSopenharmony_ci    switch (w) {
36cabdff1aSopenharmony_ci    case 1:
37cabdff1aSopenharmony_ci        do {
38cabdff1aSopenharmony_ci            *ptr = v;
39cabdff1aSopenharmony_ci            ptr += stride;
40cabdff1aSopenharmony_ci        } while (--h);
41cabdff1aSopenharmony_ci        break;
42cabdff1aSopenharmony_ci    case 2: {
43cabdff1aSopenharmony_ci        int v16 = v * 0x0101;
44cabdff1aSopenharmony_ci        do {
45cabdff1aSopenharmony_ci            AV_WN16A(ptr, v16);
46cabdff1aSopenharmony_ci            ptr += stride;
47cabdff1aSopenharmony_ci        } while (--h);
48cabdff1aSopenharmony_ci        break;
49cabdff1aSopenharmony_ci    }
50cabdff1aSopenharmony_ci    case 4: {
51cabdff1aSopenharmony_ci        uint32_t v32 = v * 0x01010101;
52cabdff1aSopenharmony_ci        do {
53cabdff1aSopenharmony_ci            AV_WN32A(ptr, v32);
54cabdff1aSopenharmony_ci            ptr += stride;
55cabdff1aSopenharmony_ci        } while (--h);
56cabdff1aSopenharmony_ci        break;
57cabdff1aSopenharmony_ci    }
58cabdff1aSopenharmony_ci    case 8: {
59cabdff1aSopenharmony_ci#if HAVE_FAST_64BIT
60cabdff1aSopenharmony_ci        uint64_t v64 = v * 0x0101010101010101ULL;
61cabdff1aSopenharmony_ci        do {
62cabdff1aSopenharmony_ci            AV_WN64A(ptr, v64);
63cabdff1aSopenharmony_ci            ptr += stride;
64cabdff1aSopenharmony_ci        } while (--h);
65cabdff1aSopenharmony_ci#else
66cabdff1aSopenharmony_ci        uint32_t v32 = v * 0x01010101;
67cabdff1aSopenharmony_ci        do {
68cabdff1aSopenharmony_ci            AV_WN32A(ptr,     v32);
69cabdff1aSopenharmony_ci            AV_WN32A(ptr + 4, v32);
70cabdff1aSopenharmony_ci            ptr += stride;
71cabdff1aSopenharmony_ci        } while (--h);
72cabdff1aSopenharmony_ci#endif
73cabdff1aSopenharmony_ci        break;
74cabdff1aSopenharmony_ci    }
75cabdff1aSopenharmony_ci    }
76cabdff1aSopenharmony_ci}
77cabdff1aSopenharmony_ci
78cabdff1aSopenharmony_cistatic void decode_mode(VP9TileData *td)
79cabdff1aSopenharmony_ci{
80cabdff1aSopenharmony_ci    static const uint8_t left_ctx[N_BS_SIZES] = {
81cabdff1aSopenharmony_ci        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
82cabdff1aSopenharmony_ci    };
83cabdff1aSopenharmony_ci    static const uint8_t above_ctx[N_BS_SIZES] = {
84cabdff1aSopenharmony_ci        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
85cabdff1aSopenharmony_ci    };
86cabdff1aSopenharmony_ci    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
87cabdff1aSopenharmony_ci        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
88cabdff1aSopenharmony_ci        TX_16X16, TX_8X8,   TX_8X8,   TX_8X8,   TX_4X4,   TX_4X4,  TX_4X4
89cabdff1aSopenharmony_ci    };
90cabdff1aSopenharmony_ci    VP9Context *s = td->s;
91cabdff1aSopenharmony_ci    VP9Block *b = td->b;
92cabdff1aSopenharmony_ci    int row = td->row, col = td->col, row7 = td->row7;
93cabdff1aSopenharmony_ci    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
94cabdff1aSopenharmony_ci    int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
95cabdff1aSopenharmony_ci    int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
96cabdff1aSopenharmony_ci    int have_a = row > 0, have_l = col > td->tile_col_start;
97cabdff1aSopenharmony_ci    int vref, filter_id;
98cabdff1aSopenharmony_ci
99cabdff1aSopenharmony_ci    if (!s->s.h.segmentation.enabled) {
100cabdff1aSopenharmony_ci        b->seg_id = 0;
101cabdff1aSopenharmony_ci    } else if (s->s.h.keyframe || s->s.h.intraonly) {
102cabdff1aSopenharmony_ci        b->seg_id = !s->s.h.segmentation.update_map ? 0 :
103cabdff1aSopenharmony_ci                    vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, s->s.h.segmentation.prob);
104cabdff1aSopenharmony_ci    } else if (!s->s.h.segmentation.update_map ||
105cabdff1aSopenharmony_ci               (s->s.h.segmentation.temporal &&
106cabdff1aSopenharmony_ci                vp56_rac_get_prob_branchy(td->c,
107cabdff1aSopenharmony_ci                    s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
108cabdff1aSopenharmony_ci                                    td->left_segpred_ctx[row7]]))) {
109cabdff1aSopenharmony_ci        if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
110cabdff1aSopenharmony_ci            int pred = 8, x;
111cabdff1aSopenharmony_ci            uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
112cabdff1aSopenharmony_ci
113cabdff1aSopenharmony_ci            if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
114cabdff1aSopenharmony_ci                ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
115cabdff1aSopenharmony_ci            for (y = 0; y < h4; y++) {
116cabdff1aSopenharmony_ci                int idx_base = (y + row) * 8 * s->sb_cols + col;
117cabdff1aSopenharmony_ci                for (x = 0; x < w4; x++)
118cabdff1aSopenharmony_ci                    pred = FFMIN(pred, refsegmap[idx_base + x]);
119cabdff1aSopenharmony_ci            }
120cabdff1aSopenharmony_ci            av_assert1(pred < 8);
121cabdff1aSopenharmony_ci            b->seg_id = pred;
122cabdff1aSopenharmony_ci        } else {
123cabdff1aSopenharmony_ci            b->seg_id = 0;
124cabdff1aSopenharmony_ci        }
125cabdff1aSopenharmony_ci
126cabdff1aSopenharmony_ci        memset(&s->above_segpred_ctx[col], 1, w4);
127cabdff1aSopenharmony_ci        memset(&td->left_segpred_ctx[row7], 1, h4);
128cabdff1aSopenharmony_ci    } else {
129cabdff1aSopenharmony_ci        b->seg_id = vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree,
130cabdff1aSopenharmony_ci                                     s->s.h.segmentation.prob);
131cabdff1aSopenharmony_ci
132cabdff1aSopenharmony_ci        memset(&s->above_segpred_ctx[col], 0, w4);
133cabdff1aSopenharmony_ci        memset(&td->left_segpred_ctx[row7], 0, h4);
134cabdff1aSopenharmony_ci    }
135cabdff1aSopenharmony_ci    if (s->s.h.segmentation.enabled &&
136cabdff1aSopenharmony_ci        (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
137cabdff1aSopenharmony_ci        setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
138cabdff1aSopenharmony_ci                  bw4, bh4, 8 * s->sb_cols, b->seg_id);
139cabdff1aSopenharmony_ci    }
140cabdff1aSopenharmony_ci
141cabdff1aSopenharmony_ci    b->skip = s->s.h.segmentation.enabled &&
142cabdff1aSopenharmony_ci        s->s.h.segmentation.feat[b->seg_id].skip_enabled;
143cabdff1aSopenharmony_ci    if (!b->skip) {
144cabdff1aSopenharmony_ci        int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
145cabdff1aSopenharmony_ci        b->skip = vp56_rac_get_prob(td->c, s->prob.p.skip[c]);
146cabdff1aSopenharmony_ci        td->counts.skip[c][b->skip]++;
147cabdff1aSopenharmony_ci    }
148cabdff1aSopenharmony_ci
149cabdff1aSopenharmony_ci    if (s->s.h.keyframe || s->s.h.intraonly) {
150cabdff1aSopenharmony_ci        b->intra = 1;
151cabdff1aSopenharmony_ci    } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
152cabdff1aSopenharmony_ci        b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
153cabdff1aSopenharmony_ci    } else {
154cabdff1aSopenharmony_ci        int c, bit;
155cabdff1aSopenharmony_ci
156cabdff1aSopenharmony_ci        if (have_a && have_l) {
157cabdff1aSopenharmony_ci            c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
158cabdff1aSopenharmony_ci            c += (c == 2);
159cabdff1aSopenharmony_ci        } else {
160cabdff1aSopenharmony_ci            c = have_a ? 2 * s->above_intra_ctx[col] :
161cabdff1aSopenharmony_ci                have_l ? 2 * td->left_intra_ctx[row7] : 0;
162cabdff1aSopenharmony_ci        }
163cabdff1aSopenharmony_ci        bit = vp56_rac_get_prob(td->c, s->prob.p.intra[c]);
164cabdff1aSopenharmony_ci        td->counts.intra[c][bit]++;
165cabdff1aSopenharmony_ci        b->intra = !bit;
166cabdff1aSopenharmony_ci    }
167cabdff1aSopenharmony_ci
168cabdff1aSopenharmony_ci    if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
169cabdff1aSopenharmony_ci        int c;
170cabdff1aSopenharmony_ci        if (have_a) {
171cabdff1aSopenharmony_ci            if (have_l) {
172cabdff1aSopenharmony_ci                c = (s->above_skip_ctx[col] ? max_tx :
173cabdff1aSopenharmony_ci                     s->above_txfm_ctx[col]) +
174cabdff1aSopenharmony_ci                    (td->left_skip_ctx[row7] ? max_tx :
175cabdff1aSopenharmony_ci                     td->left_txfm_ctx[row7]) > max_tx;
176cabdff1aSopenharmony_ci            } else {
177cabdff1aSopenharmony_ci                c = s->above_skip_ctx[col] ? 1 :
178cabdff1aSopenharmony_ci                    (s->above_txfm_ctx[col] * 2 > max_tx);
179cabdff1aSopenharmony_ci            }
180cabdff1aSopenharmony_ci        } else if (have_l) {
181cabdff1aSopenharmony_ci            c = td->left_skip_ctx[row7] ? 1 :
182cabdff1aSopenharmony_ci                (td->left_txfm_ctx[row7] * 2 > max_tx);
183cabdff1aSopenharmony_ci        } else {
184cabdff1aSopenharmony_ci            c = 1;
185cabdff1aSopenharmony_ci        }
186cabdff1aSopenharmony_ci        switch (max_tx) {
187cabdff1aSopenharmony_ci        case TX_32X32:
188cabdff1aSopenharmony_ci            b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
189cabdff1aSopenharmony_ci            if (b->tx) {
190cabdff1aSopenharmony_ci                b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
191cabdff1aSopenharmony_ci                if (b->tx == 2)
192cabdff1aSopenharmony_ci                    b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
193cabdff1aSopenharmony_ci            }
194cabdff1aSopenharmony_ci            td->counts.tx32p[c][b->tx]++;
195cabdff1aSopenharmony_ci            break;
196cabdff1aSopenharmony_ci        case TX_16X16:
197cabdff1aSopenharmony_ci            b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
198cabdff1aSopenharmony_ci            if (b->tx)
199cabdff1aSopenharmony_ci                b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
200cabdff1aSopenharmony_ci            td->counts.tx16p[c][b->tx]++;
201cabdff1aSopenharmony_ci            break;
202cabdff1aSopenharmony_ci        case TX_8X8:
203cabdff1aSopenharmony_ci            b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx8p[c]);
204cabdff1aSopenharmony_ci            td->counts.tx8p[c][b->tx]++;
205cabdff1aSopenharmony_ci            break;
206cabdff1aSopenharmony_ci        case TX_4X4:
207cabdff1aSopenharmony_ci            b->tx = TX_4X4;
208cabdff1aSopenharmony_ci            break;
209cabdff1aSopenharmony_ci        }
210cabdff1aSopenharmony_ci    } else {
211cabdff1aSopenharmony_ci        b->tx = FFMIN(max_tx, s->s.h.txfmmode);
212cabdff1aSopenharmony_ci    }
213cabdff1aSopenharmony_ci
214cabdff1aSopenharmony_ci    if (s->s.h.keyframe || s->s.h.intraonly) {
215cabdff1aSopenharmony_ci        uint8_t *a = &s->above_mode_ctx[col * 2];
216cabdff1aSopenharmony_ci        uint8_t *l = &td->left_mode_ctx[(row7) << 1];
217cabdff1aSopenharmony_ci
218cabdff1aSopenharmony_ci        b->comp = 0;
219cabdff1aSopenharmony_ci        if (b->bs > BS_8x8) {
220cabdff1aSopenharmony_ci            // FIXME the memory storage intermediates here aren't really
221cabdff1aSopenharmony_ci            // necessary, they're just there to make the code slightly
222cabdff1aSopenharmony_ci            // simpler for now
223cabdff1aSopenharmony_ci            b->mode[0] =
224cabdff1aSopenharmony_ci            a[0]       = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
225cabdff1aSopenharmony_ci                                          ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
226cabdff1aSopenharmony_ci            if (b->bs != BS_8x4) {
227cabdff1aSopenharmony_ci                b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
228cabdff1aSopenharmony_ci                                              ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
229cabdff1aSopenharmony_ci                l[0]       =
230cabdff1aSopenharmony_ci                a[1]       = b->mode[1];
231cabdff1aSopenharmony_ci            } else {
232cabdff1aSopenharmony_ci                l[0]       =
233cabdff1aSopenharmony_ci                a[1]       =
234cabdff1aSopenharmony_ci                b->mode[1] = b->mode[0];
235cabdff1aSopenharmony_ci            }
236cabdff1aSopenharmony_ci            if (b->bs != BS_4x8) {
237cabdff1aSopenharmony_ci                b->mode[2] =
238cabdff1aSopenharmony_ci                a[0]       = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
239cabdff1aSopenharmony_ci                                              ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
240cabdff1aSopenharmony_ci                if (b->bs != BS_8x4) {
241cabdff1aSopenharmony_ci                    b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
242cabdff1aSopenharmony_ci                                                  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
243cabdff1aSopenharmony_ci                    l[1]       =
244cabdff1aSopenharmony_ci                    a[1]       = b->mode[3];
245cabdff1aSopenharmony_ci                } else {
246cabdff1aSopenharmony_ci                    l[1]       =
247cabdff1aSopenharmony_ci                    a[1]       =
248cabdff1aSopenharmony_ci                    b->mode[3] = b->mode[2];
249cabdff1aSopenharmony_ci                }
250cabdff1aSopenharmony_ci            } else {
251cabdff1aSopenharmony_ci                b->mode[2] = b->mode[0];
252cabdff1aSopenharmony_ci                l[1]       =
253cabdff1aSopenharmony_ci                a[1]       =
254cabdff1aSopenharmony_ci                b->mode[3] = b->mode[1];
255cabdff1aSopenharmony_ci            }
256cabdff1aSopenharmony_ci        } else {
257cabdff1aSopenharmony_ci            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
258cabdff1aSopenharmony_ci                                          ff_vp9_default_kf_ymode_probs[*a][*l]);
259cabdff1aSopenharmony_ci            b->mode[3] =
260cabdff1aSopenharmony_ci            b->mode[2] =
261cabdff1aSopenharmony_ci            b->mode[1] = b->mode[0];
262cabdff1aSopenharmony_ci            // FIXME this can probably be optimized
263cabdff1aSopenharmony_ci            memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
264cabdff1aSopenharmony_ci            memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
265cabdff1aSopenharmony_ci        }
266cabdff1aSopenharmony_ci        b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
267cabdff1aSopenharmony_ci                                     ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
268cabdff1aSopenharmony_ci    } else if (b->intra) {
269cabdff1aSopenharmony_ci        b->comp = 0;
270cabdff1aSopenharmony_ci        if (b->bs > BS_8x8) {
271cabdff1aSopenharmony_ci            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
272cabdff1aSopenharmony_ci                                          s->prob.p.y_mode[0]);
273cabdff1aSopenharmony_ci            td->counts.y_mode[0][b->mode[0]]++;
274cabdff1aSopenharmony_ci            if (b->bs != BS_8x4) {
275cabdff1aSopenharmony_ci                b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
276cabdff1aSopenharmony_ci                                              s->prob.p.y_mode[0]);
277cabdff1aSopenharmony_ci                td->counts.y_mode[0][b->mode[1]]++;
278cabdff1aSopenharmony_ci            } else {
279cabdff1aSopenharmony_ci                b->mode[1] = b->mode[0];
280cabdff1aSopenharmony_ci            }
281cabdff1aSopenharmony_ci            if (b->bs != BS_4x8) {
282cabdff1aSopenharmony_ci                b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
283cabdff1aSopenharmony_ci                                              s->prob.p.y_mode[0]);
284cabdff1aSopenharmony_ci                td->counts.y_mode[0][b->mode[2]]++;
285cabdff1aSopenharmony_ci                if (b->bs != BS_8x4) {
286cabdff1aSopenharmony_ci                    b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
287cabdff1aSopenharmony_ci                                                  s->prob.p.y_mode[0]);
288cabdff1aSopenharmony_ci                    td->counts.y_mode[0][b->mode[3]]++;
289cabdff1aSopenharmony_ci                } else {
290cabdff1aSopenharmony_ci                    b->mode[3] = b->mode[2];
291cabdff1aSopenharmony_ci                }
292cabdff1aSopenharmony_ci            } else {
293cabdff1aSopenharmony_ci                b->mode[2] = b->mode[0];
294cabdff1aSopenharmony_ci                b->mode[3] = b->mode[1];
295cabdff1aSopenharmony_ci            }
296cabdff1aSopenharmony_ci        } else {
297cabdff1aSopenharmony_ci            static const uint8_t size_group[10] = {
298cabdff1aSopenharmony_ci                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
299cabdff1aSopenharmony_ci            };
300cabdff1aSopenharmony_ci            int sz = size_group[b->bs];
301cabdff1aSopenharmony_ci
302cabdff1aSopenharmony_ci            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
303cabdff1aSopenharmony_ci                                          s->prob.p.y_mode[sz]);
304cabdff1aSopenharmony_ci            b->mode[1] =
305cabdff1aSopenharmony_ci            b->mode[2] =
306cabdff1aSopenharmony_ci            b->mode[3] = b->mode[0];
307cabdff1aSopenharmony_ci            td->counts.y_mode[sz][b->mode[3]]++;
308cabdff1aSopenharmony_ci        }
309cabdff1aSopenharmony_ci        b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
310cabdff1aSopenharmony_ci                                     s->prob.p.uv_mode[b->mode[3]]);
311cabdff1aSopenharmony_ci        td->counts.uv_mode[b->mode[3]][b->uvmode]++;
312cabdff1aSopenharmony_ci    } else {
313cabdff1aSopenharmony_ci        static const uint8_t inter_mode_ctx_lut[14][14] = {
314cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
315cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
316cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
317cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
318cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
319cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
320cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
321cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
322cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
323cabdff1aSopenharmony_ci            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
324cabdff1aSopenharmony_ci            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
325cabdff1aSopenharmony_ci            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
326cabdff1aSopenharmony_ci            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
327cabdff1aSopenharmony_ci            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
328cabdff1aSopenharmony_ci        };
329cabdff1aSopenharmony_ci
330cabdff1aSopenharmony_ci        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
331cabdff1aSopenharmony_ci            av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
332cabdff1aSopenharmony_ci            b->comp = 0;
333cabdff1aSopenharmony_ci            b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
334cabdff1aSopenharmony_ci        } else {
335cabdff1aSopenharmony_ci            // read comp_pred flag
336cabdff1aSopenharmony_ci            if (s->s.h.comppredmode != PRED_SWITCHABLE) {
337cabdff1aSopenharmony_ci                b->comp = s->s.h.comppredmode == PRED_COMPREF;
338cabdff1aSopenharmony_ci            } else {
339cabdff1aSopenharmony_ci                int c;
340cabdff1aSopenharmony_ci
341cabdff1aSopenharmony_ci                // FIXME add intra as ref=0xff (or -1) to make these easier?
342cabdff1aSopenharmony_ci                if (have_a) {
343cabdff1aSopenharmony_ci                    if (have_l) {
344cabdff1aSopenharmony_ci                        if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
345cabdff1aSopenharmony_ci                            c = 4;
346cabdff1aSopenharmony_ci                        } else if (s->above_comp_ctx[col]) {
347cabdff1aSopenharmony_ci                            c = 2 + (td->left_intra_ctx[row7] ||
348cabdff1aSopenharmony_ci                                     td->left_ref_ctx[row7] == s->s.h.fixcompref);
349cabdff1aSopenharmony_ci                        } else if (td->left_comp_ctx[row7]) {
350cabdff1aSopenharmony_ci                            c = 2 + (s->above_intra_ctx[col] ||
351cabdff1aSopenharmony_ci                                     s->above_ref_ctx[col] == s->s.h.fixcompref);
352cabdff1aSopenharmony_ci                        } else {
353cabdff1aSopenharmony_ci                            c = (!s->above_intra_ctx[col] &&
354cabdff1aSopenharmony_ci                                 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
355cabdff1aSopenharmony_ci                                (!td->left_intra_ctx[row7] &&
356cabdff1aSopenharmony_ci                                 td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
357cabdff1aSopenharmony_ci                        }
358cabdff1aSopenharmony_ci                    } else {
359cabdff1aSopenharmony_ci                        c = s->above_comp_ctx[col] ? 3 :
360cabdff1aSopenharmony_ci                        (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
361cabdff1aSopenharmony_ci                    }
362cabdff1aSopenharmony_ci                } else if (have_l) {
363cabdff1aSopenharmony_ci                    c = td->left_comp_ctx[row7] ? 3 :
364cabdff1aSopenharmony_ci                    (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
365cabdff1aSopenharmony_ci                } else {
366cabdff1aSopenharmony_ci                    c = 1;
367cabdff1aSopenharmony_ci                }
368cabdff1aSopenharmony_ci                b->comp = vp56_rac_get_prob(td->c, s->prob.p.comp[c]);
369cabdff1aSopenharmony_ci                td->counts.comp[c][b->comp]++;
370cabdff1aSopenharmony_ci            }
371cabdff1aSopenharmony_ci
372cabdff1aSopenharmony_ci            // read actual references
373cabdff1aSopenharmony_ci            // FIXME probably cache a few variables here to prevent repetitive
374cabdff1aSopenharmony_ci            // memory accesses below
375cabdff1aSopenharmony_ci            if (b->comp) { /* two references */
376cabdff1aSopenharmony_ci                int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
377cabdff1aSopenharmony_ci
378cabdff1aSopenharmony_ci                b->ref[fix_idx] = s->s.h.fixcompref;
379cabdff1aSopenharmony_ci                // FIXME can this codeblob be replaced by some sort of LUT?
380cabdff1aSopenharmony_ci                if (have_a) {
381cabdff1aSopenharmony_ci                    if (have_l) {
382cabdff1aSopenharmony_ci                        if (s->above_intra_ctx[col]) {
383cabdff1aSopenharmony_ci                            if (td->left_intra_ctx[row7]) {
384cabdff1aSopenharmony_ci                                c = 2;
385cabdff1aSopenharmony_ci                            } else {
386cabdff1aSopenharmony_ci                                c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
387cabdff1aSopenharmony_ci                            }
388cabdff1aSopenharmony_ci                        } else if (td->left_intra_ctx[row7]) {
389cabdff1aSopenharmony_ci                            c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
390cabdff1aSopenharmony_ci                        } else {
391cabdff1aSopenharmony_ci                            int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
392cabdff1aSopenharmony_ci
393cabdff1aSopenharmony_ci                            if (refl == refa && refa == s->s.h.varcompref[1]) {
394cabdff1aSopenharmony_ci                                c = 0;
395cabdff1aSopenharmony_ci                            } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
396cabdff1aSopenharmony_ci                                if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
397cabdff1aSopenharmony_ci                                    (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
398cabdff1aSopenharmony_ci                                    c = 4;
399cabdff1aSopenharmony_ci                                } else {
400cabdff1aSopenharmony_ci                                    c = (refa == refl) ? 3 : 1;
401cabdff1aSopenharmony_ci                                }
402cabdff1aSopenharmony_ci                            } else if (!td->left_comp_ctx[row7]) {
403cabdff1aSopenharmony_ci                                if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
404cabdff1aSopenharmony_ci                                    c = 1;
405cabdff1aSopenharmony_ci                                } else {
406cabdff1aSopenharmony_ci                                    c = (refl == s->s.h.varcompref[1] &&
407cabdff1aSopenharmony_ci                                         refa != s->s.h.varcompref[1]) ? 2 : 4;
408cabdff1aSopenharmony_ci                                }
409cabdff1aSopenharmony_ci                            } else if (!s->above_comp_ctx[col]) {
410cabdff1aSopenharmony_ci                                if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
411cabdff1aSopenharmony_ci                                    c = 1;
412cabdff1aSopenharmony_ci                                } else {
413cabdff1aSopenharmony_ci                                    c = (refa == s->s.h.varcompref[1] &&
414cabdff1aSopenharmony_ci                                         refl != s->s.h.varcompref[1]) ? 2 : 4;
415cabdff1aSopenharmony_ci                                }
416cabdff1aSopenharmony_ci                            } else {
417cabdff1aSopenharmony_ci                                c = (refl == refa) ? 4 : 2;
418cabdff1aSopenharmony_ci                            }
419cabdff1aSopenharmony_ci                        }
420cabdff1aSopenharmony_ci                    } else {
421cabdff1aSopenharmony_ci                        if (s->above_intra_ctx[col]) {
422cabdff1aSopenharmony_ci                            c = 2;
423cabdff1aSopenharmony_ci                        } else if (s->above_comp_ctx[col]) {
424cabdff1aSopenharmony_ci                            c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
425cabdff1aSopenharmony_ci                        } else {
426cabdff1aSopenharmony_ci                            c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
427cabdff1aSopenharmony_ci                        }
428cabdff1aSopenharmony_ci                    }
429cabdff1aSopenharmony_ci                } else if (have_l) {
430cabdff1aSopenharmony_ci                    if (td->left_intra_ctx[row7]) {
431cabdff1aSopenharmony_ci                        c = 2;
432cabdff1aSopenharmony_ci                    } else if (td->left_comp_ctx[row7]) {
433cabdff1aSopenharmony_ci                        c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
434cabdff1aSopenharmony_ci                    } else {
435cabdff1aSopenharmony_ci                        c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
436cabdff1aSopenharmony_ci                    }
437cabdff1aSopenharmony_ci                } else {
438cabdff1aSopenharmony_ci                    c = 2;
439cabdff1aSopenharmony_ci                }
440cabdff1aSopenharmony_ci                bit = vp56_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
441cabdff1aSopenharmony_ci                b->ref[var_idx] = s->s.h.varcompref[bit];
442cabdff1aSopenharmony_ci                td->counts.comp_ref[c][bit]++;
443cabdff1aSopenharmony_ci            } else /* single reference */ {
444cabdff1aSopenharmony_ci                int bit, c;
445cabdff1aSopenharmony_ci
446cabdff1aSopenharmony_ci                if (have_a && !s->above_intra_ctx[col]) {
447cabdff1aSopenharmony_ci                    if (have_l && !td->left_intra_ctx[row7]) {
448cabdff1aSopenharmony_ci                        if (td->left_comp_ctx[row7]) {
449cabdff1aSopenharmony_ci                            if (s->above_comp_ctx[col]) {
450cabdff1aSopenharmony_ci                                c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
451cabdff1aSopenharmony_ci                                         !s->above_ref_ctx[col]);
452cabdff1aSopenharmony_ci                            } else {
453cabdff1aSopenharmony_ci                                c = (3 * !s->above_ref_ctx[col]) +
454cabdff1aSopenharmony_ci                                    (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
455cabdff1aSopenharmony_ci                            }
456cabdff1aSopenharmony_ci                        } else if (s->above_comp_ctx[col]) {
457cabdff1aSopenharmony_ci                            c = (3 * !td->left_ref_ctx[row7]) +
458cabdff1aSopenharmony_ci                                (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
459cabdff1aSopenharmony_ci                        } else {
460cabdff1aSopenharmony_ci                            c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
461cabdff1aSopenharmony_ci                        }
462cabdff1aSopenharmony_ci                    } else if (s->above_intra_ctx[col]) {
463cabdff1aSopenharmony_ci                        c = 2;
464cabdff1aSopenharmony_ci                    } else if (s->above_comp_ctx[col]) {
465cabdff1aSopenharmony_ci                        c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
466cabdff1aSopenharmony_ci                    } else {
467cabdff1aSopenharmony_ci                        c = 4 * (!s->above_ref_ctx[col]);
468cabdff1aSopenharmony_ci                    }
469cabdff1aSopenharmony_ci                } else if (have_l && !td->left_intra_ctx[row7]) {
470cabdff1aSopenharmony_ci                    if (td->left_intra_ctx[row7]) {
471cabdff1aSopenharmony_ci                        c = 2;
472cabdff1aSopenharmony_ci                    } else if (td->left_comp_ctx[row7]) {
473cabdff1aSopenharmony_ci                        c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
474cabdff1aSopenharmony_ci                    } else {
475cabdff1aSopenharmony_ci                        c = 4 * (!td->left_ref_ctx[row7]);
476cabdff1aSopenharmony_ci                    }
477cabdff1aSopenharmony_ci                } else {
478cabdff1aSopenharmony_ci                    c = 2;
479cabdff1aSopenharmony_ci                }
480cabdff1aSopenharmony_ci                bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
481cabdff1aSopenharmony_ci                td->counts.single_ref[c][0][bit]++;
482cabdff1aSopenharmony_ci                if (!bit) {
483cabdff1aSopenharmony_ci                    b->ref[0] = 0;
484cabdff1aSopenharmony_ci                } else {
485cabdff1aSopenharmony_ci                    // FIXME can this codeblob be replaced by some sort of LUT?
486cabdff1aSopenharmony_ci                    if (have_a) {
487cabdff1aSopenharmony_ci                        if (have_l) {
488cabdff1aSopenharmony_ci                            if (td->left_intra_ctx[row7]) {
489cabdff1aSopenharmony_ci                                if (s->above_intra_ctx[col]) {
490cabdff1aSopenharmony_ci                                    c = 2;
491cabdff1aSopenharmony_ci                                } else if (s->above_comp_ctx[col]) {
492cabdff1aSopenharmony_ci                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
493cabdff1aSopenharmony_ci                                                 s->above_ref_ctx[col] == 1);
494cabdff1aSopenharmony_ci                                } else if (!s->above_ref_ctx[col]) {
495cabdff1aSopenharmony_ci                                    c = 3;
496cabdff1aSopenharmony_ci                                } else {
497cabdff1aSopenharmony_ci                                    c = 4 * (s->above_ref_ctx[col] == 1);
498cabdff1aSopenharmony_ci                                }
499cabdff1aSopenharmony_ci                            } else if (s->above_intra_ctx[col]) {
500cabdff1aSopenharmony_ci                                if (td->left_intra_ctx[row7]) {
501cabdff1aSopenharmony_ci                                    c = 2;
502cabdff1aSopenharmony_ci                                } else if (td->left_comp_ctx[row7]) {
503cabdff1aSopenharmony_ci                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
504cabdff1aSopenharmony_ci                                                 td->left_ref_ctx[row7] == 1);
505cabdff1aSopenharmony_ci                                } else if (!td->left_ref_ctx[row7]) {
506cabdff1aSopenharmony_ci                                    c = 3;
507cabdff1aSopenharmony_ci                                } else {
508cabdff1aSopenharmony_ci                                    c = 4 * (td->left_ref_ctx[row7] == 1);
509cabdff1aSopenharmony_ci                                }
510cabdff1aSopenharmony_ci                            } else if (s->above_comp_ctx[col]) {
511cabdff1aSopenharmony_ci                                if (td->left_comp_ctx[row7]) {
512cabdff1aSopenharmony_ci                                    if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
513cabdff1aSopenharmony_ci                                        c = 3 * (s->s.h.fixcompref == 1 ||
514cabdff1aSopenharmony_ci                                                 td->left_ref_ctx[row7] == 1);
515cabdff1aSopenharmony_ci                                    } else {
516cabdff1aSopenharmony_ci                                        c = 2;
517cabdff1aSopenharmony_ci                                    }
518cabdff1aSopenharmony_ci                                } else if (!td->left_ref_ctx[row7]) {
519cabdff1aSopenharmony_ci                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
520cabdff1aSopenharmony_ci                                                 s->above_ref_ctx[col] == 1);
521cabdff1aSopenharmony_ci                                } else {
522cabdff1aSopenharmony_ci                                    c = 3 * (td->left_ref_ctx[row7] == 1) +
523cabdff1aSopenharmony_ci                                    (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
524cabdff1aSopenharmony_ci                                }
525cabdff1aSopenharmony_ci                            } else if (td->left_comp_ctx[row7]) {
526cabdff1aSopenharmony_ci                                if (!s->above_ref_ctx[col]) {
527cabdff1aSopenharmony_ci                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
528cabdff1aSopenharmony_ci                                                 td->left_ref_ctx[row7] == 1);
529cabdff1aSopenharmony_ci                                } else {
530cabdff1aSopenharmony_ci                                    c = 3 * (s->above_ref_ctx[col] == 1) +
531cabdff1aSopenharmony_ci                                    (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
532cabdff1aSopenharmony_ci                                }
533cabdff1aSopenharmony_ci                            } else if (!s->above_ref_ctx[col]) {
534cabdff1aSopenharmony_ci                                if (!td->left_ref_ctx[row7]) {
535cabdff1aSopenharmony_ci                                    c = 3;
536cabdff1aSopenharmony_ci                                } else {
537cabdff1aSopenharmony_ci                                    c = 4 * (td->left_ref_ctx[row7] == 1);
538cabdff1aSopenharmony_ci                                }
539cabdff1aSopenharmony_ci                            } else if (!td->left_ref_ctx[row7]) {
540cabdff1aSopenharmony_ci                                c = 4 * (s->above_ref_ctx[col] == 1);
541cabdff1aSopenharmony_ci                            } else {
542cabdff1aSopenharmony_ci                                c = 2 * (td->left_ref_ctx[row7] == 1) +
543cabdff1aSopenharmony_ci                                    2 * (s->above_ref_ctx[col] == 1);
544cabdff1aSopenharmony_ci                            }
545cabdff1aSopenharmony_ci                        } else {
546cabdff1aSopenharmony_ci                            if (s->above_intra_ctx[col] ||
547cabdff1aSopenharmony_ci                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
548cabdff1aSopenharmony_ci                                c = 2;
549cabdff1aSopenharmony_ci                            } else if (s->above_comp_ctx[col]) {
550cabdff1aSopenharmony_ci                                c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
551cabdff1aSopenharmony_ci                            } else {
552cabdff1aSopenharmony_ci                                c = 4 * (s->above_ref_ctx[col] == 1);
553cabdff1aSopenharmony_ci                            }
554cabdff1aSopenharmony_ci                        }
555cabdff1aSopenharmony_ci                    } else if (have_l) {
556cabdff1aSopenharmony_ci                        if (td->left_intra_ctx[row7] ||
557cabdff1aSopenharmony_ci                            (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
558cabdff1aSopenharmony_ci                            c = 2;
559cabdff1aSopenharmony_ci                        } else if (td->left_comp_ctx[row7]) {
560cabdff1aSopenharmony_ci                            c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
561cabdff1aSopenharmony_ci                        } else {
562cabdff1aSopenharmony_ci                            c = 4 * (td->left_ref_ctx[row7] == 1);
563cabdff1aSopenharmony_ci                        }
564cabdff1aSopenharmony_ci                    } else {
565cabdff1aSopenharmony_ci                        c = 2;
566cabdff1aSopenharmony_ci                    }
567cabdff1aSopenharmony_ci                    bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
568cabdff1aSopenharmony_ci                    td->counts.single_ref[c][1][bit]++;
569cabdff1aSopenharmony_ci                    b->ref[0] = 1 + bit;
570cabdff1aSopenharmony_ci                }
571cabdff1aSopenharmony_ci            }
572cabdff1aSopenharmony_ci        }
573cabdff1aSopenharmony_ci
574cabdff1aSopenharmony_ci        if (b->bs <= BS_8x8) {
575cabdff1aSopenharmony_ci            if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
576cabdff1aSopenharmony_ci                b->mode[0] =
577cabdff1aSopenharmony_ci                b->mode[1] =
578cabdff1aSopenharmony_ci                b->mode[2] =
579cabdff1aSopenharmony_ci                b->mode[3] = ZEROMV;
580cabdff1aSopenharmony_ci            } else {
581cabdff1aSopenharmony_ci                static const uint8_t off[10] = {
582cabdff1aSopenharmony_ci                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
583cabdff1aSopenharmony_ci                };
584cabdff1aSopenharmony_ci
585cabdff1aSopenharmony_ci                // FIXME this needs to use the LUT tables from find_ref_mvs
586cabdff1aSopenharmony_ci                // because not all are -1,0/0,-1
587cabdff1aSopenharmony_ci                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
588cabdff1aSopenharmony_ci                                          [td->left_mode_ctx[row7 + off[b->bs]]];
589cabdff1aSopenharmony_ci
590cabdff1aSopenharmony_ci                b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
591cabdff1aSopenharmony_ci                                              s->prob.p.mv_mode[c]);
592cabdff1aSopenharmony_ci                b->mode[1] =
593cabdff1aSopenharmony_ci                b->mode[2] =
594cabdff1aSopenharmony_ci                b->mode[3] = b->mode[0];
595cabdff1aSopenharmony_ci                td->counts.mv_mode[c][b->mode[0] - 10]++;
596cabdff1aSopenharmony_ci            }
597cabdff1aSopenharmony_ci        }
598cabdff1aSopenharmony_ci
599cabdff1aSopenharmony_ci        if (s->s.h.filtermode == FILTER_SWITCHABLE) {
600cabdff1aSopenharmony_ci            int c;
601cabdff1aSopenharmony_ci
602cabdff1aSopenharmony_ci            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
603cabdff1aSopenharmony_ci                if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
604cabdff1aSopenharmony_ci                    c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
605cabdff1aSopenharmony_ci                        td->left_filter_ctx[row7] : 3;
606cabdff1aSopenharmony_ci                } else {
607cabdff1aSopenharmony_ci                    c = s->above_filter_ctx[col];
608cabdff1aSopenharmony_ci                }
609cabdff1aSopenharmony_ci            } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
610cabdff1aSopenharmony_ci                c = td->left_filter_ctx[row7];
611cabdff1aSopenharmony_ci            } else {
612cabdff1aSopenharmony_ci                c = 3;
613cabdff1aSopenharmony_ci            }
614cabdff1aSopenharmony_ci
615cabdff1aSopenharmony_ci            filter_id = vp8_rac_get_tree(td->c, ff_vp9_filter_tree,
616cabdff1aSopenharmony_ci                                         s->prob.p.filter[c]);
617cabdff1aSopenharmony_ci            td->counts.filter[c][filter_id]++;
618cabdff1aSopenharmony_ci            b->filter = ff_vp9_filter_lut[filter_id];
619cabdff1aSopenharmony_ci        } else {
620cabdff1aSopenharmony_ci            b->filter = s->s.h.filtermode;
621cabdff1aSopenharmony_ci        }
622cabdff1aSopenharmony_ci
623cabdff1aSopenharmony_ci        if (b->bs > BS_8x8) {
624cabdff1aSopenharmony_ci            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];
625cabdff1aSopenharmony_ci
626cabdff1aSopenharmony_ci            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
627cabdff1aSopenharmony_ci                                          s->prob.p.mv_mode[c]);
628cabdff1aSopenharmony_ci            td->counts.mv_mode[c][b->mode[0] - 10]++;
629cabdff1aSopenharmony_ci            ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);
630cabdff1aSopenharmony_ci
631cabdff1aSopenharmony_ci            if (b->bs != BS_8x4) {
632cabdff1aSopenharmony_ci                b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
633cabdff1aSopenharmony_ci                                              s->prob.p.mv_mode[c]);
634cabdff1aSopenharmony_ci                td->counts.mv_mode[c][b->mode[1] - 10]++;
635cabdff1aSopenharmony_ci                ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
636cabdff1aSopenharmony_ci            } else {
637cabdff1aSopenharmony_ci                b->mode[1] = b->mode[0];
638cabdff1aSopenharmony_ci                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
639cabdff1aSopenharmony_ci                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
640cabdff1aSopenharmony_ci            }
641cabdff1aSopenharmony_ci
642cabdff1aSopenharmony_ci            if (b->bs != BS_4x8) {
643cabdff1aSopenharmony_ci                b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
644cabdff1aSopenharmony_ci                                              s->prob.p.mv_mode[c]);
645cabdff1aSopenharmony_ci                td->counts.mv_mode[c][b->mode[2] - 10]++;
646cabdff1aSopenharmony_ci                ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);
647cabdff1aSopenharmony_ci
648cabdff1aSopenharmony_ci                if (b->bs != BS_8x4) {
649cabdff1aSopenharmony_ci                    b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
650cabdff1aSopenharmony_ci                                                  s->prob.p.mv_mode[c]);
651cabdff1aSopenharmony_ci                    td->counts.mv_mode[c][b->mode[3] - 10]++;
652cabdff1aSopenharmony_ci                    ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
653cabdff1aSopenharmony_ci                } else {
654cabdff1aSopenharmony_ci                    b->mode[3] = b->mode[2];
655cabdff1aSopenharmony_ci                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
656cabdff1aSopenharmony_ci                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
657cabdff1aSopenharmony_ci                }
658cabdff1aSopenharmony_ci            } else {
659cabdff1aSopenharmony_ci                b->mode[2] = b->mode[0];
660cabdff1aSopenharmony_ci                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
661cabdff1aSopenharmony_ci                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
662cabdff1aSopenharmony_ci                b->mode[3] = b->mode[1];
663cabdff1aSopenharmony_ci                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
664cabdff1aSopenharmony_ci                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
665cabdff1aSopenharmony_ci            }
666cabdff1aSopenharmony_ci        } else {
667cabdff1aSopenharmony_ci            ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
668cabdff1aSopenharmony_ci            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
669cabdff1aSopenharmony_ci            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
670cabdff1aSopenharmony_ci            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
671cabdff1aSopenharmony_ci            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
672cabdff1aSopenharmony_ci            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
673cabdff1aSopenharmony_ci            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
674cabdff1aSopenharmony_ci        }
675cabdff1aSopenharmony_ci
676cabdff1aSopenharmony_ci        vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
677cabdff1aSopenharmony_ci    }
678cabdff1aSopenharmony_ci
679cabdff1aSopenharmony_ci#if HAVE_FAST_64BIT
680cabdff1aSopenharmony_ci#define SPLAT_CTX(var, val, n) \
681cabdff1aSopenharmony_ci    switch (n) { \
682cabdff1aSopenharmony_ci    case 1:  var = val;                                    break; \
683cabdff1aSopenharmony_ci    case 2:  AV_WN16A(&var, val *             0x0101);     break; \
684cabdff1aSopenharmony_ci    case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
685cabdff1aSopenharmony_ci    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
686cabdff1aSopenharmony_ci    case 16: { \
687cabdff1aSopenharmony_ci        uint64_t v64 = val * 0x0101010101010101ULL; \
688cabdff1aSopenharmony_ci        AV_WN64A(              &var,     v64); \
689cabdff1aSopenharmony_ci        AV_WN64A(&((uint8_t *) &var)[8], v64); \
690cabdff1aSopenharmony_ci        break; \
691cabdff1aSopenharmony_ci    } \
692cabdff1aSopenharmony_ci    }
693cabdff1aSopenharmony_ci#else
694cabdff1aSopenharmony_ci#define SPLAT_CTX(var, val, n) \
695cabdff1aSopenharmony_ci    switch (n) { \
696cabdff1aSopenharmony_ci    case 1:  var = val;                         break; \
697cabdff1aSopenharmony_ci    case 2:  AV_WN16A(&var, val *     0x0101);  break; \
698cabdff1aSopenharmony_ci    case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
699cabdff1aSopenharmony_ci    case 8: { \
700cabdff1aSopenharmony_ci        uint32_t v32 = val * 0x01010101; \
701cabdff1aSopenharmony_ci        AV_WN32A(              &var,     v32); \
702cabdff1aSopenharmony_ci        AV_WN32A(&((uint8_t *) &var)[4], v32); \
703cabdff1aSopenharmony_ci        break; \
704cabdff1aSopenharmony_ci    } \
705cabdff1aSopenharmony_ci    case 16: { \
706cabdff1aSopenharmony_ci        uint32_t v32 = val * 0x01010101; \
707cabdff1aSopenharmony_ci        AV_WN32A(              &var,      v32); \
708cabdff1aSopenharmony_ci        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
709cabdff1aSopenharmony_ci        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
710cabdff1aSopenharmony_ci        AV_WN32A(&((uint8_t *) &var)[12], v32); \
711cabdff1aSopenharmony_ci        break; \
712cabdff1aSopenharmony_ci    } \
713cabdff1aSopenharmony_ci    }
714cabdff1aSopenharmony_ci#endif
715cabdff1aSopenharmony_ci
716cabdff1aSopenharmony_ci    switch (ff_vp9_bwh_tab[1][b->bs][0]) {
717cabdff1aSopenharmony_ci#define SET_CTXS(perf, dir, off, n) \
718cabdff1aSopenharmony_ci    do { \
719cabdff1aSopenharmony_ci        SPLAT_CTX(perf->dir##_skip_ctx[off],      b->skip,          n); \
720cabdff1aSopenharmony_ci        SPLAT_CTX(perf->dir##_txfm_ctx[off],      b->tx,            n); \
721cabdff1aSopenharmony_ci        SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
722cabdff1aSopenharmony_ci        if (!s->s.h.keyframe && !s->s.h.intraonly) { \
723cabdff1aSopenharmony_ci            SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra,   n); \
724cabdff1aSopenharmony_ci            SPLAT_CTX(perf->dir##_comp_ctx[off],  b->comp,    n); \
725cabdff1aSopenharmony_ci            SPLAT_CTX(perf->dir##_mode_ctx[off],  b->mode[3], n); \
726cabdff1aSopenharmony_ci            if (!b->intra) { \
727cabdff1aSopenharmony_ci                SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
728cabdff1aSopenharmony_ci                if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
729cabdff1aSopenharmony_ci                    SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
730cabdff1aSopenharmony_ci                } \
731cabdff1aSopenharmony_ci            } \
732cabdff1aSopenharmony_ci        } \
733cabdff1aSopenharmony_ci    } while (0)
734cabdff1aSopenharmony_ci    case 1: SET_CTXS(s, above, col, 1); break;
735cabdff1aSopenharmony_ci    case 2: SET_CTXS(s, above, col, 2); break;
736cabdff1aSopenharmony_ci    case 4: SET_CTXS(s, above, col, 4); break;
737cabdff1aSopenharmony_ci    case 8: SET_CTXS(s, above, col, 8); break;
738cabdff1aSopenharmony_ci    }
739cabdff1aSopenharmony_ci    switch (ff_vp9_bwh_tab[1][b->bs][1]) {
740cabdff1aSopenharmony_ci    case 1: SET_CTXS(td, left, row7, 1); break;
741cabdff1aSopenharmony_ci    case 2: SET_CTXS(td, left, row7, 2); break;
742cabdff1aSopenharmony_ci    case 4: SET_CTXS(td, left, row7, 4); break;
743cabdff1aSopenharmony_ci    case 8: SET_CTXS(td, left, row7, 8); break;
744cabdff1aSopenharmony_ci    }
745cabdff1aSopenharmony_ci#undef SPLAT_CTX
746cabdff1aSopenharmony_ci#undef SET_CTXS
747cabdff1aSopenharmony_ci
748cabdff1aSopenharmony_ci    if (!s->s.h.keyframe && !s->s.h.intraonly) {
749cabdff1aSopenharmony_ci        if (b->bs > BS_8x8) {
750cabdff1aSopenharmony_ci            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
751cabdff1aSopenharmony_ci
752cabdff1aSopenharmony_ci            AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
753cabdff1aSopenharmony_ci            AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
754cabdff1aSopenharmony_ci            AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
755cabdff1aSopenharmony_ci            AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
756cabdff1aSopenharmony_ci            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
757cabdff1aSopenharmony_ci            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
758cabdff1aSopenharmony_ci            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
759cabdff1aSopenharmony_ci            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
760cabdff1aSopenharmony_ci        } else {
761cabdff1aSopenharmony_ci            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
762cabdff1aSopenharmony_ci
763cabdff1aSopenharmony_ci            for (n = 0; n < w4 * 2; n++) {
764cabdff1aSopenharmony_ci                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
765cabdff1aSopenharmony_ci                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
766cabdff1aSopenharmony_ci            }
767cabdff1aSopenharmony_ci            for (n = 0; n < h4 * 2; n++) {
768cabdff1aSopenharmony_ci                AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
769cabdff1aSopenharmony_ci                AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
770cabdff1aSopenharmony_ci            }
771cabdff1aSopenharmony_ci        }
772cabdff1aSopenharmony_ci    }
773cabdff1aSopenharmony_ci
774cabdff1aSopenharmony_ci    // FIXME kinda ugly
775cabdff1aSopenharmony_ci    for (y = 0; y < h4; y++) {
776cabdff1aSopenharmony_ci        int x, o = (row + y) * s->sb_cols * 8 + col;
777cabdff1aSopenharmony_ci        VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
778cabdff1aSopenharmony_ci
779cabdff1aSopenharmony_ci        if (b->intra) {
780cabdff1aSopenharmony_ci            for (x = 0; x < w4; x++) {
781cabdff1aSopenharmony_ci                mv[x].ref[0] =
782cabdff1aSopenharmony_ci                mv[x].ref[1] = -1;
783cabdff1aSopenharmony_ci            }
784cabdff1aSopenharmony_ci        } else if (b->comp) {
785cabdff1aSopenharmony_ci            for (x = 0; x < w4; x++) {
786cabdff1aSopenharmony_ci                mv[x].ref[0] = b->ref[0];
787cabdff1aSopenharmony_ci                mv[x].ref[1] = b->ref[1];
788cabdff1aSopenharmony_ci                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
789cabdff1aSopenharmony_ci                AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
790cabdff1aSopenharmony_ci            }
791cabdff1aSopenharmony_ci        } else {
792cabdff1aSopenharmony_ci            for (x = 0; x < w4; x++) {
793cabdff1aSopenharmony_ci                mv[x].ref[0] = b->ref[0];
794cabdff1aSopenharmony_ci                mv[x].ref[1] = -1;
795cabdff1aSopenharmony_ci                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
796cabdff1aSopenharmony_ci            }
797cabdff1aSopenharmony_ci        }
798cabdff1aSopenharmony_ci    }
799cabdff1aSopenharmony_ci}
800cabdff1aSopenharmony_ci
801cabdff1aSopenharmony_ci// FIXME merge cnt/eob arguments?
/**
 * Decode the coefficient tokens of one transform block from the range coder,
 * dequantize every non-zero coefficient and store it at its scan position.
 *
 * @param c               range coder every bit is read from
 * @param coef            output buffer; written with a plain int16_t store when
 *                        is8bitsperpixel is set, otherwise with one 32-bit
 *                        write per coefficient at int16_t index 2 * position
 * @param n_coeffs        upper bound on the number of scan indices decoded
 * @param is_tx32x32      when non-zero the dequantized value is divided by 2
 * @param is8bitsperpixel selects the store width (see coef); when zero, the
 *                        largest token category reads two extra high bits
 *                        (15 and 14)
 * @param bpp             only compared against 12, and only when
 *                        is8bitsperpixel is zero: two further extra bits
 *                        (17 and 16) are then read
 * @param cnt             counters incremented at [band][ctx][0 = zero token,
 *                        1 = one token, 2 = token larger than one]
 * @param eob             counters incremented at [band][ctx][bit] for every
 *                        end-of-block bit that is read
 * @param p               token probabilities, indexed [band][ctx][0..10]
 * @param nnz             context index for the first coefficient; reused as
 *                        the running context while decoding
 * @param scan            maps the decode index i to the position written in
 *                        coef and in the local cache
 * @param nb              nb[i] names the two cache positions that form the
 *                        context of the coefficient following index i
 * @param band_counts     number of coefficients in each successive band
 * @param qmul            dequantization factors: qmul[0] for index 0,
 *                        qmul[1] for every later index
 *
 * @return number of scan indices consumed before the terminating
 *         end-of-block bit, or n_coeffs if the block ran to its full length
 */
802cabdff1aSopenharmony_cistatic av_always_inline int
803cabdff1aSopenharmony_cidecode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
804cabdff1aSopenharmony_ci                        int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
805cabdff1aSopenharmony_ci                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
806cabdff1aSopenharmony_ci                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
807cabdff1aSopenharmony_ci                        const int16_t *band_counts, int16_t *qmul)
808cabdff1aSopenharmony_ci{
809cabdff1aSopenharmony_ci    int i = 0, band = 0, band_left = band_counts[band];
810cabdff1aSopenharmony_ci    const uint8_t *tp = p[0][nnz];
    // Magnitude class (0..5) of every position decoded so far; read back
    // through nb[] to derive the next context. It is not initialised here.
    // NOTE(review): presumably nb[] only names positions already written
    // in scan order - confirm against the scan/neighbour tables.
811cabdff1aSopenharmony_ci    uint8_t cache[1024];
812cabdff1aSopenharmony_ci
813cabdff1aSopenharmony_ci    do {
814cabdff1aSopenharmony_ci        int val, rc;
815cabdff1aSopenharmony_ci
        // End-of-block bit: a 0 ends the block at the current index i.
816cabdff1aSopenharmony_ci        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
817cabdff1aSopenharmony_ci        eob[band][nnz][val]++;
818cabdff1aSopenharmony_ci        if (!val)
819cabdff1aSopenharmony_ci            break;
820cabdff1aSopenharmony_ci
        // Re-entered by the goto below: the coefficient that follows a zero
        // token is decoded without reading another end-of-block bit.
821cabdff1aSopenharmony_ciskip_eob:
822cabdff1aSopenharmony_ci        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
823cabdff1aSopenharmony_ci            cnt[band][nnz][0]++;
            // Move on to the next band once the current one is used up.
824cabdff1aSopenharmony_ci            if (!--band_left)
825cabdff1aSopenharmony_ci                band_left = band_counts[++band];
            // Only the cache is updated for a zero token; coef is not written.
            // NOTE(review): presumably the caller supplies a zeroed coef
            // buffer - confirm.
826cabdff1aSopenharmony_ci            cache[scan[i]] = 0;
            // Next context: rounded-up mean of the two neighbour classes.
827cabdff1aSopenharmony_ci            nnz            = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
828cabdff1aSopenharmony_ci            tp             = p[band][nnz];
829cabdff1aSopenharmony_ci            if (++i == n_coeffs)
830cabdff1aSopenharmony_ci                break;  //invalid input; blocks should end with EOB
831cabdff1aSopenharmony_ci            goto skip_eob;
832cabdff1aSopenharmony_ci        }
833cabdff1aSopenharmony_ci
        // Non-zero token: val receives the magnitude, cache[rc] its class.
834cabdff1aSopenharmony_ci        rc = scan[i];
835cabdff1aSopenharmony_ci        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
836cabdff1aSopenharmony_ci            cnt[band][nnz][1]++;
837cabdff1aSopenharmony_ci            val       = 1;
838cabdff1aSopenharmony_ci            cache[rc] = 1;
839cabdff1aSopenharmony_ci        } else {
840cabdff1aSopenharmony_ci            cnt[band][nnz][2]++;
841cabdff1aSopenharmony_ci            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
842cabdff1aSopenharmony_ci                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
843cabdff1aSopenharmony_ci                    cache[rc] = val = 2;
844cabdff1aSopenharmony_ci                } else {
                    // 3 or 4; both share cache class 3.
845cabdff1aSopenharmony_ci                    val       = 3 + vp56_rac_get_prob(c, tp[5]);
846cabdff1aSopenharmony_ci                    cache[rc] = 3;
847cabdff1aSopenharmony_ci                }
848cabdff1aSopenharmony_ci            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
                // The extra bits of the categories below are read MSB first,
                // each with the fixed probability written inline.
849cabdff1aSopenharmony_ci                cache[rc] = 4;
850cabdff1aSopenharmony_ci                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
                    // cat1: 5..6 (1 extra bit)
851cabdff1aSopenharmony_ci                    val  =  vp56_rac_get_prob(c, 159) + 5;
852cabdff1aSopenharmony_ci                } else {
                    // cat2: 7..10 (2 extra bits)
853cabdff1aSopenharmony_ci                    val  = (vp56_rac_get_prob(c, 165) << 1) + 7;
854cabdff1aSopenharmony_ci                    val +=  vp56_rac_get_prob(c, 145);
855cabdff1aSopenharmony_ci                }
856cabdff1aSopenharmony_ci            } else { // cat 3-6
857cabdff1aSopenharmony_ci                cache[rc] = 5;
858cabdff1aSopenharmony_ci                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
859cabdff1aSopenharmony_ci                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
                        // cat3: 11..18 (3 extra bits)
860cabdff1aSopenharmony_ci                        val  = 11 + (vp56_rac_get_prob(c, 173) << 2);
861cabdff1aSopenharmony_ci                        val +=      (vp56_rac_get_prob(c, 148) << 1);
862cabdff1aSopenharmony_ci                        val +=       vp56_rac_get_prob(c, 140);
863cabdff1aSopenharmony_ci                    } else {
                        // cat4: 19..34 (4 extra bits)
864cabdff1aSopenharmony_ci                        val  = 19 + (vp56_rac_get_prob(c, 176) << 3);
865cabdff1aSopenharmony_ci                        val +=      (vp56_rac_get_prob(c, 155) << 2);
866cabdff1aSopenharmony_ci                        val +=      (vp56_rac_get_prob(c, 140) << 1);
867cabdff1aSopenharmony_ci                        val +=       vp56_rac_get_prob(c, 135);
868cabdff1aSopenharmony_ci                    }
869cabdff1aSopenharmony_ci                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
                    // cat5: 35..66 (5 extra bits)
870cabdff1aSopenharmony_ci                    val  = (vp56_rac_get_prob(c, 180) << 4) + 35;
871cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 157) << 3);
872cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 141) << 2);
873cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 134) << 1);
874cabdff1aSopenharmony_ci                    val +=  vp56_rac_get_prob(c, 130);
875cabdff1aSopenharmony_ci                } else {
                    // cat6: 67 plus 14 extra bits; 16 extra bits when
                    // is8bitsperpixel is zero, 18 when bpp is also 12.
876cabdff1aSopenharmony_ci                    val = 67;
877cabdff1aSopenharmony_ci                    if (!is8bitsperpixel) {
878cabdff1aSopenharmony_ci                        if (bpp == 12) {
879cabdff1aSopenharmony_ci                            val += vp56_rac_get_prob(c, 255) << 17;
880cabdff1aSopenharmony_ci                            val += vp56_rac_get_prob(c, 255) << 16;
881cabdff1aSopenharmony_ci                        }
882cabdff1aSopenharmony_ci                        val +=  (vp56_rac_get_prob(c, 255) << 15);
883cabdff1aSopenharmony_ci                        val +=  (vp56_rac_get_prob(c, 255) << 14);
884cabdff1aSopenharmony_ci                    }
885cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 254) << 13);
886cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 254) << 12);
887cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 254) << 11);
888cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 252) << 10);
889cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 249) << 9);
890cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 243) << 8);
891cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 230) << 7);
892cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 196) << 6);
893cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 177) << 5);
894cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 153) << 4);
895cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 140) << 3);
896cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 133) << 2);
897cabdff1aSopenharmony_ci                    val += (vp56_rac_get_prob(c, 130) << 1);
898cabdff1aSopenharmony_ci                    val +=  vp56_rac_get_prob(c, 129);
899cabdff1aSopenharmony_ci                }
900cabdff1aSopenharmony_ci            }
901cabdff1aSopenharmony_ci        }
        // STORE_COEF(c, i, v): store v for coefficient position i. In the
        // 8-bit case this is a plain assignment to c[i]; otherwise it is an
        // AV_WN32A write at &c[i * 2], i.e. one 32-bit slot per coefficient.
        // v is expanded in both branches, but only one of them executes, so
        // an expression with side effects is still evaluated once.
902cabdff1aSopenharmony_ci#define STORE_COEF(c, i, v) do { \
903cabdff1aSopenharmony_ci    if (is8bitsperpixel) { \
904cabdff1aSopenharmony_ci        c[i] = v; \
905cabdff1aSopenharmony_ci    } else { \
906cabdff1aSopenharmony_ci        AV_WN32A(&c[i * 2], v); \
907cabdff1aSopenharmony_ci    } \
908cabdff1aSopenharmony_ci} while (0)
909cabdff1aSopenharmony_ci        if (!--band_left)
910cabdff1aSopenharmony_ci            band_left = band_counts[++band];
        // Read the sign bit, apply it to val and dequantize: qmul[0] is used
        // for index 0, qmul[1] for every later index. The product is formed
        // in unsigned arithmetic; the 32x32 path converts it back to int and
        // halves it before storing.
911cabdff1aSopenharmony_ci        if (is_tx32x32)
912cabdff1aSopenharmony_ci            STORE_COEF(coef, rc, (int)((vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
913cabdff1aSopenharmony_ci        else
914cabdff1aSopenharmony_ci            STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
        // Context and probabilities for the next coefficient.
915cabdff1aSopenharmony_ci        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
916cabdff1aSopenharmony_ci        tp = p[band][nnz];
917cabdff1aSopenharmony_ci    } while (++i < n_coeffs);
918cabdff1aSopenharmony_ci
919cabdff1aSopenharmony_ci    return i;
920cabdff1aSopenharmony_ci}
921cabdff1aSopenharmony_ci
/**
 * 8-bit coefficient decoder for transforms other than 32x32.
 *
 * Forwards to decode_coeffs_b_generic() with the range coder td->c and the
 * literal flags is_tx32x32 = 0, is8bitsperpixel = 1, bpp = 8; every other
 * argument is passed through unchanged.
 * NOTE(review): the literal flags presumably let the always-inlined generic
 * body be specialised per wrapper - confirm.
 *
 * @return the value returned by decode_coeffs_b_generic()
 */
922cabdff1aSopenharmony_cistatic int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
923cabdff1aSopenharmony_ci                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
924cabdff1aSopenharmony_ci                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
925cabdff1aSopenharmony_ci                                const int16_t (*nb)[2], const int16_t *band_counts,
926cabdff1aSopenharmony_ci                                int16_t *qmul)
927cabdff1aSopenharmony_ci{
928cabdff1aSopenharmony_ci    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
929cabdff1aSopenharmony_ci                                   nnz, scan, nb, band_counts, qmul);
930cabdff1aSopenharmony_ci}
931cabdff1aSopenharmony_ci
932cabdff1aSopenharmony_cistatic int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
933cabdff1aSopenharmony_ci                                  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
934cabdff1aSopenharmony_ci                                  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
935cabdff1aSopenharmony_ci                                  const int16_t (*nb)[2], const int16_t *band_counts,
936cabdff1aSopenharmony_ci                                  int16_t *qmul)
937cabdff1aSopenharmony_ci{
938cabdff1aSopenharmony_ci    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
939cabdff1aSopenharmony_ci                                   nnz, scan, nb, band_counts, qmul);
940cabdff1aSopenharmony_ci}
941cabdff1aSopenharmony_ci
942cabdff1aSopenharmony_cistatic int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
943cabdff1aSopenharmony_ci                                 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
944cabdff1aSopenharmony_ci                                 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
945cabdff1aSopenharmony_ci                                 const int16_t (*nb)[2], const int16_t *band_counts,
946cabdff1aSopenharmony_ci                                 int16_t *qmul)
947cabdff1aSopenharmony_ci{
948cabdff1aSopenharmony_ci    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p,
949cabdff1aSopenharmony_ci                                   nnz, scan, nb, band_counts, qmul);
950cabdff1aSopenharmony_ci}
951cabdff1aSopenharmony_ci
952cabdff1aSopenharmony_cistatic int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
953cabdff1aSopenharmony_ci                                   unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
954cabdff1aSopenharmony_ci                                   uint8_t (*p)[6][11], int nnz, const int16_t *scan,
955cabdff1aSopenharmony_ci                                   const int16_t (*nb)[2], const int16_t *band_counts,
956cabdff1aSopenharmony_ci                                   int16_t *qmul)
957cabdff1aSopenharmony_ci{
958cabdff1aSopenharmony_ci    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p,
959cabdff1aSopenharmony_ci                                   nnz, scan, nb, band_counts, qmul);
960cabdff1aSopenharmony_ci}
961cabdff1aSopenharmony_ci
962cabdff1aSopenharmony_cistatic av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
963cabdff1aSopenharmony_ci{
964cabdff1aSopenharmony_ci    VP9Context *s = td->s;
965cabdff1aSopenharmony_ci    VP9Block *b = td->b;
966cabdff1aSopenharmony_ci    int row = td->row, col = td->col;
967cabdff1aSopenharmony_ci    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
968cabdff1aSopenharmony_ci    unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
969cabdff1aSopenharmony_ci    unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
970cabdff1aSopenharmony_ci    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
971cabdff1aSopenharmony_ci    int end_x = FFMIN(2 * (s->cols - col), w4);
972cabdff1aSopenharmony_ci    int end_y = FFMIN(2 * (s->rows - row), h4);
973cabdff1aSopenharmony_ci    int n, pl, x, y, ret;
974cabdff1aSopenharmony_ci    int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
975cabdff1aSopenharmony_ci    int tx = 4 * s->s.h.lossless + b->tx;
976cabdff1aSopenharmony_ci    const int16_t * const *yscans = ff_vp9_scans[tx];
977cabdff1aSopenharmony_ci    const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
978cabdff1aSopenharmony_ci    const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
979cabdff1aSopenharmony_ci    const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
980cabdff1aSopenharmony_ci    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
981cabdff1aSopenharmony_ci    uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
982cabdff1aSopenharmony_ci    static const int16_t band_counts[4][8] = {
983cabdff1aSopenharmony_ci        { 1, 2, 3, 4,  3,   16 - 13 },
984cabdff1aSopenharmony_ci        { 1, 2, 3, 4, 11,   64 - 21 },
985cabdff1aSopenharmony_ci        { 1, 2, 3, 4, 11,  256 - 21 },
986cabdff1aSopenharmony_ci        { 1, 2, 3, 4, 11, 1024 - 21 },
987cabdff1aSopenharmony_ci    };
988cabdff1aSopenharmony_ci    const int16_t *y_band_counts = band_counts[b->tx];
989cabdff1aSopenharmony_ci    const int16_t *uv_band_counts = band_counts[b->uvtx];
990cabdff1aSopenharmony_ci    int bytesperpixel = is8bitsperpixel ? 1 : 2;
991cabdff1aSopenharmony_ci    int total_coeff = 0;
992cabdff1aSopenharmony_ci
993cabdff1aSopenharmony_ci#define MERGE(la, end, step, rd) \
994cabdff1aSopenharmony_ci    for (n = 0; n < end; n += step) \
995cabdff1aSopenharmony_ci        la[n] = !!rd(&la[n])
996cabdff1aSopenharmony_ci#define MERGE_CTX(step, rd) \
997cabdff1aSopenharmony_ci    do { \
998cabdff1aSopenharmony_ci        MERGE(l, end_y, step, rd); \
999cabdff1aSopenharmony_ci        MERGE(a, end_x, step, rd); \
1000cabdff1aSopenharmony_ci    } while (0)
1001cabdff1aSopenharmony_ci
1002cabdff1aSopenharmony_ci#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
1003cabdff1aSopenharmony_ci    for (n = 0, y = 0; y < end_y; y += step) { \
1004cabdff1aSopenharmony_ci        for (x = 0; x < end_x; x += step, n += step * step) { \
1005cabdff1aSopenharmony_ci            enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
1006cabdff1aSopenharmony_ci            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1007cabdff1aSopenharmony_ci                                    (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
1008cabdff1aSopenharmony_ci                                     c, e, p, a[x] + l[y], yscans[txtp], \
1009cabdff1aSopenharmony_ci                                     ynbs[txtp], y_band_counts, qmul[0]); \
1010cabdff1aSopenharmony_ci            a[x] = l[y] = !!ret; \
1011cabdff1aSopenharmony_ci            total_coeff |= !!ret; \
1012cabdff1aSopenharmony_ci            if (step >= 4) { \
1013cabdff1aSopenharmony_ci                AV_WN16A(&td->eob[n], ret); \
1014cabdff1aSopenharmony_ci            } else { \
1015cabdff1aSopenharmony_ci                td->eob[n] = ret; \
1016cabdff1aSopenharmony_ci            } \
1017cabdff1aSopenharmony_ci        } \
1018cabdff1aSopenharmony_ci    }
1019cabdff1aSopenharmony_ci
1020cabdff1aSopenharmony_ci#define SPLAT(la, end, step, cond) \
1021cabdff1aSopenharmony_ci    if (step == 2) { \
1022cabdff1aSopenharmony_ci        for (n = 1; n < end; n += step) \
1023cabdff1aSopenharmony_ci            la[n] = la[n - 1]; \
1024cabdff1aSopenharmony_ci    } else if (step == 4) { \
1025cabdff1aSopenharmony_ci        if (cond) { \
1026cabdff1aSopenharmony_ci            for (n = 0; n < end; n += step) \
1027cabdff1aSopenharmony_ci                AV_WN32A(&la[n], la[n] * 0x01010101); \
1028cabdff1aSopenharmony_ci        } else { \
1029cabdff1aSopenharmony_ci            for (n = 0; n < end; n += step) \
1030cabdff1aSopenharmony_ci                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
1031cabdff1aSopenharmony_ci        } \
1032cabdff1aSopenharmony_ci    } else /* step == 8 */ { \
1033cabdff1aSopenharmony_ci        if (cond) { \
1034cabdff1aSopenharmony_ci            if (HAVE_FAST_64BIT) { \
1035cabdff1aSopenharmony_ci                for (n = 0; n < end; n += step) \
1036cabdff1aSopenharmony_ci                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
1037cabdff1aSopenharmony_ci            } else { \
1038cabdff1aSopenharmony_ci                for (n = 0; n < end; n += step) { \
1039cabdff1aSopenharmony_ci                    uint32_t v32 = la[n] * 0x01010101; \
1040cabdff1aSopenharmony_ci                    AV_WN32A(&la[n],     v32); \
1041cabdff1aSopenharmony_ci                    AV_WN32A(&la[n + 4], v32); \
1042cabdff1aSopenharmony_ci                } \
1043cabdff1aSopenharmony_ci            } \
1044cabdff1aSopenharmony_ci        } else { \
1045cabdff1aSopenharmony_ci            for (n = 0; n < end; n += step) \
1046cabdff1aSopenharmony_ci                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
1047cabdff1aSopenharmony_ci        } \
1048cabdff1aSopenharmony_ci    }
1049cabdff1aSopenharmony_ci#define SPLAT_CTX(step) \
1050cabdff1aSopenharmony_ci    do { \
1051cabdff1aSopenharmony_ci        SPLAT(a, end_x, step, end_x == w4); \
1052cabdff1aSopenharmony_ci        SPLAT(l, end_y, step, end_y == h4); \
1053cabdff1aSopenharmony_ci    } while (0)
1054cabdff1aSopenharmony_ci
1055cabdff1aSopenharmony_ci    /* y tokens */
1056cabdff1aSopenharmony_ci    switch (b->tx) {
1057cabdff1aSopenharmony_ci    case TX_4X4:
1058cabdff1aSopenharmony_ci        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
1059cabdff1aSopenharmony_ci        break;
1060cabdff1aSopenharmony_ci    case TX_8X8:
1061cabdff1aSopenharmony_ci        MERGE_CTX(2, AV_RN16A);
1062cabdff1aSopenharmony_ci        DECODE_Y_COEF_LOOP(2, 0,);
1063cabdff1aSopenharmony_ci        SPLAT_CTX(2);
1064cabdff1aSopenharmony_ci        break;
1065cabdff1aSopenharmony_ci    case TX_16X16:
1066cabdff1aSopenharmony_ci        MERGE_CTX(4, AV_RN32A);
1067cabdff1aSopenharmony_ci        DECODE_Y_COEF_LOOP(4, 0,);
1068cabdff1aSopenharmony_ci        SPLAT_CTX(4);
1069cabdff1aSopenharmony_ci        break;
1070cabdff1aSopenharmony_ci    case TX_32X32:
1071cabdff1aSopenharmony_ci        MERGE_CTX(8, AV_RN64A);
1072cabdff1aSopenharmony_ci        DECODE_Y_COEF_LOOP(8, 0, 32);
1073cabdff1aSopenharmony_ci        SPLAT_CTX(8);
1074cabdff1aSopenharmony_ci        break;
1075cabdff1aSopenharmony_ci    }
1076cabdff1aSopenharmony_ci
1077cabdff1aSopenharmony_ci#define DECODE_UV_COEF_LOOP(step, v) \
1078cabdff1aSopenharmony_ci    for (n = 0, y = 0; y < end_y; y += step) { \
1079cabdff1aSopenharmony_ci        for (x = 0; x < end_x; x += step, n += step * step) { \
1080cabdff1aSopenharmony_ci            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1081cabdff1aSopenharmony_ci                                    (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
1082cabdff1aSopenharmony_ci                                     16 * step * step, c, e, p, a[x] + l[y], \
1083cabdff1aSopenharmony_ci                                     uvscan, uvnb, uv_band_counts, qmul[1]); \
1084cabdff1aSopenharmony_ci            a[x] = l[y] = !!ret; \
1085cabdff1aSopenharmony_ci            total_coeff |= !!ret; \
1086cabdff1aSopenharmony_ci            if (step >= 4) { \
1087cabdff1aSopenharmony_ci                AV_WN16A(&td->uveob[pl][n], ret); \
1088cabdff1aSopenharmony_ci            } else { \
1089cabdff1aSopenharmony_ci                td->uveob[pl][n] = ret; \
1090cabdff1aSopenharmony_ci            } \
1091cabdff1aSopenharmony_ci        } \
1092cabdff1aSopenharmony_ci    }
1093cabdff1aSopenharmony_ci
1094cabdff1aSopenharmony_ci    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1095cabdff1aSopenharmony_ci    c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1096cabdff1aSopenharmony_ci    e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1097cabdff1aSopenharmony_ci    w4 >>= s->ss_h;
1098cabdff1aSopenharmony_ci    end_x >>= s->ss_h;
1099cabdff1aSopenharmony_ci    h4 >>= s->ss_v;
1100cabdff1aSopenharmony_ci    end_y >>= s->ss_v;
1101cabdff1aSopenharmony_ci    for (pl = 0; pl < 2; pl++) {
1102cabdff1aSopenharmony_ci        a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
1103cabdff1aSopenharmony_ci        l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
1104cabdff1aSopenharmony_ci        switch (b->uvtx) {
1105cabdff1aSopenharmony_ci        case TX_4X4:
1106cabdff1aSopenharmony_ci            DECODE_UV_COEF_LOOP(1,);
1107cabdff1aSopenharmony_ci            break;
1108cabdff1aSopenharmony_ci        case TX_8X8:
1109cabdff1aSopenharmony_ci            MERGE_CTX(2, AV_RN16A);
1110cabdff1aSopenharmony_ci            DECODE_UV_COEF_LOOP(2,);
1111cabdff1aSopenharmony_ci            SPLAT_CTX(2);
1112cabdff1aSopenharmony_ci            break;
1113cabdff1aSopenharmony_ci        case TX_16X16:
1114cabdff1aSopenharmony_ci            MERGE_CTX(4, AV_RN32A);
1115cabdff1aSopenharmony_ci            DECODE_UV_COEF_LOOP(4,);
1116cabdff1aSopenharmony_ci            SPLAT_CTX(4);
1117cabdff1aSopenharmony_ci            break;
1118cabdff1aSopenharmony_ci        case TX_32X32:
1119cabdff1aSopenharmony_ci            MERGE_CTX(8, AV_RN64A);
1120cabdff1aSopenharmony_ci            DECODE_UV_COEF_LOOP(8, 32);
1121cabdff1aSopenharmony_ci            SPLAT_CTX(8);
1122cabdff1aSopenharmony_ci            break;
1123cabdff1aSopenharmony_ci        }
1124cabdff1aSopenharmony_ci    }
1125cabdff1aSopenharmony_ci
1126cabdff1aSopenharmony_ci    return total_coeff;
1127cabdff1aSopenharmony_ci}
1128cabdff1aSopenharmony_ci
1129cabdff1aSopenharmony_cistatic int decode_coeffs_8bpp(VP9TileData *td)
1130cabdff1aSopenharmony_ci{
1131cabdff1aSopenharmony_ci    return decode_coeffs(td, 1);
1132cabdff1aSopenharmony_ci}
1133cabdff1aSopenharmony_ci
1134cabdff1aSopenharmony_cistatic int decode_coeffs_16bpp(VP9TileData *td)
1135cabdff1aSopenharmony_ci{
1136cabdff1aSopenharmony_ci    return decode_coeffs(td, 0);
1137cabdff1aSopenharmony_ci}
1138cabdff1aSopenharmony_ci
1139cabdff1aSopenharmony_cistatic av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
1140cabdff1aSopenharmony_ci                                        int row_and_7, int col_and_7,
1141cabdff1aSopenharmony_ci                                        int w, int h, int col_end, int row_end,
1142cabdff1aSopenharmony_ci                                        enum TxfmMode tx, int skip_inter)
1143cabdff1aSopenharmony_ci{
1144cabdff1aSopenharmony_ci    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
1145cabdff1aSopenharmony_ci    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
1146cabdff1aSopenharmony_ci
1147cabdff1aSopenharmony_ci    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
1148cabdff1aSopenharmony_ci    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
1149cabdff1aSopenharmony_ci    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
1150cabdff1aSopenharmony_ci    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
1151cabdff1aSopenharmony_ci
1152cabdff1aSopenharmony_ci    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
1153cabdff1aSopenharmony_ci    // edges. This means that for UV, we work on two subsampled blocks at
1154cabdff1aSopenharmony_ci    // a time, and we only use the topleft block's mode information to set
1155cabdff1aSopenharmony_ci    // things like block strength. Thus, for any block size smaller than
1156cabdff1aSopenharmony_ci    // 16x16, ignore the odd portion of the block.
1157cabdff1aSopenharmony_ci    if (tx == TX_4X4 && (ss_v | ss_h)) {
1158cabdff1aSopenharmony_ci        if (h == ss_v) {
1159cabdff1aSopenharmony_ci            if (row_and_7 & 1)
1160cabdff1aSopenharmony_ci                return;
1161cabdff1aSopenharmony_ci            if (!row_end)
1162cabdff1aSopenharmony_ci                h += 1;
1163cabdff1aSopenharmony_ci        }
1164cabdff1aSopenharmony_ci        if (w == ss_h) {
1165cabdff1aSopenharmony_ci            if (col_and_7 & 1)
1166cabdff1aSopenharmony_ci                return;
1167cabdff1aSopenharmony_ci            if (!col_end)
1168cabdff1aSopenharmony_ci                w += 1;
1169cabdff1aSopenharmony_ci        }
1170cabdff1aSopenharmony_ci    }
1171cabdff1aSopenharmony_ci
1172cabdff1aSopenharmony_ci    if (tx == TX_4X4 && !skip_inter) {
1173cabdff1aSopenharmony_ci        int t = 1 << col_and_7, m_col = (t << w) - t, y;
1174cabdff1aSopenharmony_ci        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
1175cabdff1aSopenharmony_ci        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
1176cabdff1aSopenharmony_ci
1177cabdff1aSopenharmony_ci        for (y = row_and_7; y < h + row_and_7; y++) {
1178cabdff1aSopenharmony_ci            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
1179cabdff1aSopenharmony_ci
1180cabdff1aSopenharmony_ci            mask[0][y][1] |= m_row_8;
1181cabdff1aSopenharmony_ci            mask[0][y][2] |= m_row_4;
1182cabdff1aSopenharmony_ci            // for odd lines, if the odd col is not being filtered,
1183cabdff1aSopenharmony_ci            // skip odd row also:
1184cabdff1aSopenharmony_ci            // .---. <-- a
1185cabdff1aSopenharmony_ci            // |   |
1186cabdff1aSopenharmony_ci            // |___| <-- b
1187cabdff1aSopenharmony_ci            // ^   ^
1188cabdff1aSopenharmony_ci            // c   d
1189cabdff1aSopenharmony_ci            //
1190cabdff1aSopenharmony_ci            // if a/c are even row/col and b/d are odd, and d is skipped,
1191cabdff1aSopenharmony_ci            // e.g. right edge of size-66x66.webm, then skip b also (bug)
1192cabdff1aSopenharmony_ci            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
1193cabdff1aSopenharmony_ci                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
1194cabdff1aSopenharmony_ci            } else {
1195cabdff1aSopenharmony_ci                mask[1][y][col_mask_id] |= m_col;
1196cabdff1aSopenharmony_ci            }
1197cabdff1aSopenharmony_ci            if (!ss_h)
1198cabdff1aSopenharmony_ci                mask[0][y][3] |= m_col;
1199cabdff1aSopenharmony_ci            if (!ss_v) {
1200cabdff1aSopenharmony_ci                if (ss_h && (col_end & 1))
1201cabdff1aSopenharmony_ci                    mask[1][y][3] |= (t << (w - 1)) - t;
1202cabdff1aSopenharmony_ci                else
1203cabdff1aSopenharmony_ci                    mask[1][y][3] |= m_col;
1204cabdff1aSopenharmony_ci            }
1205cabdff1aSopenharmony_ci        }
1206cabdff1aSopenharmony_ci    } else {
1207cabdff1aSopenharmony_ci        int y, t = 1 << col_and_7, m_col = (t << w) - t;
1208cabdff1aSopenharmony_ci
1209cabdff1aSopenharmony_ci        if (!skip_inter) {
1210cabdff1aSopenharmony_ci            int mask_id = (tx == TX_8X8);
1211cabdff1aSopenharmony_ci            int l2 = tx + ss_h - 1, step1d;
1212cabdff1aSopenharmony_ci            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
1213cabdff1aSopenharmony_ci            int m_row = m_col & masks[l2];
1214cabdff1aSopenharmony_ci
1215cabdff1aSopenharmony_ci            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
1216cabdff1aSopenharmony_ci            // 8wd loopfilter to prevent going off the visible edge.
1217cabdff1aSopenharmony_ci            if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
1218cabdff1aSopenharmony_ci                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
1219cabdff1aSopenharmony_ci                int m_row_8 = m_row - m_row_16;
1220cabdff1aSopenharmony_ci
1221cabdff1aSopenharmony_ci                for (y = row_and_7; y < h + row_and_7; y++) {
1222cabdff1aSopenharmony_ci                    mask[0][y][0] |= m_row_16;
1223cabdff1aSopenharmony_ci                    mask[0][y][1] |= m_row_8;
1224cabdff1aSopenharmony_ci                }
1225cabdff1aSopenharmony_ci            } else {
1226cabdff1aSopenharmony_ci                for (y = row_and_7; y < h + row_and_7; y++)
1227cabdff1aSopenharmony_ci                    mask[0][y][mask_id] |= m_row;
1228cabdff1aSopenharmony_ci            }
1229cabdff1aSopenharmony_ci
1230cabdff1aSopenharmony_ci            l2 = tx + ss_v - 1;
1231cabdff1aSopenharmony_ci            step1d = 1 << l2;
1232cabdff1aSopenharmony_ci            if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
1233cabdff1aSopenharmony_ci                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
1234cabdff1aSopenharmony_ci                    mask[1][y][0] |= m_col;
1235cabdff1aSopenharmony_ci                if (y - row_and_7 == h - 1)
1236cabdff1aSopenharmony_ci                    mask[1][y][1] |= m_col;
1237cabdff1aSopenharmony_ci            } else {
1238cabdff1aSopenharmony_ci                for (y = row_and_7; y < h + row_and_7; y += step1d)
1239cabdff1aSopenharmony_ci                    mask[1][y][mask_id] |= m_col;
1240cabdff1aSopenharmony_ci            }
1241cabdff1aSopenharmony_ci        } else if (tx != TX_4X4) {
1242cabdff1aSopenharmony_ci            int mask_id;
1243cabdff1aSopenharmony_ci
1244cabdff1aSopenharmony_ci            mask_id = (tx == TX_8X8) || (h == ss_v);
1245cabdff1aSopenharmony_ci            mask[1][row_and_7][mask_id] |= m_col;
1246cabdff1aSopenharmony_ci            mask_id = (tx == TX_8X8) || (w == ss_h);
1247cabdff1aSopenharmony_ci            for (y = row_and_7; y < h + row_and_7; y++)
1248cabdff1aSopenharmony_ci                mask[0][y][mask_id] |= t;
1249cabdff1aSopenharmony_ci        } else {
1250cabdff1aSopenharmony_ci            int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
1251cabdff1aSopenharmony_ci
1252cabdff1aSopenharmony_ci            for (y = row_and_7; y < h + row_and_7; y++) {
1253cabdff1aSopenharmony_ci                mask[0][y][2] |= t4;
1254cabdff1aSopenharmony_ci                mask[0][y][1] |= t8;
1255cabdff1aSopenharmony_ci            }
1256cabdff1aSopenharmony_ci            mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
1257cabdff1aSopenharmony_ci        }
1258cabdff1aSopenharmony_ci    }
1259cabdff1aSopenharmony_ci}
1260cabdff1aSopenharmony_ci
1261cabdff1aSopenharmony_civoid ff_vp9_decode_block(VP9TileData *td, int row, int col,
1262cabdff1aSopenharmony_ci                         VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
1263cabdff1aSopenharmony_ci                         enum BlockLevel bl, enum BlockPartition bp)
1264cabdff1aSopenharmony_ci{
1265cabdff1aSopenharmony_ci    VP9Context *s = td->s;
1266cabdff1aSopenharmony_ci    VP9Block *b = td->b;
1267cabdff1aSopenharmony_ci    enum BlockSize bs = bl * 3 + bp;
1268cabdff1aSopenharmony_ci    int bytesperpixel = s->bytesperpixel;
1269cabdff1aSopenharmony_ci    int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
1270cabdff1aSopenharmony_ci    int emu[2];
1271cabdff1aSopenharmony_ci    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1272cabdff1aSopenharmony_ci
1273cabdff1aSopenharmony_ci    td->row = row;
1274cabdff1aSopenharmony_ci    td->row7 = row & 7;
1275cabdff1aSopenharmony_ci    td->col = col;
1276cabdff1aSopenharmony_ci    td->col7 = col & 7;
1277cabdff1aSopenharmony_ci
1278cabdff1aSopenharmony_ci    td->min_mv.x = -(128 + col * 64);
1279cabdff1aSopenharmony_ci    td->min_mv.y = -(128 + row * 64);
1280cabdff1aSopenharmony_ci    td->max_mv.x = 128 + (s->cols - col - w4) * 64;
1281cabdff1aSopenharmony_ci    td->max_mv.y = 128 + (s->rows - row - h4) * 64;
1282cabdff1aSopenharmony_ci
1283cabdff1aSopenharmony_ci    if (s->pass < 2) {
1284cabdff1aSopenharmony_ci        b->bs = bs;
1285cabdff1aSopenharmony_ci        b->bl = bl;
1286cabdff1aSopenharmony_ci        b->bp = bp;
1287cabdff1aSopenharmony_ci        decode_mode(td);
1288cabdff1aSopenharmony_ci        b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
1289cabdff1aSopenharmony_ci                           (s->ss_v && h4 * 2 == (1 << b->tx)));
1290cabdff1aSopenharmony_ci
1291cabdff1aSopenharmony_ci        if (td->block_structure) {
1292cabdff1aSopenharmony_ci            td->block_structure[td->nb_block_structure].row = row;
1293cabdff1aSopenharmony_ci            td->block_structure[td->nb_block_structure].col = col;
1294cabdff1aSopenharmony_ci            td->block_structure[td->nb_block_structure].block_size_idx_x = av_log2(w4);
1295cabdff1aSopenharmony_ci            td->block_structure[td->nb_block_structure].block_size_idx_y = av_log2(h4);
1296cabdff1aSopenharmony_ci            td->nb_block_structure++;
1297cabdff1aSopenharmony_ci        }
1298cabdff1aSopenharmony_ci
1299cabdff1aSopenharmony_ci        if (!b->skip) {
1300cabdff1aSopenharmony_ci            int has_coeffs;
1301cabdff1aSopenharmony_ci
1302cabdff1aSopenharmony_ci            if (bytesperpixel == 1) {
1303cabdff1aSopenharmony_ci                has_coeffs = decode_coeffs_8bpp(td);
1304cabdff1aSopenharmony_ci            } else {
1305cabdff1aSopenharmony_ci                has_coeffs = decode_coeffs_16bpp(td);
1306cabdff1aSopenharmony_ci            }
1307cabdff1aSopenharmony_ci            if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
1308cabdff1aSopenharmony_ci                b->skip = 1;
1309cabdff1aSopenharmony_ci                memset(&s->above_skip_ctx[col], 1, w4);
1310cabdff1aSopenharmony_ci                memset(&td->left_skip_ctx[td->row7], 1, h4);
1311cabdff1aSopenharmony_ci            }
1312cabdff1aSopenharmony_ci        } else {
1313cabdff1aSopenharmony_ci            int row7 = td->row7;
1314cabdff1aSopenharmony_ci
1315cabdff1aSopenharmony_ci#define SPLAT_ZERO_CTX(v, n) \
1316cabdff1aSopenharmony_ci    switch (n) { \
1317cabdff1aSopenharmony_ci    case 1:  v = 0;          break; \
1318cabdff1aSopenharmony_ci    case 2:  AV_ZERO16(&v);  break; \
1319cabdff1aSopenharmony_ci    case 4:  AV_ZERO32(&v);  break; \
1320cabdff1aSopenharmony_ci    case 8:  AV_ZERO64(&v);  break; \
1321cabdff1aSopenharmony_ci    case 16: AV_ZERO128(&v); break; \
1322cabdff1aSopenharmony_ci    }
1323cabdff1aSopenharmony_ci#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
1324cabdff1aSopenharmony_ci    do { \
1325cabdff1aSopenharmony_ci        SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
1326cabdff1aSopenharmony_ci        if (s->ss_##dir2) { \
1327cabdff1aSopenharmony_ci            SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
1328cabdff1aSopenharmony_ci            SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
1329cabdff1aSopenharmony_ci        } else { \
1330cabdff1aSopenharmony_ci            SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
1331cabdff1aSopenharmony_ci            SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
1332cabdff1aSopenharmony_ci        } \
1333cabdff1aSopenharmony_ci    } while (0)
1334cabdff1aSopenharmony_ci
1335cabdff1aSopenharmony_ci            switch (w4) {
1336cabdff1aSopenharmony_ci            case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
1337cabdff1aSopenharmony_ci            case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
1338cabdff1aSopenharmony_ci            case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
1339cabdff1aSopenharmony_ci            case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
1340cabdff1aSopenharmony_ci            }
1341cabdff1aSopenharmony_ci            switch (h4) {
1342cabdff1aSopenharmony_ci            case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
1343cabdff1aSopenharmony_ci            case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
1344cabdff1aSopenharmony_ci            case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
1345cabdff1aSopenharmony_ci            case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
1346cabdff1aSopenharmony_ci            }
1347cabdff1aSopenharmony_ci        }
1348cabdff1aSopenharmony_ci
1349cabdff1aSopenharmony_ci        if (s->pass == 1) {
1350cabdff1aSopenharmony_ci            s->td[0].b++;
1351cabdff1aSopenharmony_ci            s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1352cabdff1aSopenharmony_ci            s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1353cabdff1aSopenharmony_ci            s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1354cabdff1aSopenharmony_ci            s->td[0].eob += 4 * w4 * h4;
1355cabdff1aSopenharmony_ci            s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1356cabdff1aSopenharmony_ci            s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1357cabdff1aSopenharmony_ci
1358cabdff1aSopenharmony_ci            return;
1359cabdff1aSopenharmony_ci        }
1360cabdff1aSopenharmony_ci    }
1361cabdff1aSopenharmony_ci
1362cabdff1aSopenharmony_ci    // emulated overhangs if the stride of the target buffer can't hold. This
1363cabdff1aSopenharmony_ci    // makes it possible to support emu-edge and so on even if we have large block
1364cabdff1aSopenharmony_ci    // overhangs
1365cabdff1aSopenharmony_ci    emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
1366cabdff1aSopenharmony_ci             (row + h4) > s->rows;
1367cabdff1aSopenharmony_ci    emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
1368cabdff1aSopenharmony_ci             (row + h4) > s->rows;
1369cabdff1aSopenharmony_ci    if (emu[0]) {
1370cabdff1aSopenharmony_ci        td->dst[0] = td->tmp_y;
1371cabdff1aSopenharmony_ci        td->y_stride = 128;
1372cabdff1aSopenharmony_ci    } else {
1373cabdff1aSopenharmony_ci        td->dst[0] = f->data[0] + yoff;
1374cabdff1aSopenharmony_ci        td->y_stride = f->linesize[0];
1375cabdff1aSopenharmony_ci    }
1376cabdff1aSopenharmony_ci    if (emu[1]) {
1377cabdff1aSopenharmony_ci        td->dst[1] = td->tmp_uv[0];
1378cabdff1aSopenharmony_ci        td->dst[2] = td->tmp_uv[1];
1379cabdff1aSopenharmony_ci        td->uv_stride = 128;
1380cabdff1aSopenharmony_ci    } else {
1381cabdff1aSopenharmony_ci        td->dst[1] = f->data[1] + uvoff;
1382cabdff1aSopenharmony_ci        td->dst[2] = f->data[2] + uvoff;
1383cabdff1aSopenharmony_ci        td->uv_stride = f->linesize[1];
1384cabdff1aSopenharmony_ci    }
1385cabdff1aSopenharmony_ci    if (b->intra) {
1386cabdff1aSopenharmony_ci        if (s->s.h.bpp > 8) {
1387cabdff1aSopenharmony_ci            ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
1388cabdff1aSopenharmony_ci        } else {
1389cabdff1aSopenharmony_ci            ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
1390cabdff1aSopenharmony_ci        }
1391cabdff1aSopenharmony_ci    } else {
1392cabdff1aSopenharmony_ci        if (s->s.h.bpp > 8) {
1393cabdff1aSopenharmony_ci            ff_vp9_inter_recon_16bpp(td);
1394cabdff1aSopenharmony_ci        } else {
1395cabdff1aSopenharmony_ci            ff_vp9_inter_recon_8bpp(td);
1396cabdff1aSopenharmony_ci        }
1397cabdff1aSopenharmony_ci    }
1398cabdff1aSopenharmony_ci    if (emu[0]) {
1399cabdff1aSopenharmony_ci        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
1400cabdff1aSopenharmony_ci
1401cabdff1aSopenharmony_ci        for (n = 0; o < w; n++) {
1402cabdff1aSopenharmony_ci            int bw = 64 >> n;
1403cabdff1aSopenharmony_ci
1404cabdff1aSopenharmony_ci            av_assert2(n <= 4);
1405cabdff1aSopenharmony_ci            if (w & bw) {
1406cabdff1aSopenharmony_ci                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
1407cabdff1aSopenharmony_ci                                         td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
1408cabdff1aSopenharmony_ci                o += bw;
1409cabdff1aSopenharmony_ci            }
1410cabdff1aSopenharmony_ci        }
1411cabdff1aSopenharmony_ci    }
1412cabdff1aSopenharmony_ci    if (emu[1]) {
1413cabdff1aSopenharmony_ci        int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
1414cabdff1aSopenharmony_ci        int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
1415cabdff1aSopenharmony_ci
1416cabdff1aSopenharmony_ci        for (n = s->ss_h; o < w; n++) {
1417cabdff1aSopenharmony_ci            int bw = 64 >> n;
1418cabdff1aSopenharmony_ci
1419cabdff1aSopenharmony_ci            av_assert2(n <= 4);
1420cabdff1aSopenharmony_ci            if (w & bw) {
1421cabdff1aSopenharmony_ci                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
1422cabdff1aSopenharmony_ci                                         td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
1423cabdff1aSopenharmony_ci                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
1424cabdff1aSopenharmony_ci                                         td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
1425cabdff1aSopenharmony_ci                o += bw;
1426cabdff1aSopenharmony_ci            }
1427cabdff1aSopenharmony_ci        }
1428cabdff1aSopenharmony_ci    }
1429cabdff1aSopenharmony_ci
1430cabdff1aSopenharmony_ci    // pick filter level and find edges to apply filter to
1431cabdff1aSopenharmony_ci    if (s->s.h.filter.level &&
1432cabdff1aSopenharmony_ci        (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
1433cabdff1aSopenharmony_ci                                                      [b->mode[3] != ZEROMV]) > 0) {
1434cabdff1aSopenharmony_ci        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
1435cabdff1aSopenharmony_ci        int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;
1436cabdff1aSopenharmony_ci
1437cabdff1aSopenharmony_ci        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
1438cabdff1aSopenharmony_ci        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
1439cabdff1aSopenharmony_ci        if (s->ss_h || s->ss_v)
1440cabdff1aSopenharmony_ci            mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
1441cabdff1aSopenharmony_ci                       s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
1442cabdff1aSopenharmony_ci                       s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
1443cabdff1aSopenharmony_ci                       b->uvtx, skip_inter);
1444cabdff1aSopenharmony_ci    }
1445cabdff1aSopenharmony_ci
1446cabdff1aSopenharmony_ci    if (s->pass == 2) {
1447cabdff1aSopenharmony_ci        s->td[0].b++;
1448cabdff1aSopenharmony_ci        s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1449cabdff1aSopenharmony_ci        s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1450cabdff1aSopenharmony_ci        s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1451cabdff1aSopenharmony_ci        s->td[0].eob += 4 * w4 * h4;
1452cabdff1aSopenharmony_ci        s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1453cabdff1aSopenharmony_ci        s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1454cabdff1aSopenharmony_ci    }
1455cabdff1aSopenharmony_ci}
1456