/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
18cabdff1aSopenharmony_ci * 19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "libavutil/avassert.h" 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#include "threadframe.h" 27cabdff1aSopenharmony_ci#include "vp56.h" 28cabdff1aSopenharmony_ci#include "vp9.h" 29cabdff1aSopenharmony_ci#include "vp9data.h" 30cabdff1aSopenharmony_ci#include "vp9dec.h" 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_cistatic av_always_inline void setctx_2d(uint8_t *ptr, int w, int h, 33cabdff1aSopenharmony_ci ptrdiff_t stride, int v) 34cabdff1aSopenharmony_ci{ 35cabdff1aSopenharmony_ci switch (w) { 36cabdff1aSopenharmony_ci case 1: 37cabdff1aSopenharmony_ci do { 38cabdff1aSopenharmony_ci *ptr = v; 39cabdff1aSopenharmony_ci ptr += stride; 40cabdff1aSopenharmony_ci } while (--h); 41cabdff1aSopenharmony_ci break; 42cabdff1aSopenharmony_ci case 2: { 43cabdff1aSopenharmony_ci int v16 = v * 0x0101; 44cabdff1aSopenharmony_ci do { 45cabdff1aSopenharmony_ci AV_WN16A(ptr, v16); 46cabdff1aSopenharmony_ci ptr += stride; 47cabdff1aSopenharmony_ci } while (--h); 48cabdff1aSopenharmony_ci break; 49cabdff1aSopenharmony_ci } 50cabdff1aSopenharmony_ci case 4: { 51cabdff1aSopenharmony_ci uint32_t v32 = v * 0x01010101; 52cabdff1aSopenharmony_ci do { 53cabdff1aSopenharmony_ci AV_WN32A(ptr, v32); 54cabdff1aSopenharmony_ci ptr += stride; 55cabdff1aSopenharmony_ci } while (--h); 56cabdff1aSopenharmony_ci break; 57cabdff1aSopenharmony_ci } 58cabdff1aSopenharmony_ci case 8: { 59cabdff1aSopenharmony_ci#if HAVE_FAST_64BIT 60cabdff1aSopenharmony_ci uint64_t v64 = v * 0x0101010101010101ULL; 61cabdff1aSopenharmony_ci do { 62cabdff1aSopenharmony_ci AV_WN64A(ptr, v64); 63cabdff1aSopenharmony_ci 
ptr += stride; 64cabdff1aSopenharmony_ci } while (--h); 65cabdff1aSopenharmony_ci#else 66cabdff1aSopenharmony_ci uint32_t v32 = v * 0x01010101; 67cabdff1aSopenharmony_ci do { 68cabdff1aSopenharmony_ci AV_WN32A(ptr, v32); 69cabdff1aSopenharmony_ci AV_WN32A(ptr + 4, v32); 70cabdff1aSopenharmony_ci ptr += stride; 71cabdff1aSopenharmony_ci } while (--h); 72cabdff1aSopenharmony_ci#endif 73cabdff1aSopenharmony_ci break; 74cabdff1aSopenharmony_ci } 75cabdff1aSopenharmony_ci } 76cabdff1aSopenharmony_ci} 77cabdff1aSopenharmony_ci 78cabdff1aSopenharmony_cistatic void decode_mode(VP9TileData *td) 79cabdff1aSopenharmony_ci{ 80cabdff1aSopenharmony_ci static const uint8_t left_ctx[N_BS_SIZES] = { 81cabdff1aSopenharmony_ci 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf 82cabdff1aSopenharmony_ci }; 83cabdff1aSopenharmony_ci static const uint8_t above_ctx[N_BS_SIZES] = { 84cabdff1aSopenharmony_ci 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf 85cabdff1aSopenharmony_ci }; 86cabdff1aSopenharmony_ci static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = { 87cabdff1aSopenharmony_ci TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16, 88cabdff1aSopenharmony_ci TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4 89cabdff1aSopenharmony_ci }; 90cabdff1aSopenharmony_ci VP9Context *s = td->s; 91cabdff1aSopenharmony_ci VP9Block *b = td->b; 92cabdff1aSopenharmony_ci int row = td->row, col = td->col, row7 = td->row7; 93cabdff1aSopenharmony_ci enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs]; 94cabdff1aSopenharmony_ci int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4); 95cabdff1aSopenharmony_ci int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y; 96cabdff1aSopenharmony_ci int have_a = row > 0, have_l = col > td->tile_col_start; 97cabdff1aSopenharmony_ci int vref, filter_id; 98cabdff1aSopenharmony_ci 99cabdff1aSopenharmony_ci if (!s->s.h.segmentation.enabled) { 100cabdff1aSopenharmony_ci b->seg_id = 0; 
101cabdff1aSopenharmony_ci } else if (s->s.h.keyframe || s->s.h.intraonly) { 102cabdff1aSopenharmony_ci b->seg_id = !s->s.h.segmentation.update_map ? 0 : 103cabdff1aSopenharmony_ci vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, s->s.h.segmentation.prob); 104cabdff1aSopenharmony_ci } else if (!s->s.h.segmentation.update_map || 105cabdff1aSopenharmony_ci (s->s.h.segmentation.temporal && 106cabdff1aSopenharmony_ci vp56_rac_get_prob_branchy(td->c, 107cabdff1aSopenharmony_ci s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] + 108cabdff1aSopenharmony_ci td->left_segpred_ctx[row7]]))) { 109cabdff1aSopenharmony_ci if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) { 110cabdff1aSopenharmony_ci int pred = 8, x; 111cabdff1aSopenharmony_ci uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map; 112cabdff1aSopenharmony_ci 113cabdff1aSopenharmony_ci if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass) 114cabdff1aSopenharmony_ci ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0); 115cabdff1aSopenharmony_ci for (y = 0; y < h4; y++) { 116cabdff1aSopenharmony_ci int idx_base = (y + row) * 8 * s->sb_cols + col; 117cabdff1aSopenharmony_ci for (x = 0; x < w4; x++) 118cabdff1aSopenharmony_ci pred = FFMIN(pred, refsegmap[idx_base + x]); 119cabdff1aSopenharmony_ci } 120cabdff1aSopenharmony_ci av_assert1(pred < 8); 121cabdff1aSopenharmony_ci b->seg_id = pred; 122cabdff1aSopenharmony_ci } else { 123cabdff1aSopenharmony_ci b->seg_id = 0; 124cabdff1aSopenharmony_ci } 125cabdff1aSopenharmony_ci 126cabdff1aSopenharmony_ci memset(&s->above_segpred_ctx[col], 1, w4); 127cabdff1aSopenharmony_ci memset(&td->left_segpred_ctx[row7], 1, h4); 128cabdff1aSopenharmony_ci } else { 129cabdff1aSopenharmony_ci b->seg_id = vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, 130cabdff1aSopenharmony_ci s->s.h.segmentation.prob); 131cabdff1aSopenharmony_ci 132cabdff1aSopenharmony_ci memset(&s->above_segpred_ctx[col], 0, w4); 133cabdff1aSopenharmony_ci 
memset(&td->left_segpred_ctx[row7], 0, h4); 134cabdff1aSopenharmony_ci } 135cabdff1aSopenharmony_ci if (s->s.h.segmentation.enabled && 136cabdff1aSopenharmony_ci (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) { 137cabdff1aSopenharmony_ci setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col], 138cabdff1aSopenharmony_ci bw4, bh4, 8 * s->sb_cols, b->seg_id); 139cabdff1aSopenharmony_ci } 140cabdff1aSopenharmony_ci 141cabdff1aSopenharmony_ci b->skip = s->s.h.segmentation.enabled && 142cabdff1aSopenharmony_ci s->s.h.segmentation.feat[b->seg_id].skip_enabled; 143cabdff1aSopenharmony_ci if (!b->skip) { 144cabdff1aSopenharmony_ci int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col]; 145cabdff1aSopenharmony_ci b->skip = vp56_rac_get_prob(td->c, s->prob.p.skip[c]); 146cabdff1aSopenharmony_ci td->counts.skip[c][b->skip]++; 147cabdff1aSopenharmony_ci } 148cabdff1aSopenharmony_ci 149cabdff1aSopenharmony_ci if (s->s.h.keyframe || s->s.h.intraonly) { 150cabdff1aSopenharmony_ci b->intra = 1; 151cabdff1aSopenharmony_ci } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) { 152cabdff1aSopenharmony_ci b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val; 153cabdff1aSopenharmony_ci } else { 154cabdff1aSopenharmony_ci int c, bit; 155cabdff1aSopenharmony_ci 156cabdff1aSopenharmony_ci if (have_a && have_l) { 157cabdff1aSopenharmony_ci c = s->above_intra_ctx[col] + td->left_intra_ctx[row7]; 158cabdff1aSopenharmony_ci c += (c == 2); 159cabdff1aSopenharmony_ci } else { 160cabdff1aSopenharmony_ci c = have_a ? 2 * s->above_intra_ctx[col] : 161cabdff1aSopenharmony_ci have_l ? 
2 * td->left_intra_ctx[row7] : 0; 162cabdff1aSopenharmony_ci } 163cabdff1aSopenharmony_ci bit = vp56_rac_get_prob(td->c, s->prob.p.intra[c]); 164cabdff1aSopenharmony_ci td->counts.intra[c][bit]++; 165cabdff1aSopenharmony_ci b->intra = !bit; 166cabdff1aSopenharmony_ci } 167cabdff1aSopenharmony_ci 168cabdff1aSopenharmony_ci if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) { 169cabdff1aSopenharmony_ci int c; 170cabdff1aSopenharmony_ci if (have_a) { 171cabdff1aSopenharmony_ci if (have_l) { 172cabdff1aSopenharmony_ci c = (s->above_skip_ctx[col] ? max_tx : 173cabdff1aSopenharmony_ci s->above_txfm_ctx[col]) + 174cabdff1aSopenharmony_ci (td->left_skip_ctx[row7] ? max_tx : 175cabdff1aSopenharmony_ci td->left_txfm_ctx[row7]) > max_tx; 176cabdff1aSopenharmony_ci } else { 177cabdff1aSopenharmony_ci c = s->above_skip_ctx[col] ? 1 : 178cabdff1aSopenharmony_ci (s->above_txfm_ctx[col] * 2 > max_tx); 179cabdff1aSopenharmony_ci } 180cabdff1aSopenharmony_ci } else if (have_l) { 181cabdff1aSopenharmony_ci c = td->left_skip_ctx[row7] ? 
1 : 182cabdff1aSopenharmony_ci (td->left_txfm_ctx[row7] * 2 > max_tx); 183cabdff1aSopenharmony_ci } else { 184cabdff1aSopenharmony_ci c = 1; 185cabdff1aSopenharmony_ci } 186cabdff1aSopenharmony_ci switch (max_tx) { 187cabdff1aSopenharmony_ci case TX_32X32: 188cabdff1aSopenharmony_ci b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][0]); 189cabdff1aSopenharmony_ci if (b->tx) { 190cabdff1aSopenharmony_ci b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][1]); 191cabdff1aSopenharmony_ci if (b->tx == 2) 192cabdff1aSopenharmony_ci b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][2]); 193cabdff1aSopenharmony_ci } 194cabdff1aSopenharmony_ci td->counts.tx32p[c][b->tx]++; 195cabdff1aSopenharmony_ci break; 196cabdff1aSopenharmony_ci case TX_16X16: 197cabdff1aSopenharmony_ci b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][0]); 198cabdff1aSopenharmony_ci if (b->tx) 199cabdff1aSopenharmony_ci b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][1]); 200cabdff1aSopenharmony_ci td->counts.tx16p[c][b->tx]++; 201cabdff1aSopenharmony_ci break; 202cabdff1aSopenharmony_ci case TX_8X8: 203cabdff1aSopenharmony_ci b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx8p[c]); 204cabdff1aSopenharmony_ci td->counts.tx8p[c][b->tx]++; 205cabdff1aSopenharmony_ci break; 206cabdff1aSopenharmony_ci case TX_4X4: 207cabdff1aSopenharmony_ci b->tx = TX_4X4; 208cabdff1aSopenharmony_ci break; 209cabdff1aSopenharmony_ci } 210cabdff1aSopenharmony_ci } else { 211cabdff1aSopenharmony_ci b->tx = FFMIN(max_tx, s->s.h.txfmmode); 212cabdff1aSopenharmony_ci } 213cabdff1aSopenharmony_ci 214cabdff1aSopenharmony_ci if (s->s.h.keyframe || s->s.h.intraonly) { 215cabdff1aSopenharmony_ci uint8_t *a = &s->above_mode_ctx[col * 2]; 216cabdff1aSopenharmony_ci uint8_t *l = &td->left_mode_ctx[(row7) << 1]; 217cabdff1aSopenharmony_ci 218cabdff1aSopenharmony_ci b->comp = 0; 219cabdff1aSopenharmony_ci if (b->bs > BS_8x8) { 220cabdff1aSopenharmony_ci // FIXME the memory storage intermediates here aren't really 
221cabdff1aSopenharmony_ci // necessary, they're just there to make the code slightly 222cabdff1aSopenharmony_ci // simpler for now 223cabdff1aSopenharmony_ci b->mode[0] = 224cabdff1aSopenharmony_ci a[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 225cabdff1aSopenharmony_ci ff_vp9_default_kf_ymode_probs[a[0]][l[0]]); 226cabdff1aSopenharmony_ci if (b->bs != BS_8x4) { 227cabdff1aSopenharmony_ci b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 228cabdff1aSopenharmony_ci ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]); 229cabdff1aSopenharmony_ci l[0] = 230cabdff1aSopenharmony_ci a[1] = b->mode[1]; 231cabdff1aSopenharmony_ci } else { 232cabdff1aSopenharmony_ci l[0] = 233cabdff1aSopenharmony_ci a[1] = 234cabdff1aSopenharmony_ci b->mode[1] = b->mode[0]; 235cabdff1aSopenharmony_ci } 236cabdff1aSopenharmony_ci if (b->bs != BS_4x8) { 237cabdff1aSopenharmony_ci b->mode[2] = 238cabdff1aSopenharmony_ci a[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 239cabdff1aSopenharmony_ci ff_vp9_default_kf_ymode_probs[a[0]][l[1]]); 240cabdff1aSopenharmony_ci if (b->bs != BS_8x4) { 241cabdff1aSopenharmony_ci b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 242cabdff1aSopenharmony_ci ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]); 243cabdff1aSopenharmony_ci l[1] = 244cabdff1aSopenharmony_ci a[1] = b->mode[3]; 245cabdff1aSopenharmony_ci } else { 246cabdff1aSopenharmony_ci l[1] = 247cabdff1aSopenharmony_ci a[1] = 248cabdff1aSopenharmony_ci b->mode[3] = b->mode[2]; 249cabdff1aSopenharmony_ci } 250cabdff1aSopenharmony_ci } else { 251cabdff1aSopenharmony_ci b->mode[2] = b->mode[0]; 252cabdff1aSopenharmony_ci l[1] = 253cabdff1aSopenharmony_ci a[1] = 254cabdff1aSopenharmony_ci b->mode[3] = b->mode[1]; 255cabdff1aSopenharmony_ci } 256cabdff1aSopenharmony_ci } else { 257cabdff1aSopenharmony_ci b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 258cabdff1aSopenharmony_ci ff_vp9_default_kf_ymode_probs[*a][*l]); 259cabdff1aSopenharmony_ci b->mode[3] = 
260cabdff1aSopenharmony_ci b->mode[2] = 261cabdff1aSopenharmony_ci b->mode[1] = b->mode[0]; 262cabdff1aSopenharmony_ci // FIXME this can probably be optimized 263cabdff1aSopenharmony_ci memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]); 264cabdff1aSopenharmony_ci memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]); 265cabdff1aSopenharmony_ci } 266cabdff1aSopenharmony_ci b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 267cabdff1aSopenharmony_ci ff_vp9_default_kf_uvmode_probs[b->mode[3]]); 268cabdff1aSopenharmony_ci } else if (b->intra) { 269cabdff1aSopenharmony_ci b->comp = 0; 270cabdff1aSopenharmony_ci if (b->bs > BS_8x8) { 271cabdff1aSopenharmony_ci b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 272cabdff1aSopenharmony_ci s->prob.p.y_mode[0]); 273cabdff1aSopenharmony_ci td->counts.y_mode[0][b->mode[0]]++; 274cabdff1aSopenharmony_ci if (b->bs != BS_8x4) { 275cabdff1aSopenharmony_ci b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 276cabdff1aSopenharmony_ci s->prob.p.y_mode[0]); 277cabdff1aSopenharmony_ci td->counts.y_mode[0][b->mode[1]]++; 278cabdff1aSopenharmony_ci } else { 279cabdff1aSopenharmony_ci b->mode[1] = b->mode[0]; 280cabdff1aSopenharmony_ci } 281cabdff1aSopenharmony_ci if (b->bs != BS_4x8) { 282cabdff1aSopenharmony_ci b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 283cabdff1aSopenharmony_ci s->prob.p.y_mode[0]); 284cabdff1aSopenharmony_ci td->counts.y_mode[0][b->mode[2]]++; 285cabdff1aSopenharmony_ci if (b->bs != BS_8x4) { 286cabdff1aSopenharmony_ci b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 287cabdff1aSopenharmony_ci s->prob.p.y_mode[0]); 288cabdff1aSopenharmony_ci td->counts.y_mode[0][b->mode[3]]++; 289cabdff1aSopenharmony_ci } else { 290cabdff1aSopenharmony_ci b->mode[3] = b->mode[2]; 291cabdff1aSopenharmony_ci } 292cabdff1aSopenharmony_ci } else { 293cabdff1aSopenharmony_ci b->mode[2] = b->mode[0]; 294cabdff1aSopenharmony_ci b->mode[3] = b->mode[1]; 295cabdff1aSopenharmony_ci } 
296cabdff1aSopenharmony_ci } else { 297cabdff1aSopenharmony_ci static const uint8_t size_group[10] = { 298cabdff1aSopenharmony_ci 3, 3, 3, 3, 2, 2, 2, 1, 1, 1 299cabdff1aSopenharmony_ci }; 300cabdff1aSopenharmony_ci int sz = size_group[b->bs]; 301cabdff1aSopenharmony_ci 302cabdff1aSopenharmony_ci b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 303cabdff1aSopenharmony_ci s->prob.p.y_mode[sz]); 304cabdff1aSopenharmony_ci b->mode[1] = 305cabdff1aSopenharmony_ci b->mode[2] = 306cabdff1aSopenharmony_ci b->mode[3] = b->mode[0]; 307cabdff1aSopenharmony_ci td->counts.y_mode[sz][b->mode[3]]++; 308cabdff1aSopenharmony_ci } 309cabdff1aSopenharmony_ci b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree, 310cabdff1aSopenharmony_ci s->prob.p.uv_mode[b->mode[3]]); 311cabdff1aSopenharmony_ci td->counts.uv_mode[b->mode[3]][b->uvmode]++; 312cabdff1aSopenharmony_ci } else { 313cabdff1aSopenharmony_ci static const uint8_t inter_mode_ctx_lut[14][14] = { 314cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 315cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 316cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 317cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 318cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 319cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 320cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 321cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 322cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 323cabdff1aSopenharmony_ci { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 }, 324cabdff1aSopenharmony_ci { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 }, 325cabdff1aSopenharmony_ci { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 }, 326cabdff1aSopenharmony_ci { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 }, 327cabdff1aSopenharmony_ci { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 }, 
328cabdff1aSopenharmony_ci }; 329cabdff1aSopenharmony_ci 330cabdff1aSopenharmony_ci if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) { 331cabdff1aSopenharmony_ci av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0); 332cabdff1aSopenharmony_ci b->comp = 0; 333cabdff1aSopenharmony_ci b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1; 334cabdff1aSopenharmony_ci } else { 335cabdff1aSopenharmony_ci // read comp_pred flag 336cabdff1aSopenharmony_ci if (s->s.h.comppredmode != PRED_SWITCHABLE) { 337cabdff1aSopenharmony_ci b->comp = s->s.h.comppredmode == PRED_COMPREF; 338cabdff1aSopenharmony_ci } else { 339cabdff1aSopenharmony_ci int c; 340cabdff1aSopenharmony_ci 341cabdff1aSopenharmony_ci // FIXME add intra as ref=0xff (or -1) to make these easier? 342cabdff1aSopenharmony_ci if (have_a) { 343cabdff1aSopenharmony_ci if (have_l) { 344cabdff1aSopenharmony_ci if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) { 345cabdff1aSopenharmony_ci c = 4; 346cabdff1aSopenharmony_ci } else if (s->above_comp_ctx[col]) { 347cabdff1aSopenharmony_ci c = 2 + (td->left_intra_ctx[row7] || 348cabdff1aSopenharmony_ci td->left_ref_ctx[row7] == s->s.h.fixcompref); 349cabdff1aSopenharmony_ci } else if (td->left_comp_ctx[row7]) { 350cabdff1aSopenharmony_ci c = 2 + (s->above_intra_ctx[col] || 351cabdff1aSopenharmony_ci s->above_ref_ctx[col] == s->s.h.fixcompref); 352cabdff1aSopenharmony_ci } else { 353cabdff1aSopenharmony_ci c = (!s->above_intra_ctx[col] && 354cabdff1aSopenharmony_ci s->above_ref_ctx[col] == s->s.h.fixcompref) ^ 355cabdff1aSopenharmony_ci (!td->left_intra_ctx[row7] && 356cabdff1aSopenharmony_ci td->left_ref_ctx[row & 7] == s->s.h.fixcompref); 357cabdff1aSopenharmony_ci } 358cabdff1aSopenharmony_ci } else { 359cabdff1aSopenharmony_ci c = s->above_comp_ctx[col] ? 
3 : 360cabdff1aSopenharmony_ci (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref); 361cabdff1aSopenharmony_ci } 362cabdff1aSopenharmony_ci } else if (have_l) { 363cabdff1aSopenharmony_ci c = td->left_comp_ctx[row7] ? 3 : 364cabdff1aSopenharmony_ci (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref); 365cabdff1aSopenharmony_ci } else { 366cabdff1aSopenharmony_ci c = 1; 367cabdff1aSopenharmony_ci } 368cabdff1aSopenharmony_ci b->comp = vp56_rac_get_prob(td->c, s->prob.p.comp[c]); 369cabdff1aSopenharmony_ci td->counts.comp[c][b->comp]++; 370cabdff1aSopenharmony_ci } 371cabdff1aSopenharmony_ci 372cabdff1aSopenharmony_ci // read actual references 373cabdff1aSopenharmony_ci // FIXME probably cache a few variables here to prevent repetitive 374cabdff1aSopenharmony_ci // memory accesses below 375cabdff1aSopenharmony_ci if (b->comp) { /* two references */ 376cabdff1aSopenharmony_ci int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit; 377cabdff1aSopenharmony_ci 378cabdff1aSopenharmony_ci b->ref[fix_idx] = s->s.h.fixcompref; 379cabdff1aSopenharmony_ci // FIXME can this codeblob be replaced by some sort of LUT? 
380cabdff1aSopenharmony_ci if (have_a) { 381cabdff1aSopenharmony_ci if (have_l) { 382cabdff1aSopenharmony_ci if (s->above_intra_ctx[col]) { 383cabdff1aSopenharmony_ci if (td->left_intra_ctx[row7]) { 384cabdff1aSopenharmony_ci c = 2; 385cabdff1aSopenharmony_ci } else { 386cabdff1aSopenharmony_ci c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]); 387cabdff1aSopenharmony_ci } 388cabdff1aSopenharmony_ci } else if (td->left_intra_ctx[row7]) { 389cabdff1aSopenharmony_ci c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]); 390cabdff1aSopenharmony_ci } else { 391cabdff1aSopenharmony_ci int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col]; 392cabdff1aSopenharmony_ci 393cabdff1aSopenharmony_ci if (refl == refa && refa == s->s.h.varcompref[1]) { 394cabdff1aSopenharmony_ci c = 0; 395cabdff1aSopenharmony_ci } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) { 396cabdff1aSopenharmony_ci if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) || 397cabdff1aSopenharmony_ci (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) { 398cabdff1aSopenharmony_ci c = 4; 399cabdff1aSopenharmony_ci } else { 400cabdff1aSopenharmony_ci c = (refa == refl) ? 3 : 1; 401cabdff1aSopenharmony_ci } 402cabdff1aSopenharmony_ci } else if (!td->left_comp_ctx[row7]) { 403cabdff1aSopenharmony_ci if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) { 404cabdff1aSopenharmony_ci c = 1; 405cabdff1aSopenharmony_ci } else { 406cabdff1aSopenharmony_ci c = (refl == s->s.h.varcompref[1] && 407cabdff1aSopenharmony_ci refa != s->s.h.varcompref[1]) ? 2 : 4; 408cabdff1aSopenharmony_ci } 409cabdff1aSopenharmony_ci } else if (!s->above_comp_ctx[col]) { 410cabdff1aSopenharmony_ci if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) { 411cabdff1aSopenharmony_ci c = 1; 412cabdff1aSopenharmony_ci } else { 413cabdff1aSopenharmony_ci c = (refa == s->s.h.varcompref[1] && 414cabdff1aSopenharmony_ci refl != s->s.h.varcompref[1]) ? 
2 : 4; 415cabdff1aSopenharmony_ci } 416cabdff1aSopenharmony_ci } else { 417cabdff1aSopenharmony_ci c = (refl == refa) ? 4 : 2; 418cabdff1aSopenharmony_ci } 419cabdff1aSopenharmony_ci } 420cabdff1aSopenharmony_ci } else { 421cabdff1aSopenharmony_ci if (s->above_intra_ctx[col]) { 422cabdff1aSopenharmony_ci c = 2; 423cabdff1aSopenharmony_ci } else if (s->above_comp_ctx[col]) { 424cabdff1aSopenharmony_ci c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]); 425cabdff1aSopenharmony_ci } else { 426cabdff1aSopenharmony_ci c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]); 427cabdff1aSopenharmony_ci } 428cabdff1aSopenharmony_ci } 429cabdff1aSopenharmony_ci } else if (have_l) { 430cabdff1aSopenharmony_ci if (td->left_intra_ctx[row7]) { 431cabdff1aSopenharmony_ci c = 2; 432cabdff1aSopenharmony_ci } else if (td->left_comp_ctx[row7]) { 433cabdff1aSopenharmony_ci c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]); 434cabdff1aSopenharmony_ci } else { 435cabdff1aSopenharmony_ci c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]); 436cabdff1aSopenharmony_ci } 437cabdff1aSopenharmony_ci } else { 438cabdff1aSopenharmony_ci c = 2; 439cabdff1aSopenharmony_ci } 440cabdff1aSopenharmony_ci bit = vp56_rac_get_prob(td->c, s->prob.p.comp_ref[c]); 441cabdff1aSopenharmony_ci b->ref[var_idx] = s->s.h.varcompref[bit]; 442cabdff1aSopenharmony_ci td->counts.comp_ref[c][bit]++; 443cabdff1aSopenharmony_ci } else /* single reference */ { 444cabdff1aSopenharmony_ci int bit, c; 445cabdff1aSopenharmony_ci 446cabdff1aSopenharmony_ci if (have_a && !s->above_intra_ctx[col]) { 447cabdff1aSopenharmony_ci if (have_l && !td->left_intra_ctx[row7]) { 448cabdff1aSopenharmony_ci if (td->left_comp_ctx[row7]) { 449cabdff1aSopenharmony_ci if (s->above_comp_ctx[col]) { 450cabdff1aSopenharmony_ci c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] || 451cabdff1aSopenharmony_ci !s->above_ref_ctx[col]); 452cabdff1aSopenharmony_ci } else { 453cabdff1aSopenharmony_ci c = (3 * !s->above_ref_ctx[col]) 
+ 454cabdff1aSopenharmony_ci (!s->s.h.fixcompref || !td->left_ref_ctx[row7]); 455cabdff1aSopenharmony_ci } 456cabdff1aSopenharmony_ci } else if (s->above_comp_ctx[col]) { 457cabdff1aSopenharmony_ci c = (3 * !td->left_ref_ctx[row7]) + 458cabdff1aSopenharmony_ci (!s->s.h.fixcompref || !s->above_ref_ctx[col]); 459cabdff1aSopenharmony_ci } else { 460cabdff1aSopenharmony_ci c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col]; 461cabdff1aSopenharmony_ci } 462cabdff1aSopenharmony_ci } else if (s->above_intra_ctx[col]) { 463cabdff1aSopenharmony_ci c = 2; 464cabdff1aSopenharmony_ci } else if (s->above_comp_ctx[col]) { 465cabdff1aSopenharmony_ci c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]); 466cabdff1aSopenharmony_ci } else { 467cabdff1aSopenharmony_ci c = 4 * (!s->above_ref_ctx[col]); 468cabdff1aSopenharmony_ci } 469cabdff1aSopenharmony_ci } else if (have_l && !td->left_intra_ctx[row7]) { 470cabdff1aSopenharmony_ci if (td->left_intra_ctx[row7]) { 471cabdff1aSopenharmony_ci c = 2; 472cabdff1aSopenharmony_ci } else if (td->left_comp_ctx[row7]) { 473cabdff1aSopenharmony_ci c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]); 474cabdff1aSopenharmony_ci } else { 475cabdff1aSopenharmony_ci c = 4 * (!td->left_ref_ctx[row7]); 476cabdff1aSopenharmony_ci } 477cabdff1aSopenharmony_ci } else { 478cabdff1aSopenharmony_ci c = 2; 479cabdff1aSopenharmony_ci } 480cabdff1aSopenharmony_ci bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][0]); 481cabdff1aSopenharmony_ci td->counts.single_ref[c][0][bit]++; 482cabdff1aSopenharmony_ci if (!bit) { 483cabdff1aSopenharmony_ci b->ref[0] = 0; 484cabdff1aSopenharmony_ci } else { 485cabdff1aSopenharmony_ci // FIXME can this codeblob be replaced by some sort of LUT? 
486cabdff1aSopenharmony_ci if (have_a) { 487cabdff1aSopenharmony_ci if (have_l) { 488cabdff1aSopenharmony_ci if (td->left_intra_ctx[row7]) { 489cabdff1aSopenharmony_ci if (s->above_intra_ctx[col]) { 490cabdff1aSopenharmony_ci c = 2; 491cabdff1aSopenharmony_ci } else if (s->above_comp_ctx[col]) { 492cabdff1aSopenharmony_ci c = 1 + 2 * (s->s.h.fixcompref == 1 || 493cabdff1aSopenharmony_ci s->above_ref_ctx[col] == 1); 494cabdff1aSopenharmony_ci } else if (!s->above_ref_ctx[col]) { 495cabdff1aSopenharmony_ci c = 3; 496cabdff1aSopenharmony_ci } else { 497cabdff1aSopenharmony_ci c = 4 * (s->above_ref_ctx[col] == 1); 498cabdff1aSopenharmony_ci } 499cabdff1aSopenharmony_ci } else if (s->above_intra_ctx[col]) { 500cabdff1aSopenharmony_ci if (td->left_intra_ctx[row7]) { 501cabdff1aSopenharmony_ci c = 2; 502cabdff1aSopenharmony_ci } else if (td->left_comp_ctx[row7]) { 503cabdff1aSopenharmony_ci c = 1 + 2 * (s->s.h.fixcompref == 1 || 504cabdff1aSopenharmony_ci td->left_ref_ctx[row7] == 1); 505cabdff1aSopenharmony_ci } else if (!td->left_ref_ctx[row7]) { 506cabdff1aSopenharmony_ci c = 3; 507cabdff1aSopenharmony_ci } else { 508cabdff1aSopenharmony_ci c = 4 * (td->left_ref_ctx[row7] == 1); 509cabdff1aSopenharmony_ci } 510cabdff1aSopenharmony_ci } else if (s->above_comp_ctx[col]) { 511cabdff1aSopenharmony_ci if (td->left_comp_ctx[row7]) { 512cabdff1aSopenharmony_ci if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) { 513cabdff1aSopenharmony_ci c = 3 * (s->s.h.fixcompref == 1 || 514cabdff1aSopenharmony_ci td->left_ref_ctx[row7] == 1); 515cabdff1aSopenharmony_ci } else { 516cabdff1aSopenharmony_ci c = 2; 517cabdff1aSopenharmony_ci } 518cabdff1aSopenharmony_ci } else if (!td->left_ref_ctx[row7]) { 519cabdff1aSopenharmony_ci c = 1 + 2 * (s->s.h.fixcompref == 1 || 520cabdff1aSopenharmony_ci s->above_ref_ctx[col] == 1); 521cabdff1aSopenharmony_ci } else { 522cabdff1aSopenharmony_ci c = 3 * (td->left_ref_ctx[row7] == 1) + 523cabdff1aSopenharmony_ci (s->s.h.fixcompref == 1 || 
s->above_ref_ctx[col] == 1); 524cabdff1aSopenharmony_ci } 525cabdff1aSopenharmony_ci } else if (td->left_comp_ctx[row7]) { 526cabdff1aSopenharmony_ci if (!s->above_ref_ctx[col]) { 527cabdff1aSopenharmony_ci c = 1 + 2 * (s->s.h.fixcompref == 1 || 528cabdff1aSopenharmony_ci td->left_ref_ctx[row7] == 1); 529cabdff1aSopenharmony_ci } else { 530cabdff1aSopenharmony_ci c = 3 * (s->above_ref_ctx[col] == 1) + 531cabdff1aSopenharmony_ci (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1); 532cabdff1aSopenharmony_ci } 533cabdff1aSopenharmony_ci } else if (!s->above_ref_ctx[col]) { 534cabdff1aSopenharmony_ci if (!td->left_ref_ctx[row7]) { 535cabdff1aSopenharmony_ci c = 3; 536cabdff1aSopenharmony_ci } else { 537cabdff1aSopenharmony_ci c = 4 * (td->left_ref_ctx[row7] == 1); 538cabdff1aSopenharmony_ci } 539cabdff1aSopenharmony_ci } else if (!td->left_ref_ctx[row7]) { 540cabdff1aSopenharmony_ci c = 4 * (s->above_ref_ctx[col] == 1); 541cabdff1aSopenharmony_ci } else { 542cabdff1aSopenharmony_ci c = 2 * (td->left_ref_ctx[row7] == 1) + 543cabdff1aSopenharmony_ci 2 * (s->above_ref_ctx[col] == 1); 544cabdff1aSopenharmony_ci } 545cabdff1aSopenharmony_ci } else { 546cabdff1aSopenharmony_ci if (s->above_intra_ctx[col] || 547cabdff1aSopenharmony_ci (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) { 548cabdff1aSopenharmony_ci c = 2; 549cabdff1aSopenharmony_ci } else if (s->above_comp_ctx[col]) { 550cabdff1aSopenharmony_ci c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1); 551cabdff1aSopenharmony_ci } else { 552cabdff1aSopenharmony_ci c = 4 * (s->above_ref_ctx[col] == 1); 553cabdff1aSopenharmony_ci } 554cabdff1aSopenharmony_ci } 555cabdff1aSopenharmony_ci } else if (have_l) { 556cabdff1aSopenharmony_ci if (td->left_intra_ctx[row7] || 557cabdff1aSopenharmony_ci (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) { 558cabdff1aSopenharmony_ci c = 2; 559cabdff1aSopenharmony_ci } else if (td->left_comp_ctx[row7]) { 560cabdff1aSopenharmony_ci c = 3 * (s->s.h.fixcompref == 
1 || td->left_ref_ctx[row7] == 1); 561cabdff1aSopenharmony_ci } else { 562cabdff1aSopenharmony_ci c = 4 * (td->left_ref_ctx[row7] == 1); 563cabdff1aSopenharmony_ci } 564cabdff1aSopenharmony_ci } else { 565cabdff1aSopenharmony_ci c = 2; 566cabdff1aSopenharmony_ci } 567cabdff1aSopenharmony_ci bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][1]); 568cabdff1aSopenharmony_ci td->counts.single_ref[c][1][bit]++; 569cabdff1aSopenharmony_ci b->ref[0] = 1 + bit; 570cabdff1aSopenharmony_ci } 571cabdff1aSopenharmony_ci } 572cabdff1aSopenharmony_ci } 573cabdff1aSopenharmony_ci 574cabdff1aSopenharmony_ci if (b->bs <= BS_8x8) { 575cabdff1aSopenharmony_ci if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) { 576cabdff1aSopenharmony_ci b->mode[0] = 577cabdff1aSopenharmony_ci b->mode[1] = 578cabdff1aSopenharmony_ci b->mode[2] = 579cabdff1aSopenharmony_ci b->mode[3] = ZEROMV; 580cabdff1aSopenharmony_ci } else { 581cabdff1aSopenharmony_ci static const uint8_t off[10] = { 582cabdff1aSopenharmony_ci 3, 0, 0, 1, 0, 0, 0, 0, 0, 0 583cabdff1aSopenharmony_ci }; 584cabdff1aSopenharmony_ci 585cabdff1aSopenharmony_ci // FIXME this needs to use the LUT tables from find_ref_mvs 586cabdff1aSopenharmony_ci // because not all are -1,0/0,-1 587cabdff1aSopenharmony_ci int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]] 588cabdff1aSopenharmony_ci [td->left_mode_ctx[row7 + off[b->bs]]]; 589cabdff1aSopenharmony_ci 590cabdff1aSopenharmony_ci b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, 591cabdff1aSopenharmony_ci s->prob.p.mv_mode[c]); 592cabdff1aSopenharmony_ci b->mode[1] = 593cabdff1aSopenharmony_ci b->mode[2] = 594cabdff1aSopenharmony_ci b->mode[3] = b->mode[0]; 595cabdff1aSopenharmony_ci td->counts.mv_mode[c][b->mode[0] - 10]++; 596cabdff1aSopenharmony_ci } 597cabdff1aSopenharmony_ci } 598cabdff1aSopenharmony_ci 599cabdff1aSopenharmony_ci if (s->s.h.filtermode == FILTER_SWITCHABLE) { 600cabdff1aSopenharmony_ci int c; 
601cabdff1aSopenharmony_ci 602cabdff1aSopenharmony_ci if (have_a && s->above_mode_ctx[col] >= NEARESTMV) { 603cabdff1aSopenharmony_ci if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) { 604cabdff1aSopenharmony_ci c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ? 605cabdff1aSopenharmony_ci td->left_filter_ctx[row7] : 3; 606cabdff1aSopenharmony_ci } else { 607cabdff1aSopenharmony_ci c = s->above_filter_ctx[col]; 608cabdff1aSopenharmony_ci } 609cabdff1aSopenharmony_ci } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) { 610cabdff1aSopenharmony_ci c = td->left_filter_ctx[row7]; 611cabdff1aSopenharmony_ci } else { 612cabdff1aSopenharmony_ci c = 3; 613cabdff1aSopenharmony_ci } 614cabdff1aSopenharmony_ci 615cabdff1aSopenharmony_ci filter_id = vp8_rac_get_tree(td->c, ff_vp9_filter_tree, 616cabdff1aSopenharmony_ci s->prob.p.filter[c]); 617cabdff1aSopenharmony_ci td->counts.filter[c][filter_id]++; 618cabdff1aSopenharmony_ci b->filter = ff_vp9_filter_lut[filter_id]; 619cabdff1aSopenharmony_ci } else { 620cabdff1aSopenharmony_ci b->filter = s->s.h.filtermode; 621cabdff1aSopenharmony_ci } 622cabdff1aSopenharmony_ci 623cabdff1aSopenharmony_ci if (b->bs > BS_8x8) { 624cabdff1aSopenharmony_ci int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]]; 625cabdff1aSopenharmony_ci 626cabdff1aSopenharmony_ci b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, 627cabdff1aSopenharmony_ci s->prob.p.mv_mode[c]); 628cabdff1aSopenharmony_ci td->counts.mv_mode[c][b->mode[0] - 10]++; 629cabdff1aSopenharmony_ci ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0); 630cabdff1aSopenharmony_ci 631cabdff1aSopenharmony_ci if (b->bs != BS_8x4) { 632cabdff1aSopenharmony_ci b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, 633cabdff1aSopenharmony_ci s->prob.p.mv_mode[c]); 634cabdff1aSopenharmony_ci td->counts.mv_mode[c][b->mode[1] - 10]++; 635cabdff1aSopenharmony_ci ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1); 636cabdff1aSopenharmony_ci } else { 
637cabdff1aSopenharmony_ci b->mode[1] = b->mode[0]; 638cabdff1aSopenharmony_ci AV_COPY32(&b->mv[1][0], &b->mv[0][0]); 639cabdff1aSopenharmony_ci AV_COPY32(&b->mv[1][1], &b->mv[0][1]); 640cabdff1aSopenharmony_ci } 641cabdff1aSopenharmony_ci 642cabdff1aSopenharmony_ci if (b->bs != BS_4x8) { 643cabdff1aSopenharmony_ci b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, 644cabdff1aSopenharmony_ci s->prob.p.mv_mode[c]); 645cabdff1aSopenharmony_ci td->counts.mv_mode[c][b->mode[2] - 10]++; 646cabdff1aSopenharmony_ci ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2); 647cabdff1aSopenharmony_ci 648cabdff1aSopenharmony_ci if (b->bs != BS_8x4) { 649cabdff1aSopenharmony_ci b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree, 650cabdff1aSopenharmony_ci s->prob.p.mv_mode[c]); 651cabdff1aSopenharmony_ci td->counts.mv_mode[c][b->mode[3] - 10]++; 652cabdff1aSopenharmony_ci ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3); 653cabdff1aSopenharmony_ci } else { 654cabdff1aSopenharmony_ci b->mode[3] = b->mode[2]; 655cabdff1aSopenharmony_ci AV_COPY32(&b->mv[3][0], &b->mv[2][0]); 656cabdff1aSopenharmony_ci AV_COPY32(&b->mv[3][1], &b->mv[2][1]); 657cabdff1aSopenharmony_ci } 658cabdff1aSopenharmony_ci } else { 659cabdff1aSopenharmony_ci b->mode[2] = b->mode[0]; 660cabdff1aSopenharmony_ci AV_COPY32(&b->mv[2][0], &b->mv[0][0]); 661cabdff1aSopenharmony_ci AV_COPY32(&b->mv[2][1], &b->mv[0][1]); 662cabdff1aSopenharmony_ci b->mode[3] = b->mode[1]; 663cabdff1aSopenharmony_ci AV_COPY32(&b->mv[3][0], &b->mv[1][0]); 664cabdff1aSopenharmony_ci AV_COPY32(&b->mv[3][1], &b->mv[1][1]); 665cabdff1aSopenharmony_ci } 666cabdff1aSopenharmony_ci } else { 667cabdff1aSopenharmony_ci ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1); 668cabdff1aSopenharmony_ci AV_COPY32(&b->mv[1][0], &b->mv[0][0]); 669cabdff1aSopenharmony_ci AV_COPY32(&b->mv[2][0], &b->mv[0][0]); 670cabdff1aSopenharmony_ci AV_COPY32(&b->mv[3][0], &b->mv[0][0]); 671cabdff1aSopenharmony_ci AV_COPY32(&b->mv[1][1], &b->mv[0][1]); 
672cabdff1aSopenharmony_ci AV_COPY32(&b->mv[2][1], &b->mv[0][1]); 673cabdff1aSopenharmony_ci AV_COPY32(&b->mv[3][1], &b->mv[0][1]); 674cabdff1aSopenharmony_ci } 675cabdff1aSopenharmony_ci 676cabdff1aSopenharmony_ci vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0]; 677cabdff1aSopenharmony_ci } 678cabdff1aSopenharmony_ci 679cabdff1aSopenharmony_ci#if HAVE_FAST_64BIT 680cabdff1aSopenharmony_ci#define SPLAT_CTX(var, val, n) \ 681cabdff1aSopenharmony_ci switch (n) { \ 682cabdff1aSopenharmony_ci case 1: var = val; break; \ 683cabdff1aSopenharmony_ci case 2: AV_WN16A(&var, val * 0x0101); break; \ 684cabdff1aSopenharmony_ci case 4: AV_WN32A(&var, val * 0x01010101); break; \ 685cabdff1aSopenharmony_ci case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \ 686cabdff1aSopenharmony_ci case 16: { \ 687cabdff1aSopenharmony_ci uint64_t v64 = val * 0x0101010101010101ULL; \ 688cabdff1aSopenharmony_ci AV_WN64A( &var, v64); \ 689cabdff1aSopenharmony_ci AV_WN64A(&((uint8_t *) &var)[8], v64); \ 690cabdff1aSopenharmony_ci break; \ 691cabdff1aSopenharmony_ci } \ 692cabdff1aSopenharmony_ci } 693cabdff1aSopenharmony_ci#else 694cabdff1aSopenharmony_ci#define SPLAT_CTX(var, val, n) \ 695cabdff1aSopenharmony_ci switch (n) { \ 696cabdff1aSopenharmony_ci case 1: var = val; break; \ 697cabdff1aSopenharmony_ci case 2: AV_WN16A(&var, val * 0x0101); break; \ 698cabdff1aSopenharmony_ci case 4: AV_WN32A(&var, val * 0x01010101); break; \ 699cabdff1aSopenharmony_ci case 8: { \ 700cabdff1aSopenharmony_ci uint32_t v32 = val * 0x01010101; \ 701cabdff1aSopenharmony_ci AV_WN32A( &var, v32); \ 702cabdff1aSopenharmony_ci AV_WN32A(&((uint8_t *) &var)[4], v32); \ 703cabdff1aSopenharmony_ci break; \ 704cabdff1aSopenharmony_ci } \ 705cabdff1aSopenharmony_ci case 16: { \ 706cabdff1aSopenharmony_ci uint32_t v32 = val * 0x01010101; \ 707cabdff1aSopenharmony_ci AV_WN32A( &var, v32); \ 708cabdff1aSopenharmony_ci AV_WN32A(&((uint8_t *) &var)[4], v32); \ 709cabdff1aSopenharmony_ci 
AV_WN32A(&((uint8_t *) &var)[8], v32); \ 710cabdff1aSopenharmony_ci AV_WN32A(&((uint8_t *) &var)[12], v32); \ 711cabdff1aSopenharmony_ci break; \ 712cabdff1aSopenharmony_ci } \ 713cabdff1aSopenharmony_ci } 714cabdff1aSopenharmony_ci#endif 715cabdff1aSopenharmony_ci 716cabdff1aSopenharmony_ci switch (ff_vp9_bwh_tab[1][b->bs][0]) { 717cabdff1aSopenharmony_ci#define SET_CTXS(perf, dir, off, n) \ 718cabdff1aSopenharmony_ci do { \ 719cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_skip_ctx[off], b->skip, n); \ 720cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_txfm_ctx[off], b->tx, n); \ 721cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \ 722cabdff1aSopenharmony_ci if (!s->s.h.keyframe && !s->s.h.intraonly) { \ 723cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra, n); \ 724cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_comp_ctx[off], b->comp, n); \ 725cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_mode_ctx[off], b->mode[3], n); \ 726cabdff1aSopenharmony_ci if (!b->intra) { \ 727cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \ 728cabdff1aSopenharmony_ci if (s->s.h.filtermode == FILTER_SWITCHABLE) { \ 729cabdff1aSopenharmony_ci SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \ 730cabdff1aSopenharmony_ci } \ 731cabdff1aSopenharmony_ci } \ 732cabdff1aSopenharmony_ci } \ 733cabdff1aSopenharmony_ci } while (0) 734cabdff1aSopenharmony_ci case 1: SET_CTXS(s, above, col, 1); break; 735cabdff1aSopenharmony_ci case 2: SET_CTXS(s, above, col, 2); break; 736cabdff1aSopenharmony_ci case 4: SET_CTXS(s, above, col, 4); break; 737cabdff1aSopenharmony_ci case 8: SET_CTXS(s, above, col, 8); break; 738cabdff1aSopenharmony_ci } 739cabdff1aSopenharmony_ci switch (ff_vp9_bwh_tab[1][b->bs][1]) { 740cabdff1aSopenharmony_ci case 1: SET_CTXS(td, left, row7, 1); break; 741cabdff1aSopenharmony_ci case 2: SET_CTXS(td, left, row7, 2); break; 742cabdff1aSopenharmony_ci case 4: SET_CTXS(td, left, row7, 4); break; 
743cabdff1aSopenharmony_ci case 8: SET_CTXS(td, left, row7, 8); break; 744cabdff1aSopenharmony_ci } 745cabdff1aSopenharmony_ci#undef SPLAT_CTX 746cabdff1aSopenharmony_ci#undef SET_CTXS 747cabdff1aSopenharmony_ci 748cabdff1aSopenharmony_ci if (!s->s.h.keyframe && !s->s.h.intraonly) { 749cabdff1aSopenharmony_ci if (b->bs > BS_8x8) { 750cabdff1aSopenharmony_ci int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]); 751cabdff1aSopenharmony_ci 752cabdff1aSopenharmony_ci AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]); 753cabdff1aSopenharmony_ci AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]); 754cabdff1aSopenharmony_ci AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0); 755cabdff1aSopenharmony_ci AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1); 756cabdff1aSopenharmony_ci AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]); 757cabdff1aSopenharmony_ci AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]); 758cabdff1aSopenharmony_ci AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0); 759cabdff1aSopenharmony_ci AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1); 760cabdff1aSopenharmony_ci } else { 761cabdff1aSopenharmony_ci int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]); 762cabdff1aSopenharmony_ci 763cabdff1aSopenharmony_ci for (n = 0; n < w4 * 2; n++) { 764cabdff1aSopenharmony_ci AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0); 765cabdff1aSopenharmony_ci AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1); 766cabdff1aSopenharmony_ci } 767cabdff1aSopenharmony_ci for (n = 0; n < h4 * 2; n++) { 768cabdff1aSopenharmony_ci AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0); 769cabdff1aSopenharmony_ci AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1); 770cabdff1aSopenharmony_ci } 771cabdff1aSopenharmony_ci } 772cabdff1aSopenharmony_ci } 773cabdff1aSopenharmony_ci 774cabdff1aSopenharmony_ci // FIXME kinda ugly 775cabdff1aSopenharmony_ci for (y = 0; y < h4; y++) { 776cabdff1aSopenharmony_ci int x, o = (row + y) * s->sb_cols * 8 + col; 
777cabdff1aSopenharmony_ci VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o]; 778cabdff1aSopenharmony_ci 779cabdff1aSopenharmony_ci if (b->intra) { 780cabdff1aSopenharmony_ci for (x = 0; x < w4; x++) { 781cabdff1aSopenharmony_ci mv[x].ref[0] = 782cabdff1aSopenharmony_ci mv[x].ref[1] = -1; 783cabdff1aSopenharmony_ci } 784cabdff1aSopenharmony_ci } else if (b->comp) { 785cabdff1aSopenharmony_ci for (x = 0; x < w4; x++) { 786cabdff1aSopenharmony_ci mv[x].ref[0] = b->ref[0]; 787cabdff1aSopenharmony_ci mv[x].ref[1] = b->ref[1]; 788cabdff1aSopenharmony_ci AV_COPY32(&mv[x].mv[0], &b->mv[3][0]); 789cabdff1aSopenharmony_ci AV_COPY32(&mv[x].mv[1], &b->mv[3][1]); 790cabdff1aSopenharmony_ci } 791cabdff1aSopenharmony_ci } else { 792cabdff1aSopenharmony_ci for (x = 0; x < w4; x++) { 793cabdff1aSopenharmony_ci mv[x].ref[0] = b->ref[0]; 794cabdff1aSopenharmony_ci mv[x].ref[1] = -1; 795cabdff1aSopenharmony_ci AV_COPY32(&mv[x].mv[0], &b->mv[3][0]); 796cabdff1aSopenharmony_ci } 797cabdff1aSopenharmony_ci } 798cabdff1aSopenharmony_ci } 799cabdff1aSopenharmony_ci} 800cabdff1aSopenharmony_ci 801cabdff1aSopenharmony_ci// FIXME merge cnt/eob arguments? 
802cabdff1aSopenharmony_cistatic av_always_inline int 803cabdff1aSopenharmony_cidecode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs, 804cabdff1aSopenharmony_ci int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3], 805cabdff1aSopenharmony_ci unsigned (*eob)[6][2], uint8_t (*p)[6][11], 806cabdff1aSopenharmony_ci int nnz, const int16_t *scan, const int16_t (*nb)[2], 807cabdff1aSopenharmony_ci const int16_t *band_counts, int16_t *qmul) 808cabdff1aSopenharmony_ci{ 809cabdff1aSopenharmony_ci int i = 0, band = 0, band_left = band_counts[band]; 810cabdff1aSopenharmony_ci const uint8_t *tp = p[0][nnz]; 811cabdff1aSopenharmony_ci uint8_t cache[1024]; 812cabdff1aSopenharmony_ci 813cabdff1aSopenharmony_ci do { 814cabdff1aSopenharmony_ci int val, rc; 815cabdff1aSopenharmony_ci 816cabdff1aSopenharmony_ci val = vp56_rac_get_prob_branchy(c, tp[0]); // eob 817cabdff1aSopenharmony_ci eob[band][nnz][val]++; 818cabdff1aSopenharmony_ci if (!val) 819cabdff1aSopenharmony_ci break; 820cabdff1aSopenharmony_ci 821cabdff1aSopenharmony_ciskip_eob: 822cabdff1aSopenharmony_ci if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero 823cabdff1aSopenharmony_ci cnt[band][nnz][0]++; 824cabdff1aSopenharmony_ci if (!--band_left) 825cabdff1aSopenharmony_ci band_left = band_counts[++band]; 826cabdff1aSopenharmony_ci cache[scan[i]] = 0; 827cabdff1aSopenharmony_ci nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; 828cabdff1aSopenharmony_ci tp = p[band][nnz]; 829cabdff1aSopenharmony_ci if (++i == n_coeffs) 830cabdff1aSopenharmony_ci break; //invalid input; blocks should end with EOB 831cabdff1aSopenharmony_ci goto skip_eob; 832cabdff1aSopenharmony_ci } 833cabdff1aSopenharmony_ci 834cabdff1aSopenharmony_ci rc = scan[i]; 835cabdff1aSopenharmony_ci if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one 836cabdff1aSopenharmony_ci cnt[band][nnz][1]++; 837cabdff1aSopenharmony_ci val = 1; 838cabdff1aSopenharmony_ci cache[rc] = 1; 839cabdff1aSopenharmony_ci } else { 
840cabdff1aSopenharmony_ci cnt[band][nnz][2]++; 841cabdff1aSopenharmony_ci if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4 842cabdff1aSopenharmony_ci if (!vp56_rac_get_prob_branchy(c, tp[4])) { 843cabdff1aSopenharmony_ci cache[rc] = val = 2; 844cabdff1aSopenharmony_ci } else { 845cabdff1aSopenharmony_ci val = 3 + vp56_rac_get_prob(c, tp[5]); 846cabdff1aSopenharmony_ci cache[rc] = 3; 847cabdff1aSopenharmony_ci } 848cabdff1aSopenharmony_ci } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2 849cabdff1aSopenharmony_ci cache[rc] = 4; 850cabdff1aSopenharmony_ci if (!vp56_rac_get_prob_branchy(c, tp[7])) { 851cabdff1aSopenharmony_ci val = vp56_rac_get_prob(c, 159) + 5; 852cabdff1aSopenharmony_ci } else { 853cabdff1aSopenharmony_ci val = (vp56_rac_get_prob(c, 165) << 1) + 7; 854cabdff1aSopenharmony_ci val += vp56_rac_get_prob(c, 145); 855cabdff1aSopenharmony_ci } 856cabdff1aSopenharmony_ci } else { // cat 3-6 857cabdff1aSopenharmony_ci cache[rc] = 5; 858cabdff1aSopenharmony_ci if (!vp56_rac_get_prob_branchy(c, tp[8])) { 859cabdff1aSopenharmony_ci if (!vp56_rac_get_prob_branchy(c, tp[9])) { 860cabdff1aSopenharmony_ci val = 11 + (vp56_rac_get_prob(c, 173) << 2); 861cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 148) << 1); 862cabdff1aSopenharmony_ci val += vp56_rac_get_prob(c, 140); 863cabdff1aSopenharmony_ci } else { 864cabdff1aSopenharmony_ci val = 19 + (vp56_rac_get_prob(c, 176) << 3); 865cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 155) << 2); 866cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 140) << 1); 867cabdff1aSopenharmony_ci val += vp56_rac_get_prob(c, 135); 868cabdff1aSopenharmony_ci } 869cabdff1aSopenharmony_ci } else if (!vp56_rac_get_prob_branchy(c, tp[10])) { 870cabdff1aSopenharmony_ci val = (vp56_rac_get_prob(c, 180) << 4) + 35; 871cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 157) << 3); 872cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 141) << 2); 873cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 134) 
<< 1); 874cabdff1aSopenharmony_ci val += vp56_rac_get_prob(c, 130); 875cabdff1aSopenharmony_ci } else { 876cabdff1aSopenharmony_ci val = 67; 877cabdff1aSopenharmony_ci if (!is8bitsperpixel) { 878cabdff1aSopenharmony_ci if (bpp == 12) { 879cabdff1aSopenharmony_ci val += vp56_rac_get_prob(c, 255) << 17; 880cabdff1aSopenharmony_ci val += vp56_rac_get_prob(c, 255) << 16; 881cabdff1aSopenharmony_ci } 882cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 255) << 15); 883cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 255) << 14); 884cabdff1aSopenharmony_ci } 885cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 254) << 13); 886cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 254) << 12); 887cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 254) << 11); 888cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 252) << 10); 889cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 249) << 9); 890cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 243) << 8); 891cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 230) << 7); 892cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 196) << 6); 893cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 177) << 5); 894cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 153) << 4); 895cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 140) << 3); 896cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 133) << 2); 897cabdff1aSopenharmony_ci val += (vp56_rac_get_prob(c, 130) << 1); 898cabdff1aSopenharmony_ci val += vp56_rac_get_prob(c, 129); 899cabdff1aSopenharmony_ci } 900cabdff1aSopenharmony_ci } 901cabdff1aSopenharmony_ci } 902cabdff1aSopenharmony_ci#define STORE_COEF(c, i, v) do { \ 903cabdff1aSopenharmony_ci if (is8bitsperpixel) { \ 904cabdff1aSopenharmony_ci c[i] = v; \ 905cabdff1aSopenharmony_ci } else { \ 906cabdff1aSopenharmony_ci AV_WN32A(&c[i * 2], v); \ 907cabdff1aSopenharmony_ci } \ 908cabdff1aSopenharmony_ci} while (0) 909cabdff1aSopenharmony_ci if (!--band_left) 910cabdff1aSopenharmony_ci band_left = 
band_counts[++band]; 911cabdff1aSopenharmony_ci if (is_tx32x32) 912cabdff1aSopenharmony_ci STORE_COEF(coef, rc, (int)((vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2); 913cabdff1aSopenharmony_ci else 914cabdff1aSopenharmony_ci STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]); 915cabdff1aSopenharmony_ci nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; 916cabdff1aSopenharmony_ci tp = p[band][nnz]; 917cabdff1aSopenharmony_ci } while (++i < n_coeffs); 918cabdff1aSopenharmony_ci 919cabdff1aSopenharmony_ci return i; 920cabdff1aSopenharmony_ci} 921cabdff1aSopenharmony_ci 922cabdff1aSopenharmony_cistatic int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, 923cabdff1aSopenharmony_ci unsigned (*cnt)[6][3], unsigned (*eob)[6][2], 924cabdff1aSopenharmony_ci uint8_t (*p)[6][11], int nnz, const int16_t *scan, 925cabdff1aSopenharmony_ci const int16_t (*nb)[2], const int16_t *band_counts, 926cabdff1aSopenharmony_ci int16_t *qmul) 927cabdff1aSopenharmony_ci{ 928cabdff1aSopenharmony_ci return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p, 929cabdff1aSopenharmony_ci nnz, scan, nb, band_counts, qmul); 930cabdff1aSopenharmony_ci} 931cabdff1aSopenharmony_ci 932cabdff1aSopenharmony_cistatic int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs, 933cabdff1aSopenharmony_ci unsigned (*cnt)[6][3], unsigned (*eob)[6][2], 934cabdff1aSopenharmony_ci uint8_t (*p)[6][11], int nnz, const int16_t *scan, 935cabdff1aSopenharmony_ci const int16_t (*nb)[2], const int16_t *band_counts, 936cabdff1aSopenharmony_ci int16_t *qmul) 937cabdff1aSopenharmony_ci{ 938cabdff1aSopenharmony_ci return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p, 939cabdff1aSopenharmony_ci nnz, scan, nb, band_counts, qmul); 940cabdff1aSopenharmony_ci} 941cabdff1aSopenharmony_ci 942cabdff1aSopenharmony_cistatic int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, 943cabdff1aSopenharmony_ci unsigned 
(*cnt)[6][3], unsigned (*eob)[6][2], 944cabdff1aSopenharmony_ci uint8_t (*p)[6][11], int nnz, const int16_t *scan, 945cabdff1aSopenharmony_ci const int16_t (*nb)[2], const int16_t *band_counts, 946cabdff1aSopenharmony_ci int16_t *qmul) 947cabdff1aSopenharmony_ci{ 948cabdff1aSopenharmony_ci return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p, 949cabdff1aSopenharmony_ci nnz, scan, nb, band_counts, qmul); 950cabdff1aSopenharmony_ci} 951cabdff1aSopenharmony_ci 952cabdff1aSopenharmony_cistatic int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs, 953cabdff1aSopenharmony_ci unsigned (*cnt)[6][3], unsigned (*eob)[6][2], 954cabdff1aSopenharmony_ci uint8_t (*p)[6][11], int nnz, const int16_t *scan, 955cabdff1aSopenharmony_ci const int16_t (*nb)[2], const int16_t *band_counts, 956cabdff1aSopenharmony_ci int16_t *qmul) 957cabdff1aSopenharmony_ci{ 958cabdff1aSopenharmony_ci return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p, 959cabdff1aSopenharmony_ci nnz, scan, nb, band_counts, qmul); 960cabdff1aSopenharmony_ci} 961cabdff1aSopenharmony_ci 962cabdff1aSopenharmony_cistatic av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel) 963cabdff1aSopenharmony_ci{ 964cabdff1aSopenharmony_ci VP9Context *s = td->s; 965cabdff1aSopenharmony_ci VP9Block *b = td->b; 966cabdff1aSopenharmony_ci int row = td->row, col = td->col; 967cabdff1aSopenharmony_ci uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra]; 968cabdff1aSopenharmony_ci unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra]; 969cabdff1aSopenharmony_ci unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra]; 970cabdff1aSopenharmony_ci int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1; 971cabdff1aSopenharmony_ci int end_x = FFMIN(2 * (s->cols - col), w4); 972cabdff1aSopenharmony_ci int end_y = FFMIN(2 * (s->rows - row), h4); 973cabdff1aSopenharmony_ci int n, pl, 
x, y, ret; 974cabdff1aSopenharmony_ci int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul; 975cabdff1aSopenharmony_ci int tx = 4 * s->s.h.lossless + b->tx; 976cabdff1aSopenharmony_ci const int16_t * const *yscans = ff_vp9_scans[tx]; 977cabdff1aSopenharmony_ci const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx]; 978cabdff1aSopenharmony_ci const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT]; 979cabdff1aSopenharmony_ci const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT]; 980cabdff1aSopenharmony_ci uint8_t *a = &s->above_y_nnz_ctx[col * 2]; 981cabdff1aSopenharmony_ci uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1]; 982cabdff1aSopenharmony_ci static const int16_t band_counts[4][8] = { 983cabdff1aSopenharmony_ci { 1, 2, 3, 4, 3, 16 - 13 }, 984cabdff1aSopenharmony_ci { 1, 2, 3, 4, 11, 64 - 21 }, 985cabdff1aSopenharmony_ci { 1, 2, 3, 4, 11, 256 - 21 }, 986cabdff1aSopenharmony_ci { 1, 2, 3, 4, 11, 1024 - 21 }, 987cabdff1aSopenharmony_ci }; 988cabdff1aSopenharmony_ci const int16_t *y_band_counts = band_counts[b->tx]; 989cabdff1aSopenharmony_ci const int16_t *uv_band_counts = band_counts[b->uvtx]; 990cabdff1aSopenharmony_ci int bytesperpixel = is8bitsperpixel ? 
1 : 2; 991cabdff1aSopenharmony_ci int total_coeff = 0; 992cabdff1aSopenharmony_ci 993cabdff1aSopenharmony_ci#define MERGE(la, end, step, rd) \ 994cabdff1aSopenharmony_ci for (n = 0; n < end; n += step) \ 995cabdff1aSopenharmony_ci la[n] = !!rd(&la[n]) 996cabdff1aSopenharmony_ci#define MERGE_CTX(step, rd) \ 997cabdff1aSopenharmony_ci do { \ 998cabdff1aSopenharmony_ci MERGE(l, end_y, step, rd); \ 999cabdff1aSopenharmony_ci MERGE(a, end_x, step, rd); \ 1000cabdff1aSopenharmony_ci } while (0) 1001cabdff1aSopenharmony_ci 1002cabdff1aSopenharmony_ci#define DECODE_Y_COEF_LOOP(step, mode_index, v) \ 1003cabdff1aSopenharmony_ci for (n = 0, y = 0; y < end_y; y += step) { \ 1004cabdff1aSopenharmony_ci for (x = 0; x < end_x; x += step, n += step * step) { \ 1005cabdff1aSopenharmony_ci enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \ 1006cabdff1aSopenharmony_ci ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \ 1007cabdff1aSopenharmony_ci (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \ 1008cabdff1aSopenharmony_ci c, e, p, a[x] + l[y], yscans[txtp], \ 1009cabdff1aSopenharmony_ci ynbs[txtp], y_band_counts, qmul[0]); \ 1010cabdff1aSopenharmony_ci a[x] = l[y] = !!ret; \ 1011cabdff1aSopenharmony_ci total_coeff |= !!ret; \ 1012cabdff1aSopenharmony_ci if (step >= 4) { \ 1013cabdff1aSopenharmony_ci AV_WN16A(&td->eob[n], ret); \ 1014cabdff1aSopenharmony_ci } else { \ 1015cabdff1aSopenharmony_ci td->eob[n] = ret; \ 1016cabdff1aSopenharmony_ci } \ 1017cabdff1aSopenharmony_ci } \ 1018cabdff1aSopenharmony_ci } 1019cabdff1aSopenharmony_ci 1020cabdff1aSopenharmony_ci#define SPLAT(la, end, step, cond) \ 1021cabdff1aSopenharmony_ci if (step == 2) { \ 1022cabdff1aSopenharmony_ci for (n = 1; n < end; n += step) \ 1023cabdff1aSopenharmony_ci la[n] = la[n - 1]; \ 1024cabdff1aSopenharmony_ci } else if (step == 4) { \ 1025cabdff1aSopenharmony_ci if (cond) { \ 1026cabdff1aSopenharmony_ci for (n = 0; n < end; n += step) \ 
1027cabdff1aSopenharmony_ci AV_WN32A(&la[n], la[n] * 0x01010101); \ 1028cabdff1aSopenharmony_ci } else { \ 1029cabdff1aSopenharmony_ci for (n = 0; n < end; n += step) \ 1030cabdff1aSopenharmony_ci memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \ 1031cabdff1aSopenharmony_ci } \ 1032cabdff1aSopenharmony_ci } else /* step == 8 */ { \ 1033cabdff1aSopenharmony_ci if (cond) { \ 1034cabdff1aSopenharmony_ci if (HAVE_FAST_64BIT) { \ 1035cabdff1aSopenharmony_ci for (n = 0; n < end; n += step) \ 1036cabdff1aSopenharmony_ci AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \ 1037cabdff1aSopenharmony_ci } else { \ 1038cabdff1aSopenharmony_ci for (n = 0; n < end; n += step) { \ 1039cabdff1aSopenharmony_ci uint32_t v32 = la[n] * 0x01010101; \ 1040cabdff1aSopenharmony_ci AV_WN32A(&la[n], v32); \ 1041cabdff1aSopenharmony_ci AV_WN32A(&la[n + 4], v32); \ 1042cabdff1aSopenharmony_ci } \ 1043cabdff1aSopenharmony_ci } \ 1044cabdff1aSopenharmony_ci } else { \ 1045cabdff1aSopenharmony_ci for (n = 0; n < end; n += step) \ 1046cabdff1aSopenharmony_ci memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \ 1047cabdff1aSopenharmony_ci } \ 1048cabdff1aSopenharmony_ci } 1049cabdff1aSopenharmony_ci#define SPLAT_CTX(step) \ 1050cabdff1aSopenharmony_ci do { \ 1051cabdff1aSopenharmony_ci SPLAT(a, end_x, step, end_x == w4); \ 1052cabdff1aSopenharmony_ci SPLAT(l, end_y, step, end_y == h4); \ 1053cabdff1aSopenharmony_ci } while (0) 1054cabdff1aSopenharmony_ci 1055cabdff1aSopenharmony_ci /* y tokens */ 1056cabdff1aSopenharmony_ci switch (b->tx) { 1057cabdff1aSopenharmony_ci case TX_4X4: 1058cabdff1aSopenharmony_ci DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? 
n : 0,); 1059cabdff1aSopenharmony_ci break; 1060cabdff1aSopenharmony_ci case TX_8X8: 1061cabdff1aSopenharmony_ci MERGE_CTX(2, AV_RN16A); 1062cabdff1aSopenharmony_ci DECODE_Y_COEF_LOOP(2, 0,); 1063cabdff1aSopenharmony_ci SPLAT_CTX(2); 1064cabdff1aSopenharmony_ci break; 1065cabdff1aSopenharmony_ci case TX_16X16: 1066cabdff1aSopenharmony_ci MERGE_CTX(4, AV_RN32A); 1067cabdff1aSopenharmony_ci DECODE_Y_COEF_LOOP(4, 0,); 1068cabdff1aSopenharmony_ci SPLAT_CTX(4); 1069cabdff1aSopenharmony_ci break; 1070cabdff1aSopenharmony_ci case TX_32X32: 1071cabdff1aSopenharmony_ci MERGE_CTX(8, AV_RN64A); 1072cabdff1aSopenharmony_ci DECODE_Y_COEF_LOOP(8, 0, 32); 1073cabdff1aSopenharmony_ci SPLAT_CTX(8); 1074cabdff1aSopenharmony_ci break; 1075cabdff1aSopenharmony_ci } 1076cabdff1aSopenharmony_ci 1077cabdff1aSopenharmony_ci#define DECODE_UV_COEF_LOOP(step, v) \ 1078cabdff1aSopenharmony_ci for (n = 0, y = 0; y < end_y; y += step) { \ 1079cabdff1aSopenharmony_ci for (x = 0; x < end_x; x += step, n += step * step) { \ 1080cabdff1aSopenharmony_ci ret = (is8bitsperpixel ? 
decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \ 1081cabdff1aSopenharmony_ci (td, td->uvblock[pl] + 16 * n * bytesperpixel, \ 1082cabdff1aSopenharmony_ci 16 * step * step, c, e, p, a[x] + l[y], \ 1083cabdff1aSopenharmony_ci uvscan, uvnb, uv_band_counts, qmul[1]); \ 1084cabdff1aSopenharmony_ci a[x] = l[y] = !!ret; \ 1085cabdff1aSopenharmony_ci total_coeff |= !!ret; \ 1086cabdff1aSopenharmony_ci if (step >= 4) { \ 1087cabdff1aSopenharmony_ci AV_WN16A(&td->uveob[pl][n], ret); \ 1088cabdff1aSopenharmony_ci } else { \ 1089cabdff1aSopenharmony_ci td->uveob[pl][n] = ret; \ 1090cabdff1aSopenharmony_ci } \ 1091cabdff1aSopenharmony_ci } \ 1092cabdff1aSopenharmony_ci } 1093cabdff1aSopenharmony_ci 1094cabdff1aSopenharmony_ci p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra]; 1095cabdff1aSopenharmony_ci c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra]; 1096cabdff1aSopenharmony_ci e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra]; 1097cabdff1aSopenharmony_ci w4 >>= s->ss_h; 1098cabdff1aSopenharmony_ci end_x >>= s->ss_h; 1099cabdff1aSopenharmony_ci h4 >>= s->ss_v; 1100cabdff1aSopenharmony_ci end_y >>= s->ss_v; 1101cabdff1aSopenharmony_ci for (pl = 0; pl < 2; pl++) { 1102cabdff1aSopenharmony_ci a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h]; 1103cabdff1aSopenharmony_ci l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v]; 1104cabdff1aSopenharmony_ci switch (b->uvtx) { 1105cabdff1aSopenharmony_ci case TX_4X4: 1106cabdff1aSopenharmony_ci DECODE_UV_COEF_LOOP(1,); 1107cabdff1aSopenharmony_ci break; 1108cabdff1aSopenharmony_ci case TX_8X8: 1109cabdff1aSopenharmony_ci MERGE_CTX(2, AV_RN16A); 1110cabdff1aSopenharmony_ci DECODE_UV_COEF_LOOP(2,); 1111cabdff1aSopenharmony_ci SPLAT_CTX(2); 1112cabdff1aSopenharmony_ci break; 1113cabdff1aSopenharmony_ci case TX_16X16: 1114cabdff1aSopenharmony_ci MERGE_CTX(4, AV_RN32A); 1115cabdff1aSopenharmony_ci DECODE_UV_COEF_LOOP(4,); 1116cabdff1aSopenharmony_ci SPLAT_CTX(4); 1117cabdff1aSopenharmony_ci break; 1118cabdff1aSopenharmony_ci case 
TX_32X32: 1119cabdff1aSopenharmony_ci MERGE_CTX(8, AV_RN64A); 1120cabdff1aSopenharmony_ci DECODE_UV_COEF_LOOP(8, 32); 1121cabdff1aSopenharmony_ci SPLAT_CTX(8); 1122cabdff1aSopenharmony_ci break; 1123cabdff1aSopenharmony_ci } 1124cabdff1aSopenharmony_ci } 1125cabdff1aSopenharmony_ci 1126cabdff1aSopenharmony_ci return total_coeff; 1127cabdff1aSopenharmony_ci} 1128cabdff1aSopenharmony_ci 1129cabdff1aSopenharmony_cistatic int decode_coeffs_8bpp(VP9TileData *td) 1130cabdff1aSopenharmony_ci{ 1131cabdff1aSopenharmony_ci return decode_coeffs(td, 1); 1132cabdff1aSopenharmony_ci} 1133cabdff1aSopenharmony_ci 1134cabdff1aSopenharmony_cistatic int decode_coeffs_16bpp(VP9TileData *td) 1135cabdff1aSopenharmony_ci{ 1136cabdff1aSopenharmony_ci return decode_coeffs(td, 0); 1137cabdff1aSopenharmony_ci} 1138cabdff1aSopenharmony_ci 1139cabdff1aSopenharmony_cistatic av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v, 1140cabdff1aSopenharmony_ci int row_and_7, int col_and_7, 1141cabdff1aSopenharmony_ci int w, int h, int col_end, int row_end, 1142cabdff1aSopenharmony_ci enum TxfmMode tx, int skip_inter) 1143cabdff1aSopenharmony_ci{ 1144cabdff1aSopenharmony_ci static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 }; 1145cabdff1aSopenharmony_ci static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 }; 1146cabdff1aSopenharmony_ci 1147cabdff1aSopenharmony_ci // FIXME I'm pretty sure all loops can be replaced by a single LUT if 1148cabdff1aSopenharmony_ci // we make VP9Filter.mask uint64_t (i.e. row/col all single variable) 1149cabdff1aSopenharmony_ci // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then 1150cabdff1aSopenharmony_ci // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7) 1151cabdff1aSopenharmony_ci 1152cabdff1aSopenharmony_ci // the intended behaviour of the vp9 loopfilter is to work on 8-pixel 1153cabdff1aSopenharmony_ci // edges. 
This means that for UV, we work on two subsampled blocks at 1154cabdff1aSopenharmony_ci // a time, and we only use the topleft block's mode information to set 1155cabdff1aSopenharmony_ci // things like block strength. Thus, for any block size smaller than 1156cabdff1aSopenharmony_ci // 16x16, ignore the odd portion of the block. 1157cabdff1aSopenharmony_ci if (tx == TX_4X4 && (ss_v | ss_h)) { 1158cabdff1aSopenharmony_ci if (h == ss_v) { 1159cabdff1aSopenharmony_ci if (row_and_7 & 1) 1160cabdff1aSopenharmony_ci return; 1161cabdff1aSopenharmony_ci if (!row_end) 1162cabdff1aSopenharmony_ci h += 1; 1163cabdff1aSopenharmony_ci } 1164cabdff1aSopenharmony_ci if (w == ss_h) { 1165cabdff1aSopenharmony_ci if (col_and_7 & 1) 1166cabdff1aSopenharmony_ci return; 1167cabdff1aSopenharmony_ci if (!col_end) 1168cabdff1aSopenharmony_ci w += 1; 1169cabdff1aSopenharmony_ci } 1170cabdff1aSopenharmony_ci } 1171cabdff1aSopenharmony_ci 1172cabdff1aSopenharmony_ci if (tx == TX_4X4 && !skip_inter) { 1173cabdff1aSopenharmony_ci int t = 1 << col_and_7, m_col = (t << w) - t, y; 1174cabdff1aSopenharmony_ci // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide 1175cabdff1aSopenharmony_ci int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8; 1176cabdff1aSopenharmony_ci 1177cabdff1aSopenharmony_ci for (y = row_and_7; y < h + row_and_7; y++) { 1178cabdff1aSopenharmony_ci int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]); 1179cabdff1aSopenharmony_ci 1180cabdff1aSopenharmony_ci mask[0][y][1] |= m_row_8; 1181cabdff1aSopenharmony_ci mask[0][y][2] |= m_row_4; 1182cabdff1aSopenharmony_ci // for odd lines, if the odd col is not being filtered, 1183cabdff1aSopenharmony_ci // skip odd row also: 1184cabdff1aSopenharmony_ci // .---. 
<-- a 1185cabdff1aSopenharmony_ci // | | 1186cabdff1aSopenharmony_ci // |___| <-- b 1187cabdff1aSopenharmony_ci // ^ ^ 1188cabdff1aSopenharmony_ci // c d 1189cabdff1aSopenharmony_ci // 1190cabdff1aSopenharmony_ci // if a/c are even row/col and b/d are odd, and d is skipped, 1191cabdff1aSopenharmony_ci // e.g. right edge of size-66x66.webm, then skip b also (bug) 1192cabdff1aSopenharmony_ci if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) { 1193cabdff1aSopenharmony_ci mask[1][y][col_mask_id] |= (t << (w - 1)) - t; 1194cabdff1aSopenharmony_ci } else { 1195cabdff1aSopenharmony_ci mask[1][y][col_mask_id] |= m_col; 1196cabdff1aSopenharmony_ci } 1197cabdff1aSopenharmony_ci if (!ss_h) 1198cabdff1aSopenharmony_ci mask[0][y][3] |= m_col; 1199cabdff1aSopenharmony_ci if (!ss_v) { 1200cabdff1aSopenharmony_ci if (ss_h && (col_end & 1)) 1201cabdff1aSopenharmony_ci mask[1][y][3] |= (t << (w - 1)) - t; 1202cabdff1aSopenharmony_ci else 1203cabdff1aSopenharmony_ci mask[1][y][3] |= m_col; 1204cabdff1aSopenharmony_ci } 1205cabdff1aSopenharmony_ci } 1206cabdff1aSopenharmony_ci } else { 1207cabdff1aSopenharmony_ci int y, t = 1 << col_and_7, m_col = (t << w) - t; 1208cabdff1aSopenharmony_ci 1209cabdff1aSopenharmony_ci if (!skip_inter) { 1210cabdff1aSopenharmony_ci int mask_id = (tx == TX_8X8); 1211cabdff1aSopenharmony_ci int l2 = tx + ss_h - 1, step1d; 1212cabdff1aSopenharmony_ci static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 }; 1213cabdff1aSopenharmony_ci int m_row = m_col & masks[l2]; 1214cabdff1aSopenharmony_ci 1215cabdff1aSopenharmony_ci // at odd UV col/row edges tx16/tx32 loopfilter edges, force 1216cabdff1aSopenharmony_ci // 8wd loopfilter to prevent going off the visible edge. 
1217cabdff1aSopenharmony_ci if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) { 1218cabdff1aSopenharmony_ci int m_row_16 = ((t << (w - 1)) - t) & masks[l2]; 1219cabdff1aSopenharmony_ci int m_row_8 = m_row - m_row_16; 1220cabdff1aSopenharmony_ci 1221cabdff1aSopenharmony_ci for (y = row_and_7; y < h + row_and_7; y++) { 1222cabdff1aSopenharmony_ci mask[0][y][0] |= m_row_16; 1223cabdff1aSopenharmony_ci mask[0][y][1] |= m_row_8; 1224cabdff1aSopenharmony_ci } 1225cabdff1aSopenharmony_ci } else { 1226cabdff1aSopenharmony_ci for (y = row_and_7; y < h + row_and_7; y++) 1227cabdff1aSopenharmony_ci mask[0][y][mask_id] |= m_row; 1228cabdff1aSopenharmony_ci } 1229cabdff1aSopenharmony_ci 1230cabdff1aSopenharmony_ci l2 = tx + ss_v - 1; 1231cabdff1aSopenharmony_ci step1d = 1 << l2; 1232cabdff1aSopenharmony_ci if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) { 1233cabdff1aSopenharmony_ci for (y = row_and_7; y < h + row_and_7 - 1; y += step1d) 1234cabdff1aSopenharmony_ci mask[1][y][0] |= m_col; 1235cabdff1aSopenharmony_ci if (y - row_and_7 == h - 1) 1236cabdff1aSopenharmony_ci mask[1][y][1] |= m_col; 1237cabdff1aSopenharmony_ci } else { 1238cabdff1aSopenharmony_ci for (y = row_and_7; y < h + row_and_7; y += step1d) 1239cabdff1aSopenharmony_ci mask[1][y][mask_id] |= m_col; 1240cabdff1aSopenharmony_ci } 1241cabdff1aSopenharmony_ci } else if (tx != TX_4X4) { 1242cabdff1aSopenharmony_ci int mask_id; 1243cabdff1aSopenharmony_ci 1244cabdff1aSopenharmony_ci mask_id = (tx == TX_8X8) || (h == ss_v); 1245cabdff1aSopenharmony_ci mask[1][row_and_7][mask_id] |= m_col; 1246cabdff1aSopenharmony_ci mask_id = (tx == TX_8X8) || (w == ss_h); 1247cabdff1aSopenharmony_ci for (y = row_and_7; y < h + row_and_7; y++) 1248cabdff1aSopenharmony_ci mask[0][y][mask_id] |= t; 1249cabdff1aSopenharmony_ci } else { 1250cabdff1aSopenharmony_ci int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8; 1251cabdff1aSopenharmony_ci 1252cabdff1aSopenharmony_ci for (y = row_and_7; y < h + row_and_7; y++) { 
1253cabdff1aSopenharmony_ci mask[0][y][2] |= t4; 1254cabdff1aSopenharmony_ci mask[0][y][1] |= t8; 1255cabdff1aSopenharmony_ci } 1256cabdff1aSopenharmony_ci mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col; 1257cabdff1aSopenharmony_ci } 1258cabdff1aSopenharmony_ci } 1259cabdff1aSopenharmony_ci} 1260cabdff1aSopenharmony_ci 1261cabdff1aSopenharmony_civoid ff_vp9_decode_block(VP9TileData *td, int row, int col, 1262cabdff1aSopenharmony_ci VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, 1263cabdff1aSopenharmony_ci enum BlockLevel bl, enum BlockPartition bp) 1264cabdff1aSopenharmony_ci{ 1265cabdff1aSopenharmony_ci VP9Context *s = td->s; 1266cabdff1aSopenharmony_ci VP9Block *b = td->b; 1267cabdff1aSopenharmony_ci enum BlockSize bs = bl * 3 + bp; 1268cabdff1aSopenharmony_ci int bytesperpixel = s->bytesperpixel; 1269cabdff1aSopenharmony_ci int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl; 1270cabdff1aSopenharmony_ci int emu[2]; 1271cabdff1aSopenharmony_ci AVFrame *f = s->s.frames[CUR_FRAME].tf.f; 1272cabdff1aSopenharmony_ci 1273cabdff1aSopenharmony_ci td->row = row; 1274cabdff1aSopenharmony_ci td->row7 = row & 7; 1275cabdff1aSopenharmony_ci td->col = col; 1276cabdff1aSopenharmony_ci td->col7 = col & 7; 1277cabdff1aSopenharmony_ci 1278cabdff1aSopenharmony_ci td->min_mv.x = -(128 + col * 64); 1279cabdff1aSopenharmony_ci td->min_mv.y = -(128 + row * 64); 1280cabdff1aSopenharmony_ci td->max_mv.x = 128 + (s->cols - col - w4) * 64; 1281cabdff1aSopenharmony_ci td->max_mv.y = 128 + (s->rows - row - h4) * 64; 1282cabdff1aSopenharmony_ci 1283cabdff1aSopenharmony_ci if (s->pass < 2) { 1284cabdff1aSopenharmony_ci b->bs = bs; 1285cabdff1aSopenharmony_ci b->bl = bl; 1286cabdff1aSopenharmony_ci b->bp = bp; 1287cabdff1aSopenharmony_ci decode_mode(td); 1288cabdff1aSopenharmony_ci b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) || 1289cabdff1aSopenharmony_ci (s->ss_v && h4 * 2 == (1 << b->tx))); 1290cabdff1aSopenharmony_ci 
1291cabdff1aSopenharmony_ci if (td->block_structure) { 1292cabdff1aSopenharmony_ci td->block_structure[td->nb_block_structure].row = row; 1293cabdff1aSopenharmony_ci td->block_structure[td->nb_block_structure].col = col; 1294cabdff1aSopenharmony_ci td->block_structure[td->nb_block_structure].block_size_idx_x = av_log2(w4); 1295cabdff1aSopenharmony_ci td->block_structure[td->nb_block_structure].block_size_idx_y = av_log2(h4); 1296cabdff1aSopenharmony_ci td->nb_block_structure++; 1297cabdff1aSopenharmony_ci } 1298cabdff1aSopenharmony_ci 1299cabdff1aSopenharmony_ci if (!b->skip) { 1300cabdff1aSopenharmony_ci int has_coeffs; 1301cabdff1aSopenharmony_ci 1302cabdff1aSopenharmony_ci if (bytesperpixel == 1) { 1303cabdff1aSopenharmony_ci has_coeffs = decode_coeffs_8bpp(td); 1304cabdff1aSopenharmony_ci } else { 1305cabdff1aSopenharmony_ci has_coeffs = decode_coeffs_16bpp(td); 1306cabdff1aSopenharmony_ci } 1307cabdff1aSopenharmony_ci if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) { 1308cabdff1aSopenharmony_ci b->skip = 1; 1309cabdff1aSopenharmony_ci memset(&s->above_skip_ctx[col], 1, w4); 1310cabdff1aSopenharmony_ci memset(&td->left_skip_ctx[td->row7], 1, h4); 1311cabdff1aSopenharmony_ci } 1312cabdff1aSopenharmony_ci } else { 1313cabdff1aSopenharmony_ci int row7 = td->row7; 1314cabdff1aSopenharmony_ci 1315cabdff1aSopenharmony_ci#define SPLAT_ZERO_CTX(v, n) \ 1316cabdff1aSopenharmony_ci switch (n) { \ 1317cabdff1aSopenharmony_ci case 1: v = 0; break; \ 1318cabdff1aSopenharmony_ci case 2: AV_ZERO16(&v); break; \ 1319cabdff1aSopenharmony_ci case 4: AV_ZERO32(&v); break; \ 1320cabdff1aSopenharmony_ci case 8: AV_ZERO64(&v); break; \ 1321cabdff1aSopenharmony_ci case 16: AV_ZERO128(&v); break; \ 1322cabdff1aSopenharmony_ci } 1323cabdff1aSopenharmony_ci#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \ 1324cabdff1aSopenharmony_ci do { \ 1325cabdff1aSopenharmony_ci SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \ 1326cabdff1aSopenharmony_ci if (s->ss_##dir2) { \ 
1327cabdff1aSopenharmony_ci SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \ 1328cabdff1aSopenharmony_ci SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \ 1329cabdff1aSopenharmony_ci } else { \ 1330cabdff1aSopenharmony_ci SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \ 1331cabdff1aSopenharmony_ci SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \ 1332cabdff1aSopenharmony_ci } \ 1333cabdff1aSopenharmony_ci } while (0) 1334cabdff1aSopenharmony_ci 1335cabdff1aSopenharmony_ci switch (w4) { 1336cabdff1aSopenharmony_ci case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break; 1337cabdff1aSopenharmony_ci case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break; 1338cabdff1aSopenharmony_ci case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break; 1339cabdff1aSopenharmony_ci case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break; 1340cabdff1aSopenharmony_ci } 1341cabdff1aSopenharmony_ci switch (h4) { 1342cabdff1aSopenharmony_ci case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break; 1343cabdff1aSopenharmony_ci case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break; 1344cabdff1aSopenharmony_ci case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break; 1345cabdff1aSopenharmony_ci case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break; 1346cabdff1aSopenharmony_ci } 1347cabdff1aSopenharmony_ci } 1348cabdff1aSopenharmony_ci 1349cabdff1aSopenharmony_ci if (s->pass == 1) { 1350cabdff1aSopenharmony_ci s->td[0].b++; 1351cabdff1aSopenharmony_ci s->td[0].block += w4 * h4 * 64 * bytesperpixel; 1352cabdff1aSopenharmony_ci s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v); 1353cabdff1aSopenharmony_ci s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v); 1354cabdff1aSopenharmony_ci s->td[0].eob += 4 * w4 * h4; 1355cabdff1aSopenharmony_ci s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v); 1356cabdff1aSopenharmony_ci s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v); 1357cabdff1aSopenharmony_ci 
1358cabdff1aSopenharmony_ci return; 1359cabdff1aSopenharmony_ci } 1360cabdff1aSopenharmony_ci } 1361cabdff1aSopenharmony_ci 1362cabdff1aSopenharmony_ci // emulated overhangs if the stride of the target buffer can't hold. This 1363cabdff1aSopenharmony_ci // makes it possible to support emu-edge and so on even if we have large block 1364cabdff1aSopenharmony_ci // overhangs 1365cabdff1aSopenharmony_ci emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] || 1366cabdff1aSopenharmony_ci (row + h4) > s->rows; 1367cabdff1aSopenharmony_ci emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] || 1368cabdff1aSopenharmony_ci (row + h4) > s->rows; 1369cabdff1aSopenharmony_ci if (emu[0]) { 1370cabdff1aSopenharmony_ci td->dst[0] = td->tmp_y; 1371cabdff1aSopenharmony_ci td->y_stride = 128; 1372cabdff1aSopenharmony_ci } else { 1373cabdff1aSopenharmony_ci td->dst[0] = f->data[0] + yoff; 1374cabdff1aSopenharmony_ci td->y_stride = f->linesize[0]; 1375cabdff1aSopenharmony_ci } 1376cabdff1aSopenharmony_ci if (emu[1]) { 1377cabdff1aSopenharmony_ci td->dst[1] = td->tmp_uv[0]; 1378cabdff1aSopenharmony_ci td->dst[2] = td->tmp_uv[1]; 1379cabdff1aSopenharmony_ci td->uv_stride = 128; 1380cabdff1aSopenharmony_ci } else { 1381cabdff1aSopenharmony_ci td->dst[1] = f->data[1] + uvoff; 1382cabdff1aSopenharmony_ci td->dst[2] = f->data[2] + uvoff; 1383cabdff1aSopenharmony_ci td->uv_stride = f->linesize[1]; 1384cabdff1aSopenharmony_ci } 1385cabdff1aSopenharmony_ci if (b->intra) { 1386cabdff1aSopenharmony_ci if (s->s.h.bpp > 8) { 1387cabdff1aSopenharmony_ci ff_vp9_intra_recon_16bpp(td, yoff, uvoff); 1388cabdff1aSopenharmony_ci } else { 1389cabdff1aSopenharmony_ci ff_vp9_intra_recon_8bpp(td, yoff, uvoff); 1390cabdff1aSopenharmony_ci } 1391cabdff1aSopenharmony_ci } else { 1392cabdff1aSopenharmony_ci if (s->s.h.bpp > 8) { 1393cabdff1aSopenharmony_ci ff_vp9_inter_recon_16bpp(td); 1394cabdff1aSopenharmony_ci } else { 1395cabdff1aSopenharmony_ci ff_vp9_inter_recon_8bpp(td); 
1396cabdff1aSopenharmony_ci } 1397cabdff1aSopenharmony_ci } 1398cabdff1aSopenharmony_ci if (emu[0]) { 1399cabdff1aSopenharmony_ci int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0; 1400cabdff1aSopenharmony_ci 1401cabdff1aSopenharmony_ci for (n = 0; o < w; n++) { 1402cabdff1aSopenharmony_ci int bw = 64 >> n; 1403cabdff1aSopenharmony_ci 1404cabdff1aSopenharmony_ci av_assert2(n <= 4); 1405cabdff1aSopenharmony_ci if (w & bw) { 1406cabdff1aSopenharmony_ci s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0], 1407cabdff1aSopenharmony_ci td->tmp_y + o * bytesperpixel, 128, h, 0, 0); 1408cabdff1aSopenharmony_ci o += bw; 1409cabdff1aSopenharmony_ci } 1410cabdff1aSopenharmony_ci } 1411cabdff1aSopenharmony_ci } 1412cabdff1aSopenharmony_ci if (emu[1]) { 1413cabdff1aSopenharmony_ci int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h; 1414cabdff1aSopenharmony_ci int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0; 1415cabdff1aSopenharmony_ci 1416cabdff1aSopenharmony_ci for (n = s->ss_h; o < w; n++) { 1417cabdff1aSopenharmony_ci int bw = 64 >> n; 1418cabdff1aSopenharmony_ci 1419cabdff1aSopenharmony_ci av_assert2(n <= 4); 1420cabdff1aSopenharmony_ci if (w & bw) { 1421cabdff1aSopenharmony_ci s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1], 1422cabdff1aSopenharmony_ci td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0); 1423cabdff1aSopenharmony_ci s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2], 1424cabdff1aSopenharmony_ci td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0); 1425cabdff1aSopenharmony_ci o += bw; 1426cabdff1aSopenharmony_ci } 1427cabdff1aSopenharmony_ci } 1428cabdff1aSopenharmony_ci } 1429cabdff1aSopenharmony_ci 1430cabdff1aSopenharmony_ci // pick filter level and find edges to apply filter to 1431cabdff1aSopenharmony_ci if (s->s.h.filter.level && 1432cabdff1aSopenharmony_ci (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 
0 : b->ref[0] + 1] 1433cabdff1aSopenharmony_ci [b->mode[3] != ZEROMV]) > 0) { 1434cabdff1aSopenharmony_ci int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4); 1435cabdff1aSopenharmony_ci int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7; 1436cabdff1aSopenharmony_ci 1437cabdff1aSopenharmony_ci setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl); 1438cabdff1aSopenharmony_ci mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter); 1439cabdff1aSopenharmony_ci if (s->ss_h || s->ss_v) 1440cabdff1aSopenharmony_ci mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end, 1441cabdff1aSopenharmony_ci s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0, 1442cabdff1aSopenharmony_ci s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0, 1443cabdff1aSopenharmony_ci b->uvtx, skip_inter); 1444cabdff1aSopenharmony_ci } 1445cabdff1aSopenharmony_ci 1446cabdff1aSopenharmony_ci if (s->pass == 2) { 1447cabdff1aSopenharmony_ci s->td[0].b++; 1448cabdff1aSopenharmony_ci s->td[0].block += w4 * h4 * 64 * bytesperpixel; 1449cabdff1aSopenharmony_ci s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h); 1450cabdff1aSopenharmony_ci s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h); 1451cabdff1aSopenharmony_ci s->td[0].eob += 4 * w4 * h4; 1452cabdff1aSopenharmony_ci s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h); 1453cabdff1aSopenharmony_ci s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h); 1454cabdff1aSopenharmony_ci } 1455cabdff1aSopenharmony_ci} 1456