1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (C) 2010 David Conrad 3cabdff1aSopenharmony_ci * Copyright (C) 2010 Ronald S. Bultje 4cabdff1aSopenharmony_ci * Copyright (C) 2014 Peter Ross 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * This file is part of FFmpeg. 7cabdff1aSopenharmony_ci * 8cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 9cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 10cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 11cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 12cabdff1aSopenharmony_ci * 13cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 14cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 15cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16cabdff1aSopenharmony_ci * Lesser General Public License for more details. 17cabdff1aSopenharmony_ci * 18cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 19cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 20cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21cabdff1aSopenharmony_ci */ 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci/** 24cabdff1aSopenharmony_ci * @file 25cabdff1aSopenharmony_ci * VP8 compatible video decoder 26cabdff1aSopenharmony_ci */ 27cabdff1aSopenharmony_ci 28cabdff1aSopenharmony_ci#include "config_components.h" 29cabdff1aSopenharmony_ci 30cabdff1aSopenharmony_ci#include "libavutil/common.h" 31cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h" 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_ci#include "mathops.h" 34cabdff1aSopenharmony_ci#include "vp8dsp.h" 35cabdff1aSopenharmony_ci 36cabdff1aSopenharmony_ci#define MK_IDCT_DC_ADD4_C(name) \ 37cabdff1aSopenharmony_cistatic void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], \ 38cabdff1aSopenharmony_ci ptrdiff_t stride) \ 39cabdff1aSopenharmony_ci{ \ 40cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \ 41cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \ 42cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \ 43cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \ 44cabdff1aSopenharmony_ci} \ 45cabdff1aSopenharmony_ci \ 46cabdff1aSopenharmony_cistatic void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], \ 47cabdff1aSopenharmony_ci ptrdiff_t stride) \ 48cabdff1aSopenharmony_ci{ \ 49cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + 0, block[0], stride); \ 50cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + 4, block[1], stride); \ 51cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + 8, block[2], stride); \ 52cabdff1aSopenharmony_ci name ## _idct_dc_add_c(dst + 12, block[3], stride); \ 53cabdff1aSopenharmony_ci} 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_ci#if CONFIG_VP7_DECODER 56cabdff1aSopenharmony_cistatic void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) 57cabdff1aSopenharmony_ci{ 58cabdff1aSopenharmony_ci int i; 59cabdff1aSopenharmony_ci unsigned a1, b1, c1, d1; 60cabdff1aSopenharmony_ci int16_t tmp[16]; 61cabdff1aSopenharmony_ci 62cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 63cabdff1aSopenharmony_ci a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170; 64cabdff1aSopenharmony_ci b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170; 65cabdff1aSopenharmony_ci c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274; 66cabdff1aSopenharmony_ci d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540; 67cabdff1aSopenharmony_ci tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; 68cabdff1aSopenharmony_ci tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; 69cabdff1aSopenharmony_ci tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; 70cabdff1aSopenharmony_ci tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; 71cabdff1aSopenharmony_ci } 72cabdff1aSopenharmony_ci 73cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 74cabdff1aSopenharmony_ci a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; 75cabdff1aSopenharmony_ci b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; 76cabdff1aSopenharmony_ci c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; 77cabdff1aSopenharmony_ci d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; 78cabdff1aSopenharmony_ci AV_ZERO64(dc + i * 4); 79cabdff1aSopenharmony_ci block[0][i][0] = (int)(a1 + d1 + 0x20000) >> 18; 80cabdff1aSopenharmony_ci block[3][i][0] = (int)(a1 - d1 + 0x20000) >> 18; 81cabdff1aSopenharmony_ci block[1][i][0] = (int)(b1 + c1 + 0x20000) >> 18; 82cabdff1aSopenharmony_ci block[2][i][0] = (int)(b1 - c1 + 0x20000) >> 18; 83cabdff1aSopenharmony_ci } 84cabdff1aSopenharmony_ci} 85cabdff1aSopenharmony_ci 86cabdff1aSopenharmony_cistatic void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) 87cabdff1aSopenharmony_ci{ 88cabdff1aSopenharmony_ci int i, val = (23170 * (23170 * dc[0] >> 14) + 0x20000) >> 18; 89cabdff1aSopenharmony_ci dc[0] = 0; 90cabdff1aSopenharmony_ci 91cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 92cabdff1aSopenharmony_ci block[i][0][0] = val; 93cabdff1aSopenharmony_ci block[i][1][0] = val; 94cabdff1aSopenharmony_ci block[i][2][0] = val; 95cabdff1aSopenharmony_ci block[i][3][0] = val; 96cabdff1aSopenharmony_ci } 97cabdff1aSopenharmony_ci} 98cabdff1aSopenharmony_ci 99cabdff1aSopenharmony_cistatic void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) 100cabdff1aSopenharmony_ci{ 101cabdff1aSopenharmony_ci int i; 102cabdff1aSopenharmony_ci unsigned a1, b1, c1, d1; 103cabdff1aSopenharmony_ci int16_t tmp[16]; 104cabdff1aSopenharmony_ci 105cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 106cabdff1aSopenharmony_ci a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170; 107cabdff1aSopenharmony_ci b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170; 108cabdff1aSopenharmony_ci c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274; 109cabdff1aSopenharmony_ci d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540; 110cabdff1aSopenharmony_ci AV_ZERO64(block + i * 4); 111cabdff1aSopenharmony_ci tmp[i * 4 + 0] = (int)(a1 + d1) >> 14; 112cabdff1aSopenharmony_ci tmp[i * 4 + 3] = (int)(a1 - d1) >> 14; 113cabdff1aSopenharmony_ci tmp[i * 4 + 1] = (int)(b1 + c1) >> 14; 114cabdff1aSopenharmony_ci tmp[i * 4 + 2] = (int)(b1 - c1) >> 14; 115cabdff1aSopenharmony_ci } 116cabdff1aSopenharmony_ci 117cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 118cabdff1aSopenharmony_ci a1 = (tmp[i + 0] + tmp[i + 8]) * 23170; 119cabdff1aSopenharmony_ci b1 = (tmp[i + 0] - tmp[i + 8]) * 23170; 120cabdff1aSopenharmony_ci c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274; 121cabdff1aSopenharmony_ci d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540; 122cabdff1aSopenharmony_ci dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] + 123cabdff1aSopenharmony_ci ((int)(a1 + d1 + 0x20000) >> 18)); 124cabdff1aSopenharmony_ci dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] + 125cabdff1aSopenharmony_ci ((int)(a1 - d1 + 0x20000) >> 18)); 126cabdff1aSopenharmony_ci dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] + 127cabdff1aSopenharmony_ci ((int)(b1 + c1 + 0x20000) >> 18)); 128cabdff1aSopenharmony_ci dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] + 129cabdff1aSopenharmony_ci ((int)(b1 - c1 + 0x20000) >> 18)); 130cabdff1aSopenharmony_ci } 131cabdff1aSopenharmony_ci} 132cabdff1aSopenharmony_ci 133cabdff1aSopenharmony_cistatic void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) 134cabdff1aSopenharmony_ci{ 135cabdff1aSopenharmony_ci int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18; 136cabdff1aSopenharmony_ci block[0] = 0; 137cabdff1aSopenharmony_ci 138cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 139cabdff1aSopenharmony_ci dst[0] = av_clip_uint8(dst[0] + dc); 140cabdff1aSopenharmony_ci dst[1] = av_clip_uint8(dst[1] + dc); 141cabdff1aSopenharmony_ci dst[2] = av_clip_uint8(dst[2] + dc); 142cabdff1aSopenharmony_ci dst[3] = av_clip_uint8(dst[3] + dc); 143cabdff1aSopenharmony_ci dst += stride; 144cabdff1aSopenharmony_ci } 145cabdff1aSopenharmony_ci} 146cabdff1aSopenharmony_ci 147cabdff1aSopenharmony_ciMK_IDCT_DC_ADD4_C(vp7) 148cabdff1aSopenharmony_ci#endif /* CONFIG_VP7_DECODER */ 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_ci// TODO: Maybe add dequant 151cabdff1aSopenharmony_ci#if CONFIG_VP8_DECODER 152cabdff1aSopenharmony_cistatic void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) 153cabdff1aSopenharmony_ci{ 154cabdff1aSopenharmony_ci int i, t0, t1, t2, t3; 155cabdff1aSopenharmony_ci 156cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 157cabdff1aSopenharmony_ci t0 = dc[0 * 4 + i] + dc[3 * 4 + i]; 158cabdff1aSopenharmony_ci t1 = dc[1 * 4 + i] + dc[2 * 4 + i]; 159cabdff1aSopenharmony_ci t2 = dc[1 * 4 + i] - dc[2 * 4 + i]; 160cabdff1aSopenharmony_ci t3 = dc[0 * 4 + i] - dc[3 * 4 + i]; 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci dc[0 * 4 + i] = t0 + t1; 163cabdff1aSopenharmony_ci dc[1 * 4 + i] = t3 + t2; 164cabdff1aSopenharmony_ci dc[2 * 4 + i] = t0 - t1; 165cabdff1aSopenharmony_ci dc[3 * 4 + i] = t3 - t2; 166cabdff1aSopenharmony_ci } 167cabdff1aSopenharmony_ci 168cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 169cabdff1aSopenharmony_ci t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding 170cabdff1aSopenharmony_ci t1 = dc[i * 4 + 1] + dc[i * 4 + 2]; 171cabdff1aSopenharmony_ci t2 = dc[i * 4 + 1] - dc[i * 4 + 2]; 172cabdff1aSopenharmony_ci t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding 173cabdff1aSopenharmony_ci AV_ZERO64(dc + i * 4); 174cabdff1aSopenharmony_ci 175cabdff1aSopenharmony_ci block[i][0][0] = (t0 + t1) >> 3; 176cabdff1aSopenharmony_ci block[i][1][0] = (t3 + t2) >> 3; 177cabdff1aSopenharmony_ci block[i][2][0] = (t0 - t1) >> 3; 178cabdff1aSopenharmony_ci block[i][3][0] = (t3 - t2) >> 3; 179cabdff1aSopenharmony_ci } 180cabdff1aSopenharmony_ci} 181cabdff1aSopenharmony_ci 182cabdff1aSopenharmony_cistatic void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) 183cabdff1aSopenharmony_ci{ 184cabdff1aSopenharmony_ci int i, val = (dc[0] + 3) >> 3; 185cabdff1aSopenharmony_ci dc[0] = 0; 186cabdff1aSopenharmony_ci 187cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 188cabdff1aSopenharmony_ci block[i][0][0] = val; 189cabdff1aSopenharmony_ci block[i][1][0] = val; 190cabdff1aSopenharmony_ci block[i][2][0] = val; 191cabdff1aSopenharmony_ci block[i][3][0] = val; 192cabdff1aSopenharmony_ci } 193cabdff1aSopenharmony_ci} 194cabdff1aSopenharmony_ci 195cabdff1aSopenharmony_ci#define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) 196cabdff1aSopenharmony_ci#define MUL_35468(a) (((a) * 35468) >> 16) 197cabdff1aSopenharmony_ci 198cabdff1aSopenharmony_cistatic void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) 199cabdff1aSopenharmony_ci{ 200cabdff1aSopenharmony_ci int i, t0, t1, t2, t3; 201cabdff1aSopenharmony_ci int16_t tmp[16]; 202cabdff1aSopenharmony_ci 203cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 204cabdff1aSopenharmony_ci t0 = block[0 * 4 + i] + block[2 * 4 + i]; 205cabdff1aSopenharmony_ci t1 = block[0 * 4 + i] - block[2 * 4 + i]; 206cabdff1aSopenharmony_ci t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]); 207cabdff1aSopenharmony_ci t3 = MUL_20091(block[1 * 4 + i]) + MUL_35468(block[3 * 4 + i]); 208cabdff1aSopenharmony_ci block[0 * 4 + i] = 0; 209cabdff1aSopenharmony_ci block[1 * 4 + i] = 0; 210cabdff1aSopenharmony_ci block[2 * 4 + i] = 0; 211cabdff1aSopenharmony_ci block[3 * 4 + i] = 0; 212cabdff1aSopenharmony_ci 213cabdff1aSopenharmony_ci tmp[i * 4 + 0] = t0 + t3; 214cabdff1aSopenharmony_ci tmp[i * 4 + 1] = t1 + t2; 215cabdff1aSopenharmony_ci tmp[i * 4 + 2] = t1 - t2; 216cabdff1aSopenharmony_ci tmp[i * 4 + 3] = t0 - t3; 217cabdff1aSopenharmony_ci } 218cabdff1aSopenharmony_ci 219cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 220cabdff1aSopenharmony_ci t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i]; 221cabdff1aSopenharmony_ci t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i]; 222cabdff1aSopenharmony_ci t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]); 223cabdff1aSopenharmony_ci t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]); 224cabdff1aSopenharmony_ci 225cabdff1aSopenharmony_ci dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); 226cabdff1aSopenharmony_ci dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); 227cabdff1aSopenharmony_ci dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); 228cabdff1aSopenharmony_ci dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); 229cabdff1aSopenharmony_ci dst += stride; 230cabdff1aSopenharmony_ci } 231cabdff1aSopenharmony_ci} 232cabdff1aSopenharmony_ci 233cabdff1aSopenharmony_cistatic void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) 234cabdff1aSopenharmony_ci{ 235cabdff1aSopenharmony_ci int i, dc = (block[0] + 4) >> 3; 236cabdff1aSopenharmony_ci block[0] = 0; 237cabdff1aSopenharmony_ci 238cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 239cabdff1aSopenharmony_ci dst[0] = av_clip_uint8(dst[0] + dc); 240cabdff1aSopenharmony_ci dst[1] = av_clip_uint8(dst[1] + dc); 241cabdff1aSopenharmony_ci dst[2] = av_clip_uint8(dst[2] + dc); 242cabdff1aSopenharmony_ci dst[3] = av_clip_uint8(dst[3] + dc); 243cabdff1aSopenharmony_ci dst += stride; 244cabdff1aSopenharmony_ci } 245cabdff1aSopenharmony_ci} 246cabdff1aSopenharmony_ci 247cabdff1aSopenharmony_ciMK_IDCT_DC_ADD4_C(vp8) 248cabdff1aSopenharmony_ci#endif /* CONFIG_VP8_DECODER */ 249cabdff1aSopenharmony_ci 250cabdff1aSopenharmony_ci// because I like only having two parameters to pass functions... 251cabdff1aSopenharmony_ci#define LOAD_PIXELS \ 252cabdff1aSopenharmony_ci int av_unused p3 = p[-4 * stride]; \ 253cabdff1aSopenharmony_ci int av_unused p2 = p[-3 * stride]; \ 254cabdff1aSopenharmony_ci int av_unused p1 = p[-2 * stride]; \ 255cabdff1aSopenharmony_ci int av_unused p0 = p[-1 * stride]; \ 256cabdff1aSopenharmony_ci int av_unused q0 = p[ 0 * stride]; \ 257cabdff1aSopenharmony_ci int av_unused q1 = p[ 1 * stride]; \ 258cabdff1aSopenharmony_ci int av_unused q2 = p[ 2 * stride]; \ 259cabdff1aSopenharmony_ci int av_unused q3 = p[ 3 * stride]; 260cabdff1aSopenharmony_ci 261cabdff1aSopenharmony_ci#define clip_int8(n) (cm[(n) + 0x80] - 0x80) 262cabdff1aSopenharmony_ci 263cabdff1aSopenharmony_cistatic av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, 264cabdff1aSopenharmony_ci int is4tap, int is_vp7) 265cabdff1aSopenharmony_ci{ 266cabdff1aSopenharmony_ci LOAD_PIXELS 267cabdff1aSopenharmony_ci int a, f1, f2; 268cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 269cabdff1aSopenharmony_ci 270cabdff1aSopenharmony_ci a = 3 * (q0 - p0); 271cabdff1aSopenharmony_ci 272cabdff1aSopenharmony_ci if (is4tap) 273cabdff1aSopenharmony_ci a += clip_int8(p1 - q1); 274cabdff1aSopenharmony_ci 275cabdff1aSopenharmony_ci a = clip_int8(a); 276cabdff1aSopenharmony_ci 277cabdff1aSopenharmony_ci // We deviate from the spec here with c(a+3) >> 3 278cabdff1aSopenharmony_ci // since that's what libvpx does. 279cabdff1aSopenharmony_ci f1 = FFMIN(a + 4, 127) >> 3; 280cabdff1aSopenharmony_ci 281cabdff1aSopenharmony_ci if (is_vp7) 282cabdff1aSopenharmony_ci f2 = f1 - ((a & 7) == 4); 283cabdff1aSopenharmony_ci else 284cabdff1aSopenharmony_ci f2 = FFMIN(a + 3, 127) >> 3; 285cabdff1aSopenharmony_ci 286cabdff1aSopenharmony_ci // Despite what the spec says, we do need to clamp here to 287cabdff1aSopenharmony_ci // be bitexact with libvpx. 288cabdff1aSopenharmony_ci p[-1 * stride] = cm[p0 + f2]; 289cabdff1aSopenharmony_ci p[ 0 * stride] = cm[q0 - f1]; 290cabdff1aSopenharmony_ci 291cabdff1aSopenharmony_ci // only used for _inner on blocks without high edge variance 292cabdff1aSopenharmony_ci if (!is4tap) { 293cabdff1aSopenharmony_ci a = (f1 + 1) >> 1; 294cabdff1aSopenharmony_ci p[-2 * stride] = cm[p1 + a]; 295cabdff1aSopenharmony_ci p[ 1 * stride] = cm[q1 - a]; 296cabdff1aSopenharmony_ci } 297cabdff1aSopenharmony_ci} 298cabdff1aSopenharmony_ci 299cabdff1aSopenharmony_cistatic av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride, 300cabdff1aSopenharmony_ci int is4tap) 301cabdff1aSopenharmony_ci{ 302cabdff1aSopenharmony_ci filter_common(p, stride, is4tap, IS_VP7); 303cabdff1aSopenharmony_ci} 304cabdff1aSopenharmony_ci 305cabdff1aSopenharmony_cistatic av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride, 306cabdff1aSopenharmony_ci int is4tap) 307cabdff1aSopenharmony_ci{ 308cabdff1aSopenharmony_ci filter_common(p, stride, is4tap, IS_VP8); 309cabdff1aSopenharmony_ci} 310cabdff1aSopenharmony_ci 311cabdff1aSopenharmony_cistatic av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride, 312cabdff1aSopenharmony_ci int flim) 313cabdff1aSopenharmony_ci{ 314cabdff1aSopenharmony_ci LOAD_PIXELS 315cabdff1aSopenharmony_ci return FFABS(p0 - q0) <= flim; 316cabdff1aSopenharmony_ci} 317cabdff1aSopenharmony_ci 318cabdff1aSopenharmony_cistatic av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, 319cabdff1aSopenharmony_ci int flim) 320cabdff1aSopenharmony_ci{ 321cabdff1aSopenharmony_ci LOAD_PIXELS 322cabdff1aSopenharmony_ci return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim; 323cabdff1aSopenharmony_ci} 324cabdff1aSopenharmony_ci 325cabdff1aSopenharmony_ci/** 326cabdff1aSopenharmony_ci * E - limit at the macroblock edge 327cabdff1aSopenharmony_ci * I - limit for interior difference 328cabdff1aSopenharmony_ci */ 329cabdff1aSopenharmony_ci#define NORMAL_LIMIT(vpn) \ 330cabdff1aSopenharmony_cistatic av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \ 331cabdff1aSopenharmony_ci ptrdiff_t stride, \ 332cabdff1aSopenharmony_ci int E, int I) \ 333cabdff1aSopenharmony_ci{ \ 334cabdff1aSopenharmony_ci LOAD_PIXELS \ 335cabdff1aSopenharmony_ci return vp ## vpn ## _simple_limit(p, stride, E) && \ 336cabdff1aSopenharmony_ci FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \ 337cabdff1aSopenharmony_ci FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \ 338cabdff1aSopenharmony_ci FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \ 339cabdff1aSopenharmony_ci} 340cabdff1aSopenharmony_ci 341cabdff1aSopenharmony_ciNORMAL_LIMIT(7) 342cabdff1aSopenharmony_ciNORMAL_LIMIT(8) 343cabdff1aSopenharmony_ci 344cabdff1aSopenharmony_ci// high edge variance 345cabdff1aSopenharmony_cistatic av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) 346cabdff1aSopenharmony_ci{ 347cabdff1aSopenharmony_ci LOAD_PIXELS 348cabdff1aSopenharmony_ci return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh; 349cabdff1aSopenharmony_ci} 350cabdff1aSopenharmony_ci 351cabdff1aSopenharmony_cistatic av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) 352cabdff1aSopenharmony_ci{ 353cabdff1aSopenharmony_ci int a0, a1, a2, w; 354cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; 355cabdff1aSopenharmony_ci 356cabdff1aSopenharmony_ci LOAD_PIXELS 357cabdff1aSopenharmony_ci 358cabdff1aSopenharmony_ci w = clip_int8(p1 - q1); 359cabdff1aSopenharmony_ci w = clip_int8(w + 3 * (q0 - p0)); 360cabdff1aSopenharmony_ci 361cabdff1aSopenharmony_ci a0 = (27 * w + 63) >> 7; 362cabdff1aSopenharmony_ci a1 = (18 * w + 63) >> 7; 363cabdff1aSopenharmony_ci a2 = (9 * w + 63) >> 7; 364cabdff1aSopenharmony_ci 365cabdff1aSopenharmony_ci p[-3 * stride] = cm[p2 + a2]; 366cabdff1aSopenharmony_ci p[-2 * stride] = cm[p1 + a1]; 367cabdff1aSopenharmony_ci p[-1 * stride] = cm[p0 + a0]; 368cabdff1aSopenharmony_ci p[ 0 * stride] = cm[q0 - a0]; 369cabdff1aSopenharmony_ci p[ 1 * stride] = cm[q1 - a1]; 370cabdff1aSopenharmony_ci p[ 2 * stride] = cm[q2 - a2]; 371cabdff1aSopenharmony_ci} 372cabdff1aSopenharmony_ci 373cabdff1aSopenharmony_ci#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline) \ 374cabdff1aSopenharmony_cistatic maybe_inline \ 375cabdff1aSopenharmony_civoid vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, \ 376cabdff1aSopenharmony_ci ptrdiff_t stride, \ 377cabdff1aSopenharmony_ci int flim_E, int flim_I, \ 378cabdff1aSopenharmony_ci int hev_thresh) \ 379cabdff1aSopenharmony_ci{ \ 380cabdff1aSopenharmony_ci int i; \ 381cabdff1aSopenharmony_ci for (i = 0; i < size; i++) \ 382cabdff1aSopenharmony_ci if (vpn ## _normal_limit(dst + i * stridea, strideb, \ 383cabdff1aSopenharmony_ci flim_E, flim_I)) { \ 384cabdff1aSopenharmony_ci if (hev(dst + i * stridea, strideb, hev_thresh)) \ 385cabdff1aSopenharmony_ci vpn ## _filter_common(dst + i * stridea, strideb, 1); \ 386cabdff1aSopenharmony_ci else \ 387cabdff1aSopenharmony_ci filter_mbedge(dst + i * stridea, strideb); \ 388cabdff1aSopenharmony_ci } \ 389cabdff1aSopenharmony_ci} \ 390cabdff1aSopenharmony_ci \ 391cabdff1aSopenharmony_cistatic maybe_inline \ 392cabdff1aSopenharmony_civoid vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \ 393cabdff1aSopenharmony_ci ptrdiff_t stride, \ 394cabdff1aSopenharmony_ci int flim_E, \ 395cabdff1aSopenharmony_ci int flim_I, \ 396cabdff1aSopenharmony_ci int hev_thresh) \ 397cabdff1aSopenharmony_ci{ \ 398cabdff1aSopenharmony_ci int i; \ 399cabdff1aSopenharmony_ci for (i = 0; i < size; i++) \ 400cabdff1aSopenharmony_ci if (vpn ## _normal_limit(dst + i * stridea, strideb, \ 401cabdff1aSopenharmony_ci flim_E, flim_I)) { \ 402cabdff1aSopenharmony_ci int hv = hev(dst + i * stridea, strideb, hev_thresh); \ 403cabdff1aSopenharmony_ci if (hv) \ 404cabdff1aSopenharmony_ci vpn ## _filter_common(dst + i * stridea, strideb, 1); \ 405cabdff1aSopenharmony_ci else \ 406cabdff1aSopenharmony_ci vpn ## _filter_common(dst + i * stridea, strideb, 0); \ 407cabdff1aSopenharmony_ci } \ 408cabdff1aSopenharmony_ci} 409cabdff1aSopenharmony_ci 410cabdff1aSopenharmony_ci#define UV_LOOP_FILTER(vpn, dir, stridea, strideb) \ 411cabdff1aSopenharmony_ciLOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline) \ 412cabdff1aSopenharmony_cistatic void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU, \ 413cabdff1aSopenharmony_ci uint8_t *dstV, \ 414cabdff1aSopenharmony_ci ptrdiff_t stride, int fE, \ 415cabdff1aSopenharmony_ci int fI, int hev_thresh) \ 416cabdff1aSopenharmony_ci{ \ 417cabdff1aSopenharmony_ci vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ 418cabdff1aSopenharmony_ci vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ 419cabdff1aSopenharmony_ci} \ 420cabdff1aSopenharmony_ci \ 421cabdff1aSopenharmony_cistatic void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, \ 422cabdff1aSopenharmony_ci uint8_t *dstV, \ 423cabdff1aSopenharmony_ci ptrdiff_t stride, \ 424cabdff1aSopenharmony_ci int fE, int fI, \ 425cabdff1aSopenharmony_ci int hev_thresh) \ 426cabdff1aSopenharmony_ci{ \ 427cabdff1aSopenharmony_ci vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \ 428cabdff1aSopenharmony_ci hev_thresh); \ 429cabdff1aSopenharmony_ci vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \ 430cabdff1aSopenharmony_ci hev_thresh); \ 431cabdff1aSopenharmony_ci} 432cabdff1aSopenharmony_ci 433cabdff1aSopenharmony_ci#define LOOP_FILTER_SIMPLE(vpn) \ 434cabdff1aSopenharmony_cistatic void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ 435cabdff1aSopenharmony_ci int flim) \ 436cabdff1aSopenharmony_ci{ \ 437cabdff1aSopenharmony_ci int i; \ 438cabdff1aSopenharmony_ci for (i = 0; i < 16; i++) \ 439cabdff1aSopenharmony_ci if (vpn ## _simple_limit(dst + i, stride, flim)) \ 440cabdff1aSopenharmony_ci vpn ## _filter_common(dst + i, stride, 1); \ 441cabdff1aSopenharmony_ci} \ 442cabdff1aSopenharmony_ci \ 443cabdff1aSopenharmony_cistatic void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \ 444cabdff1aSopenharmony_ci int flim) \ 445cabdff1aSopenharmony_ci{ \ 446cabdff1aSopenharmony_ci int i; \ 447cabdff1aSopenharmony_ci for (i = 0; i < 16; i++) \ 448cabdff1aSopenharmony_ci if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \ 449cabdff1aSopenharmony_ci vpn ## _filter_common(dst + i * stride, 1, 1); \ 450cabdff1aSopenharmony_ci} 451cabdff1aSopenharmony_ci 452cabdff1aSopenharmony_ci#define LOOP_FILTERS(vpn) \ 453cabdff1aSopenharmony_ci LOOP_FILTER(vpn, v, 16, 1, stride, ) \ 454cabdff1aSopenharmony_ci LOOP_FILTER(vpn, h, 16, stride, 1, ) \ 455cabdff1aSopenharmony_ci UV_LOOP_FILTER(vpn, v, 1, stride) \ 456cabdff1aSopenharmony_ci UV_LOOP_FILTER(vpn, h, stride, 1) \ 457cabdff1aSopenharmony_ci LOOP_FILTER_SIMPLE(vpn) \ 458cabdff1aSopenharmony_ci 459cabdff1aSopenharmony_cistatic const uint8_t subpel_filters[7][6] = { 460cabdff1aSopenharmony_ci { 0, 6, 123, 12, 1, 0 }, 461cabdff1aSopenharmony_ci { 2, 11, 108, 36, 8, 1 }, 462cabdff1aSopenharmony_ci { 0, 9, 93, 50, 6, 0 }, 463cabdff1aSopenharmony_ci { 3, 16, 77, 77, 16, 3 }, 464cabdff1aSopenharmony_ci { 0, 6, 50, 93, 9, 0 }, 465cabdff1aSopenharmony_ci { 1, 8, 36, 108, 11, 2 }, 466cabdff1aSopenharmony_ci { 0, 1, 12, 123, 6, 0 }, 467cabdff1aSopenharmony_ci}; 468cabdff1aSopenharmony_ci 469cabdff1aSopenharmony_ci#define PUT_PIXELS(WIDTH) \ 470cabdff1aSopenharmony_cistatic void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride, \ 471cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t srcstride, \ 472cabdff1aSopenharmony_ci int h, int x, int y) \ 473cabdff1aSopenharmony_ci{ \ 474cabdff1aSopenharmony_ci int i; \ 475cabdff1aSopenharmony_ci for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ 476cabdff1aSopenharmony_ci memcpy(dst, src, WIDTH); \ 477cabdff1aSopenharmony_ci} 478cabdff1aSopenharmony_ci 479cabdff1aSopenharmony_ciPUT_PIXELS(16) 480cabdff1aSopenharmony_ciPUT_PIXELS(8) 481cabdff1aSopenharmony_ciPUT_PIXELS(4) 482cabdff1aSopenharmony_ci 483cabdff1aSopenharmony_ci#define FILTER_6TAP(src, F, stride) \ 484cabdff1aSopenharmony_ci cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ 485cabdff1aSopenharmony_ci F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \ 486cabdff1aSopenharmony_ci F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7] 487cabdff1aSopenharmony_ci 488cabdff1aSopenharmony_ci#define FILTER_4TAP(src, F, stride) \ 489cabdff1aSopenharmony_ci cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ 490cabdff1aSopenharmony_ci F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7] 491cabdff1aSopenharmony_ci 492cabdff1aSopenharmony_ci#define VP8_EPEL_H(SIZE, TAPS) \ 493cabdff1aSopenharmony_cistatic void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, \ 494cabdff1aSopenharmony_ci ptrdiff_t dststride, \ 495cabdff1aSopenharmony_ci uint8_t *src, \ 496cabdff1aSopenharmony_ci ptrdiff_t srcstride, \ 497cabdff1aSopenharmony_ci int h, int mx, int my) \ 498cabdff1aSopenharmony_ci{ \ 499cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; \ 500cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ 501cabdff1aSopenharmony_ci int x, y; \ 502cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { \ 503cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 504cabdff1aSopenharmony_ci dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ 505cabdff1aSopenharmony_ci dst += dststride; \ 506cabdff1aSopenharmony_ci src += srcstride; \ 507cabdff1aSopenharmony_ci } \ 508cabdff1aSopenharmony_ci} 509cabdff1aSopenharmony_ci 510cabdff1aSopenharmony_ci#define VP8_EPEL_V(SIZE, TAPS) \ 511cabdff1aSopenharmony_cistatic void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, \ 512cabdff1aSopenharmony_ci ptrdiff_t dststride, \ 513cabdff1aSopenharmony_ci uint8_t *src, \ 514cabdff1aSopenharmony_ci ptrdiff_t srcstride, \ 515cabdff1aSopenharmony_ci int h, int mx, int my) \ 516cabdff1aSopenharmony_ci{ \ 517cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[my - 1]; \ 518cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ 519cabdff1aSopenharmony_ci int x, y; \ 520cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { \ 521cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 522cabdff1aSopenharmony_ci dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ 523cabdff1aSopenharmony_ci dst += dststride; \ 524cabdff1aSopenharmony_ci src += srcstride; \ 525cabdff1aSopenharmony_ci } \ 526cabdff1aSopenharmony_ci} 527cabdff1aSopenharmony_ci 528cabdff1aSopenharmony_ci#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ 529cabdff1aSopenharmony_cistatic void \ 530cabdff1aSopenharmony_ciput_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, \ 531cabdff1aSopenharmony_ci ptrdiff_t dststride, \ 532cabdff1aSopenharmony_ci uint8_t *src, \ 533cabdff1aSopenharmony_ci ptrdiff_t srcstride, \ 534cabdff1aSopenharmony_ci int h, int mx, \ 535cabdff1aSopenharmony_ci int my) \ 536cabdff1aSopenharmony_ci{ \ 537cabdff1aSopenharmony_ci const uint8_t *filter = subpel_filters[mx - 1]; \ 538cabdff1aSopenharmony_ci const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ 539cabdff1aSopenharmony_ci int x, y; \ 540cabdff1aSopenharmony_ci uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ 541cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; \ 542cabdff1aSopenharmony_ci src -= (2 - (VTAPS == 4)) * srcstride; \ 543cabdff1aSopenharmony_ci \ 544cabdff1aSopenharmony_ci for (y = 0; y < h + VTAPS - 1; y++) { \ 545cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 546cabdff1aSopenharmony_ci tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ 547cabdff1aSopenharmony_ci tmp += SIZE; \ 548cabdff1aSopenharmony_ci src += srcstride; \ 549cabdff1aSopenharmony_ci } \ 550cabdff1aSopenharmony_ci tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ 551cabdff1aSopenharmony_ci filter = subpel_filters[my - 1]; \ 552cabdff1aSopenharmony_ci \ 553cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { \ 554cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 555cabdff1aSopenharmony_ci dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ 556cabdff1aSopenharmony_ci dst += dststride; \ 557cabdff1aSopenharmony_ci tmp += SIZE; \ 558cabdff1aSopenharmony_ci } \ 559cabdff1aSopenharmony_ci} 560cabdff1aSopenharmony_ci 561cabdff1aSopenharmony_ciVP8_EPEL_H(16, 4) 562cabdff1aSopenharmony_ciVP8_EPEL_H(8, 4) 563cabdff1aSopenharmony_ciVP8_EPEL_H(4, 4) 564cabdff1aSopenharmony_ciVP8_EPEL_H(16, 6) 565cabdff1aSopenharmony_ciVP8_EPEL_H(8, 6) 566cabdff1aSopenharmony_ciVP8_EPEL_H(4, 6) 567cabdff1aSopenharmony_ciVP8_EPEL_V(16, 4) 568cabdff1aSopenharmony_ciVP8_EPEL_V(8, 4) 569cabdff1aSopenharmony_ciVP8_EPEL_V(4, 4) 570cabdff1aSopenharmony_ciVP8_EPEL_V(16, 6) 571cabdff1aSopenharmony_ciVP8_EPEL_V(8, 6) 572cabdff1aSopenharmony_ciVP8_EPEL_V(4, 6) 573cabdff1aSopenharmony_ci 574cabdff1aSopenharmony_ciVP8_EPEL_HV(16, 4, 4) 575cabdff1aSopenharmony_ciVP8_EPEL_HV(8, 4, 4) 576cabdff1aSopenharmony_ciVP8_EPEL_HV(4, 4, 4) 577cabdff1aSopenharmony_ciVP8_EPEL_HV(16, 4, 6) 578cabdff1aSopenharmony_ciVP8_EPEL_HV(8, 4, 6) 579cabdff1aSopenharmony_ciVP8_EPEL_HV(4, 4, 6) 580cabdff1aSopenharmony_ciVP8_EPEL_HV(16, 6, 4) 581cabdff1aSopenharmony_ciVP8_EPEL_HV(8, 6, 4) 582cabdff1aSopenharmony_ciVP8_EPEL_HV(4, 6, 4) 583cabdff1aSopenharmony_ciVP8_EPEL_HV(16, 6, 6) 584cabdff1aSopenharmony_ciVP8_EPEL_HV(8, 6, 6) 585cabdff1aSopenharmony_ciVP8_EPEL_HV(4, 6, 6) 586cabdff1aSopenharmony_ci 587cabdff1aSopenharmony_ci#define VP8_BILINEAR(SIZE) \ 588cabdff1aSopenharmony_cistatic void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \ 589cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t sstride, \ 590cabdff1aSopenharmony_ci int h, int mx, int my) \ 591cabdff1aSopenharmony_ci{ \ 592cabdff1aSopenharmony_ci int a = 8 - mx, b = mx; \ 593cabdff1aSopenharmony_ci int x, y; \ 594cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { \ 595cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 596cabdff1aSopenharmony_ci dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ 597cabdff1aSopenharmony_ci dst += dstride; \ 598cabdff1aSopenharmony_ci src += sstride; \ 599cabdff1aSopenharmony_ci } \ 600cabdff1aSopenharmony_ci} \ 601cabdff1aSopenharmony_ci \ 602cabdff1aSopenharmony_cistatic void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \ 603cabdff1aSopenharmony_ci uint8_t *src, ptrdiff_t sstride, \ 604cabdff1aSopenharmony_ci int h, int mx, int my) \ 605cabdff1aSopenharmony_ci{ \ 606cabdff1aSopenharmony_ci int c = 8 - my, d = my; \ 607cabdff1aSopenharmony_ci int x, y; \ 608cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { \ 609cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 610cabdff1aSopenharmony_ci dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ 611cabdff1aSopenharmony_ci dst += dstride; \ 612cabdff1aSopenharmony_ci src += sstride; \ 613cabdff1aSopenharmony_ci } \ 614cabdff1aSopenharmony_ci} \ 615cabdff1aSopenharmony_ci \ 616cabdff1aSopenharmony_cistatic void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, \ 617cabdff1aSopenharmony_ci ptrdiff_t dstride, \ 618cabdff1aSopenharmony_ci uint8_t *src, \ 619cabdff1aSopenharmony_ci ptrdiff_t sstride, \ 620cabdff1aSopenharmony_ci int h, int mx, int my) \ 621cabdff1aSopenharmony_ci{ \ 622cabdff1aSopenharmony_ci int a = 8 - mx, b = mx; \ 623cabdff1aSopenharmony_ci int c = 8 - my, d = my; \ 624cabdff1aSopenharmony_ci int x, y; \ 625cabdff1aSopenharmony_ci uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ 626cabdff1aSopenharmony_ci uint8_t *tmp = tmp_array; \ 627cabdff1aSopenharmony_ci for (y = 0; y < h + 1; y++) { \ 628cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 629cabdff1aSopenharmony_ci tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ 630cabdff1aSopenharmony_ci tmp += SIZE; \ 631cabdff1aSopenharmony_ci src += sstride; \ 632cabdff1aSopenharmony_ci } \ 633cabdff1aSopenharmony_ci tmp = tmp_array; \ 634cabdff1aSopenharmony_ci for (y = 0; y < h; y++) { \ 635cabdff1aSopenharmony_ci for (x = 0; x < SIZE; x++) \ 636cabdff1aSopenharmony_ci dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ 637cabdff1aSopenharmony_ci dst += dstride; \ 638cabdff1aSopenharmony_ci tmp += SIZE; \ 639cabdff1aSopenharmony_ci } \ 640cabdff1aSopenharmony_ci} 641cabdff1aSopenharmony_ci 642cabdff1aSopenharmony_ciVP8_BILINEAR(16) 643cabdff1aSopenharmony_ciVP8_BILINEAR(8) 644cabdff1aSopenharmony_ciVP8_BILINEAR(4) 645cabdff1aSopenharmony_ci 646cabdff1aSopenharmony_ci#define VP78_MC_FUNC(IDX, SIZE) \ 647cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ 648cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ 649cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ 650cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ 651cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \ 652cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \ 653cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ 654cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \ 655cabdff1aSopenharmony_ci dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c 656cabdff1aSopenharmony_ci 657cabdff1aSopenharmony_ci#define VP78_BILINEAR_MC_FUNC(IDX, SIZE) \ 658cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ 659cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \ 660cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \ 661cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \ 662cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ 663cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \ 664cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \ 665cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ 666cabdff1aSopenharmony_ci dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c 667cabdff1aSopenharmony_ci 668cabdff1aSopenharmony_ciav_cold void ff_vp78dsp_init(VP8DSPContext *dsp) 669cabdff1aSopenharmony_ci{ 670cabdff1aSopenharmony_ci VP78_MC_FUNC(0, 16); 671cabdff1aSopenharmony_ci VP78_MC_FUNC(1, 8); 672cabdff1aSopenharmony_ci VP78_MC_FUNC(2, 4); 673cabdff1aSopenharmony_ci 674cabdff1aSopenharmony_ci VP78_BILINEAR_MC_FUNC(0, 16); 675cabdff1aSopenharmony_ci VP78_BILINEAR_MC_FUNC(1, 8); 676cabdff1aSopenharmony_ci VP78_BILINEAR_MC_FUNC(2, 4); 677cabdff1aSopenharmony_ci 678cabdff1aSopenharmony_ci#if ARCH_AARCH64 679cabdff1aSopenharmony_ci ff_vp78dsp_init_aarch64(dsp); 680cabdff1aSopenharmony_ci#elif ARCH_ARM 681cabdff1aSopenharmony_ci ff_vp78dsp_init_arm(dsp); 682cabdff1aSopenharmony_ci#elif ARCH_PPC 683cabdff1aSopenharmony_ci ff_vp78dsp_init_ppc(dsp); 684cabdff1aSopenharmony_ci#elif ARCH_X86 685cabdff1aSopenharmony_ci ff_vp78dsp_init_x86(dsp); 686cabdff1aSopenharmony_ci#endif 687cabdff1aSopenharmony_ci} 688cabdff1aSopenharmony_ci 689cabdff1aSopenharmony_ci#if CONFIG_VP7_DECODER 690cabdff1aSopenharmony_ciLOOP_FILTERS(vp7) 691cabdff1aSopenharmony_ci 692cabdff1aSopenharmony_ciav_cold void ff_vp7dsp_init(VP8DSPContext *dsp) 693cabdff1aSopenharmony_ci{ 694cabdff1aSopenharmony_ci dsp->vp8_luma_dc_wht = vp7_luma_dc_wht_c; 695cabdff1aSopenharmony_ci dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c; 696cabdff1aSopenharmony_ci dsp->vp8_idct_add = vp7_idct_add_c; 697cabdff1aSopenharmony_ci dsp->vp8_idct_dc_add = vp7_idct_dc_add_c; 698cabdff1aSopenharmony_ci dsp->vp8_idct_dc_add4y = vp7_idct_dc_add4y_c; 699cabdff1aSopenharmony_ci dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c; 700cabdff1aSopenharmony_ci 701cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c; 702cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c; 703cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c; 704cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c; 705cabdff1aSopenharmony_ci 706cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c; 707cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c; 708cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c; 709cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c; 710cabdff1aSopenharmony_ci 711cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c; 712cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c; 713cabdff1aSopenharmony_ci} 714cabdff1aSopenharmony_ci#endif /* CONFIG_VP7_DECODER */ 715cabdff1aSopenharmony_ci 716cabdff1aSopenharmony_ci#if CONFIG_VP8_DECODER 717cabdff1aSopenharmony_ciLOOP_FILTERS(vp8) 718cabdff1aSopenharmony_ci 719cabdff1aSopenharmony_ciav_cold void ff_vp8dsp_init(VP8DSPContext *dsp) 720cabdff1aSopenharmony_ci{ 721cabdff1aSopenharmony_ci dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; 722cabdff1aSopenharmony_ci dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c; 723cabdff1aSopenharmony_ci dsp->vp8_idct_add = vp8_idct_add_c; 724cabdff1aSopenharmony_ci dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; 725cabdff1aSopenharmony_ci dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c; 726cabdff1aSopenharmony_ci dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c; 727cabdff1aSopenharmony_ci 728cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; 729cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; 730cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; 731cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; 732cabdff1aSopenharmony_ci 733cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; 734cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; 735cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; 736cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; 737cabdff1aSopenharmony_ci 738cabdff1aSopenharmony_ci dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; 739cabdff1aSopenharmony_ci dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; 740cabdff1aSopenharmony_ci 741cabdff1aSopenharmony_ci#if ARCH_AARCH64 742cabdff1aSopenharmony_ci ff_vp8dsp_init_aarch64(dsp); 743cabdff1aSopenharmony_ci#elif ARCH_ARM 744cabdff1aSopenharmony_ci ff_vp8dsp_init_arm(dsp); 745cabdff1aSopenharmony_ci#elif ARCH_X86 746cabdff1aSopenharmony_ci ff_vp8dsp_init_x86(dsp); 747cabdff1aSopenharmony_ci#elif ARCH_MIPS 748cabdff1aSopenharmony_ci ff_vp8dsp_init_mips(dsp); 749cabdff1aSopenharmony_ci#elif ARCH_LOONGARCH 750cabdff1aSopenharmony_ci ff_vp8dsp_init_loongarch(dsp); 751cabdff1aSopenharmony_ci#endif 752cabdff1aSopenharmony_ci} 753cabdff1aSopenharmony_ci#endif /* CONFIG_VP8_DECODER */ 754