1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * VP9 compatible video decoder 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> 5cabdff1aSopenharmony_ci * Copyright (C) 2013 Clément Bœsch <u pkh me> 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * This file is part of FFmpeg. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17cabdff1aSopenharmony_ci * Lesser General Public License for more details. 18cabdff1aSopenharmony_ci * 19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "libavutil/common.h" 25cabdff1aSopenharmony_ci#include "bit_depth_template.c" 26cabdff1aSopenharmony_ci#include "vp9dsp.h" 27cabdff1aSopenharmony_ci 28cabdff1aSopenharmony_ci#if BIT_DEPTH != 12 29cabdff1aSopenharmony_ci 30cabdff1aSopenharmony_ci// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8) 31cabdff1aSopenharmony_ci// back with h264pred.[ch] 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_cistatic void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride, 34cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 35cabdff1aSopenharmony_ci{ 36cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 37cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 38cabdff1aSopenharmony_ci pixel4 p4 = AV_RN4PA(top); 39cabdff1aSopenharmony_ci 40cabdff1aSopenharmony_ci stride /= sizeof(pixel); 41cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, p4); 42cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, p4); 43cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, p4); 44cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, p4); 45cabdff1aSopenharmony_ci} 46cabdff1aSopenharmony_ci 47cabdff1aSopenharmony_cistatic void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride, 48cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 49cabdff1aSopenharmony_ci{ 50cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 51cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 52cabdff1aSopenharmony_ci pixel4 p4a = AV_RN4PA(top + 0); 53cabdff1aSopenharmony_ci pixel4 p4b = AV_RN4PA(top + 4); 54cabdff1aSopenharmony_ci int y; 55cabdff1aSopenharmony_ci 56cabdff1aSopenharmony_ci stride /= sizeof(pixel); 57cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 58cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, p4a); 59cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, p4b); 60cabdff1aSopenharmony_ci dst += stride; 61cabdff1aSopenharmony_ci } 62cabdff1aSopenharmony_ci} 63cabdff1aSopenharmony_ci 64cabdff1aSopenharmony_cistatic void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride, 65cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 66cabdff1aSopenharmony_ci{ 67cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 68cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 69cabdff1aSopenharmony_ci pixel4 p4a = AV_RN4PA(top + 0); 70cabdff1aSopenharmony_ci pixel4 p4b = AV_RN4PA(top + 4); 71cabdff1aSopenharmony_ci pixel4 p4c = AV_RN4PA(top + 8); 72cabdff1aSopenharmony_ci pixel4 p4d = AV_RN4PA(top + 12); 73cabdff1aSopenharmony_ci int y; 74cabdff1aSopenharmony_ci 75cabdff1aSopenharmony_ci stride /= sizeof(pixel); 76cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 77cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, p4a); 78cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, p4b); 79cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, p4c); 80cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, p4d); 81cabdff1aSopenharmony_ci dst += stride; 82cabdff1aSopenharmony_ci } 83cabdff1aSopenharmony_ci} 84cabdff1aSopenharmony_ci 85cabdff1aSopenharmony_cistatic void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride, 86cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 87cabdff1aSopenharmony_ci{ 88cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 89cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 90cabdff1aSopenharmony_ci pixel4 p4a = AV_RN4PA(top + 0); 91cabdff1aSopenharmony_ci pixel4 p4b = AV_RN4PA(top + 4); 92cabdff1aSopenharmony_ci pixel4 p4c = AV_RN4PA(top + 8); 93cabdff1aSopenharmony_ci pixel4 p4d = AV_RN4PA(top + 12); 94cabdff1aSopenharmony_ci pixel4 p4e = AV_RN4PA(top + 16); 95cabdff1aSopenharmony_ci pixel4 p4f = AV_RN4PA(top + 20); 96cabdff1aSopenharmony_ci pixel4 p4g = AV_RN4PA(top + 24); 97cabdff1aSopenharmony_ci pixel4 p4h = AV_RN4PA(top + 28); 98cabdff1aSopenharmony_ci int y; 99cabdff1aSopenharmony_ci 100cabdff1aSopenharmony_ci stride /= sizeof(pixel); 101cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 102cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, p4a); 103cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, p4b); 104cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, p4c); 105cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, p4d); 106cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, p4e); 107cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, p4f); 108cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, p4g); 109cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, p4h); 110cabdff1aSopenharmony_ci dst += stride; 111cabdff1aSopenharmony_ci } 112cabdff1aSopenharmony_ci} 113cabdff1aSopenharmony_ci 114cabdff1aSopenharmony_cistatic void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride, 115cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 116cabdff1aSopenharmony_ci{ 117cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 118cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 119cabdff1aSopenharmony_ci 120cabdff1aSopenharmony_ci stride /= sizeof(pixel); 121cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, PIXEL_SPLAT_X4(left[3])); 122cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, PIXEL_SPLAT_X4(left[2])); 123cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, PIXEL_SPLAT_X4(left[1])); 124cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, PIXEL_SPLAT_X4(left[0])); 125cabdff1aSopenharmony_ci} 126cabdff1aSopenharmony_ci 127cabdff1aSopenharmony_cistatic void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride, 128cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 129cabdff1aSopenharmony_ci{ 130cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 131cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 132cabdff1aSopenharmony_ci int y; 133cabdff1aSopenharmony_ci 134cabdff1aSopenharmony_ci stride /= sizeof(pixel); 135cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 136cabdff1aSopenharmony_ci pixel4 p4 = PIXEL_SPLAT_X4(left[7 - y]); 137cabdff1aSopenharmony_ci 138cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, p4); 139cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, p4); 140cabdff1aSopenharmony_ci dst += stride; 141cabdff1aSopenharmony_ci } 142cabdff1aSopenharmony_ci} 143cabdff1aSopenharmony_ci 144cabdff1aSopenharmony_cistatic void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride, 145cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 146cabdff1aSopenharmony_ci{ 147cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 148cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 149cabdff1aSopenharmony_ci int y; 150cabdff1aSopenharmony_ci 151cabdff1aSopenharmony_ci stride /= sizeof(pixel); 152cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 153cabdff1aSopenharmony_ci pixel4 p4 = PIXEL_SPLAT_X4(left[15 - y]); 154cabdff1aSopenharmony_ci 155cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, p4); 156cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, p4); 157cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, p4); 158cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, p4); 159cabdff1aSopenharmony_ci dst += stride; 160cabdff1aSopenharmony_ci } 161cabdff1aSopenharmony_ci} 162cabdff1aSopenharmony_ci 163cabdff1aSopenharmony_cistatic void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride, 164cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 165cabdff1aSopenharmony_ci{ 166cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 167cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 168cabdff1aSopenharmony_ci int y; 169cabdff1aSopenharmony_ci 170cabdff1aSopenharmony_ci stride /= sizeof(pixel); 171cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 172cabdff1aSopenharmony_ci pixel4 p4 = PIXEL_SPLAT_X4(left[31 - y]); 173cabdff1aSopenharmony_ci 174cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, p4); 175cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, p4); 176cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, p4); 177cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, p4); 178cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, p4); 179cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, p4); 180cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, p4); 181cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, p4); 182cabdff1aSopenharmony_ci dst += stride; 183cabdff1aSopenharmony_ci } 184cabdff1aSopenharmony_ci} 185cabdff1aSopenharmony_ci 186cabdff1aSopenharmony_ci#endif /* BIT_DEPTH != 12 */ 187cabdff1aSopenharmony_ci 188cabdff1aSopenharmony_cistatic void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride, 189cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 190cabdff1aSopenharmony_ci{ 191cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 192cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 193cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 194cabdff1aSopenharmony_ci int y, tl = top[-1]; 195cabdff1aSopenharmony_ci 196cabdff1aSopenharmony_ci stride /= sizeof(pixel); 197cabdff1aSopenharmony_ci for (y = 0; y < 4; y++) { 198cabdff1aSopenharmony_ci int l_m_tl = left[3 - y] - tl; 199cabdff1aSopenharmony_ci 200cabdff1aSopenharmony_ci dst[0] = av_clip_pixel(top[0] + l_m_tl); 201cabdff1aSopenharmony_ci dst[1] = av_clip_pixel(top[1] + l_m_tl); 202cabdff1aSopenharmony_ci dst[2] = av_clip_pixel(top[2] + l_m_tl); 203cabdff1aSopenharmony_ci dst[3] = av_clip_pixel(top[3] + l_m_tl); 204cabdff1aSopenharmony_ci dst += stride; 205cabdff1aSopenharmony_ci } 206cabdff1aSopenharmony_ci} 207cabdff1aSopenharmony_ci 208cabdff1aSopenharmony_cistatic void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride, 209cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 210cabdff1aSopenharmony_ci{ 211cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 212cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 213cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 214cabdff1aSopenharmony_ci int y, tl = top[-1]; 215cabdff1aSopenharmony_ci 216cabdff1aSopenharmony_ci stride /= sizeof(pixel); 217cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 218cabdff1aSopenharmony_ci int l_m_tl = left[7 - y] - tl; 219cabdff1aSopenharmony_ci 220cabdff1aSopenharmony_ci dst[0] = av_clip_pixel(top[0] + l_m_tl); 221cabdff1aSopenharmony_ci dst[1] = av_clip_pixel(top[1] + l_m_tl); 222cabdff1aSopenharmony_ci dst[2] = av_clip_pixel(top[2] + l_m_tl); 223cabdff1aSopenharmony_ci dst[3] = av_clip_pixel(top[3] + l_m_tl); 224cabdff1aSopenharmony_ci dst[4] = av_clip_pixel(top[4] + l_m_tl); 225cabdff1aSopenharmony_ci dst[5] = av_clip_pixel(top[5] + l_m_tl); 226cabdff1aSopenharmony_ci dst[6] = av_clip_pixel(top[6] + l_m_tl); 227cabdff1aSopenharmony_ci dst[7] = av_clip_pixel(top[7] + l_m_tl); 228cabdff1aSopenharmony_ci dst += stride; 229cabdff1aSopenharmony_ci } 230cabdff1aSopenharmony_ci} 231cabdff1aSopenharmony_ci 232cabdff1aSopenharmony_cistatic void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride, 233cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 234cabdff1aSopenharmony_ci{ 235cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 236cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 237cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 238cabdff1aSopenharmony_ci int y, tl = top[-1]; 239cabdff1aSopenharmony_ci 240cabdff1aSopenharmony_ci stride /= sizeof(pixel); 241cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 242cabdff1aSopenharmony_ci int l_m_tl = left[15 - y] - tl; 243cabdff1aSopenharmony_ci 244cabdff1aSopenharmony_ci dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl); 245cabdff1aSopenharmony_ci dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl); 246cabdff1aSopenharmony_ci dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl); 247cabdff1aSopenharmony_ci dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl); 248cabdff1aSopenharmony_ci dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl); 249cabdff1aSopenharmony_ci dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl); 250cabdff1aSopenharmony_ci dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl); 251cabdff1aSopenharmony_ci dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl); 252cabdff1aSopenharmony_ci dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl); 253cabdff1aSopenharmony_ci dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl); 254cabdff1aSopenharmony_ci dst[10] = av_clip_pixel(top[10] + l_m_tl); 255cabdff1aSopenharmony_ci dst[11] = av_clip_pixel(top[11] + l_m_tl); 256cabdff1aSopenharmony_ci dst[12] = av_clip_pixel(top[12] + l_m_tl); 257cabdff1aSopenharmony_ci dst[13] = av_clip_pixel(top[13] + l_m_tl); 258cabdff1aSopenharmony_ci dst[14] = av_clip_pixel(top[14] + l_m_tl); 259cabdff1aSopenharmony_ci dst[15] = av_clip_pixel(top[15] + l_m_tl); 260cabdff1aSopenharmony_ci dst += stride; 261cabdff1aSopenharmony_ci } 262cabdff1aSopenharmony_ci} 263cabdff1aSopenharmony_ci 264cabdff1aSopenharmony_cistatic void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride, 265cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 266cabdff1aSopenharmony_ci{ 267cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 268cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 269cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 270cabdff1aSopenharmony_ci int y, tl = top[-1]; 271cabdff1aSopenharmony_ci 272cabdff1aSopenharmony_ci stride /= sizeof(pixel); 273cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 274cabdff1aSopenharmony_ci int l_m_tl = left[31 - y] - tl; 275cabdff1aSopenharmony_ci 276cabdff1aSopenharmony_ci dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl); 277cabdff1aSopenharmony_ci dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl); 278cabdff1aSopenharmony_ci dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl); 279cabdff1aSopenharmony_ci dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl); 280cabdff1aSopenharmony_ci dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl); 281cabdff1aSopenharmony_ci dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl); 282cabdff1aSopenharmony_ci dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl); 283cabdff1aSopenharmony_ci dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl); 284cabdff1aSopenharmony_ci dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl); 285cabdff1aSopenharmony_ci dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl); 286cabdff1aSopenharmony_ci dst[10] = av_clip_pixel(top[10] + l_m_tl); 287cabdff1aSopenharmony_ci dst[11] = av_clip_pixel(top[11] + l_m_tl); 288cabdff1aSopenharmony_ci dst[12] = av_clip_pixel(top[12] + l_m_tl); 289cabdff1aSopenharmony_ci dst[13] = av_clip_pixel(top[13] + l_m_tl); 290cabdff1aSopenharmony_ci dst[14] = av_clip_pixel(top[14] + l_m_tl); 291cabdff1aSopenharmony_ci dst[15] = av_clip_pixel(top[15] + l_m_tl); 292cabdff1aSopenharmony_ci dst[16] = av_clip_pixel(top[16] + l_m_tl); 293cabdff1aSopenharmony_ci dst[17] = av_clip_pixel(top[17] + l_m_tl); 294cabdff1aSopenharmony_ci dst[18] = av_clip_pixel(top[18] + l_m_tl); 295cabdff1aSopenharmony_ci dst[19] = av_clip_pixel(top[19] + l_m_tl); 296cabdff1aSopenharmony_ci dst[20] = av_clip_pixel(top[20] + l_m_tl); 297cabdff1aSopenharmony_ci dst[21] = av_clip_pixel(top[21] + l_m_tl); 298cabdff1aSopenharmony_ci dst[22] = av_clip_pixel(top[22] + l_m_tl); 299cabdff1aSopenharmony_ci dst[23] = av_clip_pixel(top[23] + l_m_tl); 300cabdff1aSopenharmony_ci dst[24] = av_clip_pixel(top[24] + l_m_tl); 301cabdff1aSopenharmony_ci dst[25] = av_clip_pixel(top[25] + l_m_tl); 302cabdff1aSopenharmony_ci dst[26] = av_clip_pixel(top[26] + l_m_tl); 303cabdff1aSopenharmony_ci dst[27] = av_clip_pixel(top[27] + l_m_tl); 304cabdff1aSopenharmony_ci dst[28] = av_clip_pixel(top[28] + l_m_tl); 305cabdff1aSopenharmony_ci dst[29] = av_clip_pixel(top[29] + l_m_tl); 306cabdff1aSopenharmony_ci dst[30] = av_clip_pixel(top[30] + l_m_tl); 307cabdff1aSopenharmony_ci dst[31] = av_clip_pixel(top[31] + l_m_tl); 308cabdff1aSopenharmony_ci dst += stride; 309cabdff1aSopenharmony_ci } 310cabdff1aSopenharmony_ci} 311cabdff1aSopenharmony_ci 312cabdff1aSopenharmony_ci#if BIT_DEPTH != 12 313cabdff1aSopenharmony_ci 314cabdff1aSopenharmony_cistatic void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride, 315cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 316cabdff1aSopenharmony_ci{ 317cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 318cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 319cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 320cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] + 321cabdff1aSopenharmony_ci top[0] + top[1] + top[2] + top[3] + 4) >> 3); 322cabdff1aSopenharmony_ci 323cabdff1aSopenharmony_ci stride /= sizeof(pixel); 324cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, dc); 325cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, dc); 326cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, dc); 327cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, dc); 328cabdff1aSopenharmony_ci} 329cabdff1aSopenharmony_ci 330cabdff1aSopenharmony_cistatic void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride, 331cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 332cabdff1aSopenharmony_ci{ 333cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 334cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 335cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 336cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 337cabdff1aSopenharmony_ci ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + 338cabdff1aSopenharmony_ci left[6] + left[7] + top[0] + top[1] + top[2] + top[3] + 339cabdff1aSopenharmony_ci top[4] + top[5] + top[6] + top[7] + 8) >> 4); 340cabdff1aSopenharmony_ci int y; 341cabdff1aSopenharmony_ci 342cabdff1aSopenharmony_ci stride /= sizeof(pixel); 343cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 344cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 345cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 346cabdff1aSopenharmony_ci dst += stride; 347cabdff1aSopenharmony_ci } 348cabdff1aSopenharmony_ci} 349cabdff1aSopenharmony_ci 350cabdff1aSopenharmony_cistatic void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride, 351cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 352cabdff1aSopenharmony_ci{ 353cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 354cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 355cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 356cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 357cabdff1aSopenharmony_ci ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] + 358cabdff1aSopenharmony_ci left[7] + left[8] + left[9] + left[10] + left[11] + left[12] + 359cabdff1aSopenharmony_ci left[13] + left[14] + left[15] + top[0] + top[1] + top[2] + top[3] + 360cabdff1aSopenharmony_ci top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] + 361cabdff1aSopenharmony_ci top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5); 362cabdff1aSopenharmony_ci int y; 363cabdff1aSopenharmony_ci 364cabdff1aSopenharmony_ci stride /= sizeof(pixel); 365cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 366cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 367cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 368cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, dc); 369cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, dc); 370cabdff1aSopenharmony_ci dst += stride; 371cabdff1aSopenharmony_ci } 372cabdff1aSopenharmony_ci} 373cabdff1aSopenharmony_ci 374cabdff1aSopenharmony_cistatic void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride, 375cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 376cabdff1aSopenharmony_ci{ 377cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 378cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 379cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 380cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 381cabdff1aSopenharmony_ci ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] + 382cabdff1aSopenharmony_ci left[7] + left[8] + left[9] + left[10] + left[11] + left[12] + 383cabdff1aSopenharmony_ci left[13] + left[14] + left[15] + left[16] + left[17] + left[18] + 384cabdff1aSopenharmony_ci left[19] + left[20] + left[21] + left[22] + left[23] + left[24] + 385cabdff1aSopenharmony_ci left[25] + left[26] + left[27] + left[28] + left[29] + left[30] + 386cabdff1aSopenharmony_ci left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] + 387cabdff1aSopenharmony_ci top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] + 388cabdff1aSopenharmony_ci top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] + 389cabdff1aSopenharmony_ci top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] + 390cabdff1aSopenharmony_ci top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6); 391cabdff1aSopenharmony_ci int y; 392cabdff1aSopenharmony_ci 393cabdff1aSopenharmony_ci stride /= sizeof(pixel); 394cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 395cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 396cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 397cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, dc); 398cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, dc); 399cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, dc); 400cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, dc); 401cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, dc); 402cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, dc); 403cabdff1aSopenharmony_ci dst += stride; 404cabdff1aSopenharmony_ci } 405cabdff1aSopenharmony_ci} 406cabdff1aSopenharmony_ci 407cabdff1aSopenharmony_cistatic void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride, 408cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 409cabdff1aSopenharmony_ci{ 410cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 411cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 412cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] + 2) >> 2); 413cabdff1aSopenharmony_ci 414cabdff1aSopenharmony_ci stride /= sizeof(pixel); 415cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, dc); 416cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, dc); 417cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, dc); 418cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, dc); 419cabdff1aSopenharmony_ci} 420cabdff1aSopenharmony_ci 421cabdff1aSopenharmony_cistatic void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride, 422cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 423cabdff1aSopenharmony_ci{ 424cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 425cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 426cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 427cabdff1aSopenharmony_ci ((left[0] + left[1] + left[2] + left[3] + 428cabdff1aSopenharmony_ci left[4] + left[5] + left[6] + left[7] + 4) >> 3); 429cabdff1aSopenharmony_ci int y; 430cabdff1aSopenharmony_ci 431cabdff1aSopenharmony_ci stride /= sizeof(pixel); 432cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 433cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 434cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 435cabdff1aSopenharmony_ci dst += stride; 436cabdff1aSopenharmony_ci } 437cabdff1aSopenharmony_ci} 438cabdff1aSopenharmony_ci 439cabdff1aSopenharmony_cistatic void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride, 440cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 441cabdff1aSopenharmony_ci{ 442cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 443cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 444cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 445cabdff1aSopenharmony_ci ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + 446cabdff1aSopenharmony_ci left[6] + left[7] + left[8] + left[9] + left[10] + left[11] + 447cabdff1aSopenharmony_ci left[12] + left[13] + left[14] + left[15] + 8) >> 4); 448cabdff1aSopenharmony_ci int y; 449cabdff1aSopenharmony_ci 450cabdff1aSopenharmony_ci stride /= sizeof(pixel); 451cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 452cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 453cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 454cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, dc); 455cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, dc); 456cabdff1aSopenharmony_ci dst += stride; 457cabdff1aSopenharmony_ci } 458cabdff1aSopenharmony_ci} 459cabdff1aSopenharmony_ci 460cabdff1aSopenharmony_cistatic void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride, 461cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 462cabdff1aSopenharmony_ci{ 463cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 464cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 465cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 466cabdff1aSopenharmony_ci ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + 467cabdff1aSopenharmony_ci left[6] + left[7] + left[8] + left[9] + left[10] + left[11] + 468cabdff1aSopenharmony_ci left[12] + left[13] + left[14] + left[15] + left[16] + left[17] + 469cabdff1aSopenharmony_ci left[18] + left[19] + left[20] + left[21] + left[22] + left[23] + 470cabdff1aSopenharmony_ci left[24] + left[25] + left[26] + left[27] + left[28] + left[29] + 471cabdff1aSopenharmony_ci left[30] + left[31] + 16) >> 5); 472cabdff1aSopenharmony_ci int y; 473cabdff1aSopenharmony_ci 474cabdff1aSopenharmony_ci stride /= sizeof(pixel); 475cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 476cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 477cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 478cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, dc); 479cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, dc); 480cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, dc); 481cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, dc); 482cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, dc); 483cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, dc); 484cabdff1aSopenharmony_ci dst += stride; 485cabdff1aSopenharmony_ci } 486cabdff1aSopenharmony_ci} 487cabdff1aSopenharmony_ci 488cabdff1aSopenharmony_cistatic void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride, 489cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 490cabdff1aSopenharmony_ci{ 491cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 492cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 493cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4((top[0] + top[1] + top[2] + top[3] + 2) >> 2); 494cabdff1aSopenharmony_ci 495cabdff1aSopenharmony_ci stride /= sizeof(pixel); 496cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, dc); 497cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, dc); 498cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, dc); 499cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, dc); 500cabdff1aSopenharmony_ci} 501cabdff1aSopenharmony_ci 502cabdff1aSopenharmony_cistatic void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride, 503cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 504cabdff1aSopenharmony_ci{ 505cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 506cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 507cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 508cabdff1aSopenharmony_ci ((top[0] + top[1] + top[2] + top[3] + 509cabdff1aSopenharmony_ci top[4] + top[5] + top[6] + top[7] + 4) >> 3); 510cabdff1aSopenharmony_ci int y; 511cabdff1aSopenharmony_ci 512cabdff1aSopenharmony_ci stride /= sizeof(pixel); 513cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 514cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 515cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 516cabdff1aSopenharmony_ci dst += stride; 517cabdff1aSopenharmony_ci } 518cabdff1aSopenharmony_ci} 519cabdff1aSopenharmony_ci 520cabdff1aSopenharmony_cistatic void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride, 521cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 522cabdff1aSopenharmony_ci{ 523cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 524cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 525cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 526cabdff1aSopenharmony_ci ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] + 527cabdff1aSopenharmony_ci top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + 528cabdff1aSopenharmony_ci top[12] + top[13] + top[14] + top[15] + 8) >> 4); 529cabdff1aSopenharmony_ci int y; 530cabdff1aSopenharmony_ci 531cabdff1aSopenharmony_ci stride /= sizeof(pixel); 532cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 533cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 534cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 535cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, dc); 536cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, dc); 537cabdff1aSopenharmony_ci dst += stride; 538cabdff1aSopenharmony_ci } 539cabdff1aSopenharmony_ci} 540cabdff1aSopenharmony_ci 541cabdff1aSopenharmony_cistatic void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride, 542cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 543cabdff1aSopenharmony_ci{ 544cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 545cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 546cabdff1aSopenharmony_ci pixel4 dc = PIXEL_SPLAT_X4 547cabdff1aSopenharmony_ci ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] + 548cabdff1aSopenharmony_ci top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + 549cabdff1aSopenharmony_ci top[12] + top[13] + top[14] + top[15] + top[16] + top[17] + 550cabdff1aSopenharmony_ci top[18] + top[19] + top[20] + top[21] + top[22] + top[23] + 551cabdff1aSopenharmony_ci top[24] + top[25] + top[26] + top[27] + top[28] + top[29] + 552cabdff1aSopenharmony_ci top[30] + top[31] + 16) >> 5); 553cabdff1aSopenharmony_ci int y; 554cabdff1aSopenharmony_ci 555cabdff1aSopenharmony_ci stride /= sizeof(pixel); 556cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 557cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, dc); 558cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, dc); 559cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, dc); 560cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, dc); 561cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, dc); 562cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, dc); 563cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, dc); 564cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, dc); 565cabdff1aSopenharmony_ci dst += stride; 566cabdff1aSopenharmony_ci } 567cabdff1aSopenharmony_ci} 568cabdff1aSopenharmony_ci 569cabdff1aSopenharmony_ci#endif /* BIT_DEPTH != 12 */ 570cabdff1aSopenharmony_ci 571cabdff1aSopenharmony_cistatic void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride, 572cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 573cabdff1aSopenharmony_ci{ 574cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 575cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); 576cabdff1aSopenharmony_ci 577cabdff1aSopenharmony_ci stride /= sizeof(pixel); 578cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, val); 579cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, val); 580cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, val); 581cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, val); 582cabdff1aSopenharmony_ci} 583cabdff1aSopenharmony_ci 584cabdff1aSopenharmony_cistatic void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride, 585cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 586cabdff1aSopenharmony_ci{ 587cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 588cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); 589cabdff1aSopenharmony_ci int y; 590cabdff1aSopenharmony_ci 591cabdff1aSopenharmony_ci stride /= sizeof(pixel); 592cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 593cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 594cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 595cabdff1aSopenharmony_ci dst += stride; 596cabdff1aSopenharmony_ci } 597cabdff1aSopenharmony_ci} 598cabdff1aSopenharmony_ci 599cabdff1aSopenharmony_cistatic void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride, 600cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 601cabdff1aSopenharmony_ci{ 602cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 603cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); 604cabdff1aSopenharmony_ci int y; 605cabdff1aSopenharmony_ci 606cabdff1aSopenharmony_ci stride /= sizeof(pixel); 607cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 608cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 609cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 610cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, val); 611cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, val); 612cabdff1aSopenharmony_ci dst += stride; 613cabdff1aSopenharmony_ci } 614cabdff1aSopenharmony_ci} 615cabdff1aSopenharmony_ci 616cabdff1aSopenharmony_cistatic void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride, 617cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 618cabdff1aSopenharmony_ci{ 619cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 620cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8)); 621cabdff1aSopenharmony_ci int y; 622cabdff1aSopenharmony_ci 623cabdff1aSopenharmony_ci stride /= sizeof(pixel); 624cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 625cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 626cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 627cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, val); 628cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, val); 629cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, val); 630cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, val); 631cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, val); 632cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, val); 633cabdff1aSopenharmony_ci dst += stride; 634cabdff1aSopenharmony_ci } 635cabdff1aSopenharmony_ci} 636cabdff1aSopenharmony_ci 637cabdff1aSopenharmony_cistatic void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride, 638cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 639cabdff1aSopenharmony_ci{ 640cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 641cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); 642cabdff1aSopenharmony_ci 643cabdff1aSopenharmony_ci stride /= sizeof(pixel); 644cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, val); 645cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, val); 646cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, val); 647cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, val);} 648cabdff1aSopenharmony_ci 649cabdff1aSopenharmony_cistatic void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride, 650cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 651cabdff1aSopenharmony_ci{ 652cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 653cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); 654cabdff1aSopenharmony_ci int y; 655cabdff1aSopenharmony_ci 656cabdff1aSopenharmony_ci stride /= sizeof(pixel); 657cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 658cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 659cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 660cabdff1aSopenharmony_ci dst += stride; 661cabdff1aSopenharmony_ci } 662cabdff1aSopenharmony_ci} 663cabdff1aSopenharmony_ci 664cabdff1aSopenharmony_cistatic void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride, 665cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 666cabdff1aSopenharmony_ci{ 667cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 668cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); 669cabdff1aSopenharmony_ci int y; 670cabdff1aSopenharmony_ci 671cabdff1aSopenharmony_ci stride /= sizeof(pixel); 672cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 673cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 674cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 675cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, val); 676cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, val); 677cabdff1aSopenharmony_ci dst += stride; 678cabdff1aSopenharmony_ci } 679cabdff1aSopenharmony_ci} 680cabdff1aSopenharmony_ci 681cabdff1aSopenharmony_cistatic void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride, 682cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 683cabdff1aSopenharmony_ci{ 684cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 685cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1); 686cabdff1aSopenharmony_ci int y; 687cabdff1aSopenharmony_ci 688cabdff1aSopenharmony_ci stride /= sizeof(pixel); 689cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 690cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 691cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 692cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, val); 693cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, val); 694cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, val); 695cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, val); 696cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, val); 697cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, val); 698cabdff1aSopenharmony_ci dst += stride; 699cabdff1aSopenharmony_ci } 700cabdff1aSopenharmony_ci} 701cabdff1aSopenharmony_ci 702cabdff1aSopenharmony_cistatic void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride, 703cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 704cabdff1aSopenharmony_ci{ 705cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 706cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); 707cabdff1aSopenharmony_ci 708cabdff1aSopenharmony_ci stride /= sizeof(pixel); 709cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 0, val); 710cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 1, val); 711cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 2, val); 712cabdff1aSopenharmony_ci AV_WN4PA(dst + stride * 3, val); 713cabdff1aSopenharmony_ci} 714cabdff1aSopenharmony_ci 715cabdff1aSopenharmony_cistatic void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride, 716cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 717cabdff1aSopenharmony_ci{ 718cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 719cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); 720cabdff1aSopenharmony_ci int y; 721cabdff1aSopenharmony_ci 722cabdff1aSopenharmony_ci stride /= sizeof(pixel); 723cabdff1aSopenharmony_ci for (y = 0; y < 8; y++) { 724cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 725cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 726cabdff1aSopenharmony_ci dst += stride; 727cabdff1aSopenharmony_ci } 728cabdff1aSopenharmony_ci} 729cabdff1aSopenharmony_ci 730cabdff1aSopenharmony_cistatic void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride, 731cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 732cabdff1aSopenharmony_ci{ 733cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 734cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); 735cabdff1aSopenharmony_ci int y; 736cabdff1aSopenharmony_ci 737cabdff1aSopenharmony_ci stride /= sizeof(pixel); 738cabdff1aSopenharmony_ci for (y = 0; y < 16; y++) { 739cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 740cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 741cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, val); 742cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, val); 743cabdff1aSopenharmony_ci dst += stride; 744cabdff1aSopenharmony_ci } 745cabdff1aSopenharmony_ci} 746cabdff1aSopenharmony_ci 747cabdff1aSopenharmony_cistatic void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride, 748cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *top) 749cabdff1aSopenharmony_ci{ 750cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 751cabdff1aSopenharmony_ci pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1); 752cabdff1aSopenharmony_ci int y; 753cabdff1aSopenharmony_ci 754cabdff1aSopenharmony_ci stride /= sizeof(pixel); 755cabdff1aSopenharmony_ci for (y = 0; y < 32; y++) { 756cabdff1aSopenharmony_ci AV_WN4PA(dst + 0, val); 757cabdff1aSopenharmony_ci AV_WN4PA(dst + 4, val); 758cabdff1aSopenharmony_ci AV_WN4PA(dst + 8, val); 759cabdff1aSopenharmony_ci AV_WN4PA(dst + 12, val); 760cabdff1aSopenharmony_ci AV_WN4PA(dst + 16, val); 761cabdff1aSopenharmony_ci AV_WN4PA(dst + 20, val); 762cabdff1aSopenharmony_ci AV_WN4PA(dst + 24, val); 763cabdff1aSopenharmony_ci AV_WN4PA(dst + 28, val); 764cabdff1aSopenharmony_ci dst += stride; 765cabdff1aSopenharmony_ci } 766cabdff1aSopenharmony_ci} 767cabdff1aSopenharmony_ci 768cabdff1aSopenharmony_ci#if BIT_DEPTH != 12 769cabdff1aSopenharmony_ci 770cabdff1aSopenharmony_ci#if BIT_DEPTH == 8 771cabdff1aSopenharmony_ci#define memset_bpc memset 772cabdff1aSopenharmony_ci#else 773cabdff1aSopenharmony_cistatic inline void memset_bpc(uint16_t *dst, int val, int len) { 774cabdff1aSopenharmony_ci int n; 775cabdff1aSopenharmony_ci for (n = 0; n < len; n++) { 776cabdff1aSopenharmony_ci dst[n] = val; 777cabdff1aSopenharmony_ci } 778cabdff1aSopenharmony_ci} 779cabdff1aSopenharmony_ci#endif 780cabdff1aSopenharmony_ci 781cabdff1aSopenharmony_ci#define DST(x, y) dst[(x) + (y) * stride] 782cabdff1aSopenharmony_ci 783cabdff1aSopenharmony_cistatic void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride, 784cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 785cabdff1aSopenharmony_ci{ 786cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 787cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 788cabdff1aSopenharmony_ci int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], 789cabdff1aSopenharmony_ci a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7]; 790cabdff1aSopenharmony_ci 791cabdff1aSopenharmony_ci stride /= sizeof(pixel); 792cabdff1aSopenharmony_ci DST(0,0) = (a0 + a1 * 2 + a2 + 2) >> 2; 793cabdff1aSopenharmony_ci DST(1,0) = DST(0,1) = (a1 + a2 * 2 + a3 + 2) >> 2; 794cabdff1aSopenharmony_ci DST(2,0) = DST(1,1) = DST(0,2) = (a2 + a3 * 2 + a4 + 2) >> 2; 795cabdff1aSopenharmony_ci DST(3,0) = DST(2,1) = DST(1,2) = DST(0,3) = (a3 + a4 * 2 + a5 + 2) >> 2; 796cabdff1aSopenharmony_ci DST(3,1) = DST(2,2) = DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2; 797cabdff1aSopenharmony_ci DST(3,2) = DST(2,3) = (a5 + a6 * 2 + a7 + 2) >> 2; 798cabdff1aSopenharmony_ci DST(3,3) = a7; // note: this is different from vp8 and such 799cabdff1aSopenharmony_ci} 800cabdff1aSopenharmony_ci 801cabdff1aSopenharmony_ci#define def_diag_downleft(size) \ 802cabdff1aSopenharmony_cistatic void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 803cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) \ 804cabdff1aSopenharmony_ci{ \ 805cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 806cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; \ 807cabdff1aSopenharmony_ci int i, j; \ 808cabdff1aSopenharmony_ci pixel v[size - 1]; \ 809cabdff1aSopenharmony_ci\ 810cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 811cabdff1aSopenharmony_ci for (i = 0; i < size - 2; i++) \ 812cabdff1aSopenharmony_ci v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ 813cabdff1aSopenharmony_ci v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \ 814cabdff1aSopenharmony_ci\ 815cabdff1aSopenharmony_ci for (j = 0; j < size; j++) { \ 816cabdff1aSopenharmony_ci memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \ 817cabdff1aSopenharmony_ci memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \ 818cabdff1aSopenharmony_ci } \ 819cabdff1aSopenharmony_ci} 820cabdff1aSopenharmony_ci 821cabdff1aSopenharmony_cidef_diag_downleft(8) 822cabdff1aSopenharmony_cidef_diag_downleft(16) 823cabdff1aSopenharmony_cidef_diag_downleft(32) 824cabdff1aSopenharmony_ci 825cabdff1aSopenharmony_cistatic void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t stride, 826cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 827cabdff1aSopenharmony_ci{ 828cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 829cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 830cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 831cabdff1aSopenharmony_ci int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], 832cabdff1aSopenharmony_ci l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0]; 833cabdff1aSopenharmony_ci 834cabdff1aSopenharmony_ci stride /= sizeof(pixel); 835cabdff1aSopenharmony_ci DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2; 836cabdff1aSopenharmony_ci DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2; 837cabdff1aSopenharmony_ci DST(0,1) = DST(1,2) = DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2; 838cabdff1aSopenharmony_ci DST(0,0) = DST(1,1) = DST(2,2) = DST(3,3) = (l0 + tl * 2 + a0 + 2) >> 2; 839cabdff1aSopenharmony_ci DST(1,0) = DST(2,1) = DST(3,2) = (tl + a0 * 2 + a1 + 2) >> 2; 840cabdff1aSopenharmony_ci DST(2,0) = DST(3,1) = (a0 + a1 * 2 + a2 + 2) >> 2; 841cabdff1aSopenharmony_ci DST(3,0) = (a1 + a2 * 2 + a3 + 2) >> 2; 842cabdff1aSopenharmony_ci} 843cabdff1aSopenharmony_ci 844cabdff1aSopenharmony_ci#define def_diag_downright(size) \ 845cabdff1aSopenharmony_cistatic void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 846cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) \ 847cabdff1aSopenharmony_ci{ \ 848cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 849cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; \ 850cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; \ 851cabdff1aSopenharmony_ci int i, j; \ 852cabdff1aSopenharmony_ci pixel v[size + size - 1]; \ 853cabdff1aSopenharmony_ci\ 854cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 855cabdff1aSopenharmony_ci for (i = 0; i < size - 2; i++) { \ 856cabdff1aSopenharmony_ci v[i ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ 857cabdff1aSopenharmony_ci v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ 858cabdff1aSopenharmony_ci } \ 859cabdff1aSopenharmony_ci v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \ 860cabdff1aSopenharmony_ci v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \ 861cabdff1aSopenharmony_ci v[size ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \ 862cabdff1aSopenharmony_ci\ 863cabdff1aSopenharmony_ci for (j = 0; j < size; j++) \ 864cabdff1aSopenharmony_ci memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \ 865cabdff1aSopenharmony_ci} 866cabdff1aSopenharmony_ci 867cabdff1aSopenharmony_cidef_diag_downright(8) 868cabdff1aSopenharmony_cidef_diag_downright(16) 869cabdff1aSopenharmony_cidef_diag_downright(32) 870cabdff1aSopenharmony_ci 871cabdff1aSopenharmony_cistatic void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t stride, 872cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 873cabdff1aSopenharmony_ci{ 874cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 875cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 876cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 877cabdff1aSopenharmony_ci int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], 878cabdff1aSopenharmony_ci l0 = left[3], l1 = left[2], l2 = left[1]; 879cabdff1aSopenharmony_ci 880cabdff1aSopenharmony_ci stride /= sizeof(pixel); 881cabdff1aSopenharmony_ci DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2; 882cabdff1aSopenharmony_ci DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2; 883cabdff1aSopenharmony_ci DST(0,0) = DST(1,2) = (tl + a0 + 1) >> 1; 884cabdff1aSopenharmony_ci DST(0,1) = DST(1,3) = (l0 + tl * 2 + a0 + 2) >> 2; 885cabdff1aSopenharmony_ci DST(1,0) = DST(2,2) = (a0 + a1 + 1) >> 1; 886cabdff1aSopenharmony_ci DST(1,1) = DST(2,3) = (tl + a0 * 2 + a1 + 2) >> 2; 887cabdff1aSopenharmony_ci DST(2,0) = DST(3,2) = (a1 + a2 + 1) >> 1; 888cabdff1aSopenharmony_ci DST(2,1) = DST(3,3) = (a0 + a1 * 2 + a2 + 2) >> 2; 889cabdff1aSopenharmony_ci DST(3,0) = (a2 + a3 + 1) >> 1; 890cabdff1aSopenharmony_ci DST(3,1) = (a1 + a2 * 2 + a3 + 2) >> 2; 891cabdff1aSopenharmony_ci} 892cabdff1aSopenharmony_ci 893cabdff1aSopenharmony_ci#define def_vert_right(size) \ 894cabdff1aSopenharmony_cistatic void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 895cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) \ 896cabdff1aSopenharmony_ci{ \ 897cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 898cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; \ 899cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; \ 900cabdff1aSopenharmony_ci int i, j; \ 901cabdff1aSopenharmony_ci pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \ 902cabdff1aSopenharmony_ci\ 903cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 904cabdff1aSopenharmony_ci for (i = 0; i < size/2 - 2; i++) { \ 905cabdff1aSopenharmony_ci vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \ 906cabdff1aSopenharmony_ci ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \ 907cabdff1aSopenharmony_ci } \ 908cabdff1aSopenharmony_ci vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \ 909cabdff1aSopenharmony_ci ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ 910cabdff1aSopenharmony_ci\ 911cabdff1aSopenharmony_ci ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \ 912cabdff1aSopenharmony_ci vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \ 913cabdff1aSopenharmony_ci for (i = 0; i < size - 1; i++) { \ 914cabdff1aSopenharmony_ci ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \ 915cabdff1aSopenharmony_ci vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ 916cabdff1aSopenharmony_ci } \ 917cabdff1aSopenharmony_ci\ 918cabdff1aSopenharmony_ci for (j = 0; j < size / 2; j++) { \ 919cabdff1aSopenharmony_ci memcpy(dst + j*2 *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \ 920cabdff1aSopenharmony_ci memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \ 921cabdff1aSopenharmony_ci } \ 922cabdff1aSopenharmony_ci} 923cabdff1aSopenharmony_ci 924cabdff1aSopenharmony_cidef_vert_right(8) 925cabdff1aSopenharmony_cidef_vert_right(16) 926cabdff1aSopenharmony_cidef_vert_right(32) 927cabdff1aSopenharmony_ci 928cabdff1aSopenharmony_cistatic void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t stride, 929cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) 930cabdff1aSopenharmony_ci{ 931cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 932cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 933cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 934cabdff1aSopenharmony_ci int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0], 935cabdff1aSopenharmony_ci tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2]; 936cabdff1aSopenharmony_ci 937cabdff1aSopenharmony_ci stride /= sizeof(pixel); 938cabdff1aSopenharmony_ci DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2; 939cabdff1aSopenharmony_ci DST(3,0) = (a0 + a1 * 2 + a2 + 2) >> 2; 940cabdff1aSopenharmony_ci DST(0,0) = DST(2,1) = (tl + l0 + 1) >> 1; 941cabdff1aSopenharmony_ci DST(1,0) = DST(3,1) = (a0 + tl * 2 + l0 + 2) >> 2; 942cabdff1aSopenharmony_ci DST(0,1) = DST(2,2) = (l0 + l1 + 1) >> 1; 943cabdff1aSopenharmony_ci DST(1,1) = DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2; 944cabdff1aSopenharmony_ci DST(0,2) = DST(2,3) = (l1 + l2 + 1) >> 1; 945cabdff1aSopenharmony_ci DST(1,2) = DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2; 946cabdff1aSopenharmony_ci DST(0,3) = (l2 + l3 + 1) >> 1; 947cabdff1aSopenharmony_ci DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2; 948cabdff1aSopenharmony_ci} 949cabdff1aSopenharmony_ci 950cabdff1aSopenharmony_ci#define def_hor_down(size) \ 951cabdff1aSopenharmony_cistatic void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 952cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *_top) \ 953cabdff1aSopenharmony_ci{ \ 954cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 955cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; \ 956cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; \ 957cabdff1aSopenharmony_ci int i, j; \ 958cabdff1aSopenharmony_ci pixel v[size * 3 - 2]; \ 959cabdff1aSopenharmony_ci\ 960cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 961cabdff1aSopenharmony_ci for (i = 0; i < size - 2; i++) { \ 962cabdff1aSopenharmony_ci v[i*2 ] = (left[i + 1] + left[i + 0] + 1) >> 1; \ 963cabdff1aSopenharmony_ci v[i*2 + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \ 964cabdff1aSopenharmony_ci v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \ 965cabdff1aSopenharmony_ci } \ 966cabdff1aSopenharmony_ci v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \ 967cabdff1aSopenharmony_ci v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \ 968cabdff1aSopenharmony_ci v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \ 969cabdff1aSopenharmony_ci v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \ 970cabdff1aSopenharmony_ci\ 971cabdff1aSopenharmony_ci for (j = 0; j < size; j++) \ 972cabdff1aSopenharmony_ci memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \ 973cabdff1aSopenharmony_ci} 974cabdff1aSopenharmony_ci 975cabdff1aSopenharmony_cidef_hor_down(8) 976cabdff1aSopenharmony_cidef_hor_down(16) 977cabdff1aSopenharmony_cidef_hor_down(32) 978cabdff1aSopenharmony_ci 979cabdff1aSopenharmony_cistatic void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t stride, 980cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) 981cabdff1aSopenharmony_ci{ 982cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 983cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; 984cabdff1aSopenharmony_ci int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3], 985cabdff1aSopenharmony_ci a4 = top[4], a5 = top[5], a6 = top[6]; 986cabdff1aSopenharmony_ci 987cabdff1aSopenharmony_ci stride /= sizeof(pixel); 988cabdff1aSopenharmony_ci DST(0,0) = (a0 + a1 + 1) >> 1; 989cabdff1aSopenharmony_ci DST(0,1) = (a0 + a1 * 2 + a2 + 2) >> 2; 990cabdff1aSopenharmony_ci DST(1,0) = DST(0,2) = (a1 + a2 + 1) >> 1; 991cabdff1aSopenharmony_ci DST(1,1) = DST(0,3) = (a1 + a2 * 2 + a3 + 2) >> 2; 992cabdff1aSopenharmony_ci DST(2,0) = DST(1,2) = (a2 + a3 + 1) >> 1; 993cabdff1aSopenharmony_ci DST(2,1) = DST(1,3) = (a2 + a3 * 2 + a4 + 2) >> 2; 994cabdff1aSopenharmony_ci DST(3,0) = DST(2,2) = (a3 + a4 + 1) >> 1; 995cabdff1aSopenharmony_ci DST(3,1) = DST(2,3) = (a3 + a4 * 2 + a5 + 2) >> 2; 996cabdff1aSopenharmony_ci DST(3,2) = (a4 + a5 + 1) >> 1; 997cabdff1aSopenharmony_ci DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2; 998cabdff1aSopenharmony_ci} 999cabdff1aSopenharmony_ci 1000cabdff1aSopenharmony_ci#define def_vert_left(size) \ 1001cabdff1aSopenharmony_cistatic void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 1002cabdff1aSopenharmony_ci const uint8_t *left, const uint8_t *_top) \ 1003cabdff1aSopenharmony_ci{ \ 1004cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 1005cabdff1aSopenharmony_ci const pixel *top = (const pixel *) _top; \ 1006cabdff1aSopenharmony_ci int i, j; \ 1007cabdff1aSopenharmony_ci pixel ve[size - 1], vo[size - 1]; \ 1008cabdff1aSopenharmony_ci\ 1009cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 1010cabdff1aSopenharmony_ci for (i = 0; i < size - 2; i++) { \ 1011cabdff1aSopenharmony_ci ve[i] = (top[i] + top[i + 1] + 1) >> 1; \ 1012cabdff1aSopenharmony_ci vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \ 1013cabdff1aSopenharmony_ci } \ 1014cabdff1aSopenharmony_ci ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \ 1015cabdff1aSopenharmony_ci vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \ 1016cabdff1aSopenharmony_ci\ 1017cabdff1aSopenharmony_ci for (j = 0; j < size / 2; j++) { \ 1018cabdff1aSopenharmony_ci memcpy(dst + j*2 * stride, ve + j, (size - j - 1) * sizeof(pixel)); \ 1019cabdff1aSopenharmony_ci memset_bpc(dst + j*2 * stride + size - j - 1, top[size - 1], j + 1); \ 1020cabdff1aSopenharmony_ci memcpy(dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \ 1021cabdff1aSopenharmony_ci memset_bpc(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \ 1022cabdff1aSopenharmony_ci } \ 1023cabdff1aSopenharmony_ci} 1024cabdff1aSopenharmony_ci 1025cabdff1aSopenharmony_cidef_vert_left(8) 1026cabdff1aSopenharmony_cidef_vert_left(16) 1027cabdff1aSopenharmony_cidef_vert_left(32) 1028cabdff1aSopenharmony_ci 1029cabdff1aSopenharmony_cistatic void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t stride, 1030cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) 1031cabdff1aSopenharmony_ci{ 1032cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 1033cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; 1034cabdff1aSopenharmony_ci int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3]; 1035cabdff1aSopenharmony_ci 1036cabdff1aSopenharmony_ci stride /= sizeof(pixel); 1037cabdff1aSopenharmony_ci DST(0,0) = (l0 + l1 + 1) >> 1; 1038cabdff1aSopenharmony_ci DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2; 1039cabdff1aSopenharmony_ci DST(0,1) = DST(2,0) = (l1 + l2 + 1) >> 1; 1040cabdff1aSopenharmony_ci DST(1,1) = DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2; 1041cabdff1aSopenharmony_ci DST(0,2) = DST(2,1) = (l2 + l3 + 1) >> 1; 1042cabdff1aSopenharmony_ci DST(1,2) = DST(3,1) = (l2 + l3 * 3 + 2) >> 2; 1043cabdff1aSopenharmony_ci DST(0,3) = DST(1,3) = DST(2,2) = DST(2,3) = DST(3,2) = DST(3,3) = l3; 1044cabdff1aSopenharmony_ci} 1045cabdff1aSopenharmony_ci 1046cabdff1aSopenharmony_ci#define def_hor_up(size) \ 1047cabdff1aSopenharmony_cistatic void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \ 1048cabdff1aSopenharmony_ci const uint8_t *_left, const uint8_t *top) \ 1049cabdff1aSopenharmony_ci{ \ 1050cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 1051cabdff1aSopenharmony_ci const pixel *left = (const pixel *) _left; \ 1052cabdff1aSopenharmony_ci int i, j; \ 1053cabdff1aSopenharmony_ci pixel v[size*2 - 2]; \ 1054cabdff1aSopenharmony_ci\ 1055cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 1056cabdff1aSopenharmony_ci for (i = 0; i < size - 2; i++) { \ 1057cabdff1aSopenharmony_ci v[i*2 ] = (left[i] + left[i + 1] + 1) >> 1; \ 1058cabdff1aSopenharmony_ci v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \ 1059cabdff1aSopenharmony_ci } \ 1060cabdff1aSopenharmony_ci v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \ 1061cabdff1aSopenharmony_ci v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \ 1062cabdff1aSopenharmony_ci\ 1063cabdff1aSopenharmony_ci for (j = 0; j < size / 2; j++) \ 1064cabdff1aSopenharmony_ci memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \ 1065cabdff1aSopenharmony_ci for (j = size / 2; j < size; j++) { \ 1066cabdff1aSopenharmony_ci memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \ 1067cabdff1aSopenharmony_ci memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \ 1068cabdff1aSopenharmony_ci 2 + j*2 - size); \ 1069cabdff1aSopenharmony_ci } \ 1070cabdff1aSopenharmony_ci} 1071cabdff1aSopenharmony_ci 1072cabdff1aSopenharmony_cidef_hor_up(8) 1073cabdff1aSopenharmony_cidef_hor_up(16) 1074cabdff1aSopenharmony_cidef_hor_up(32) 1075cabdff1aSopenharmony_ci 1076cabdff1aSopenharmony_ci#undef DST 1077cabdff1aSopenharmony_ci 1078cabdff1aSopenharmony_ci#endif /* BIT_DEPTH != 12 */ 1079cabdff1aSopenharmony_ci 1080cabdff1aSopenharmony_ci#if BIT_DEPTH != 8 1081cabdff1aSopenharmony_civoid ff_vp9dsp_intrapred_init_10(VP9DSPContext *dsp); 1082cabdff1aSopenharmony_ci#endif 1083cabdff1aSopenharmony_ci#if BIT_DEPTH != 10 1084cabdff1aSopenharmony_cistatic 1085cabdff1aSopenharmony_ci#endif 1086cabdff1aSopenharmony_ciav_cold void FUNC(ff_vp9dsp_intrapred_init)(VP9DSPContext *dsp) 1087cabdff1aSopenharmony_ci{ 1088cabdff1aSopenharmony_ci#define init_intra_pred_bd_aware(tx, sz) \ 1089cabdff1aSopenharmony_ci dsp->intra_pred[tx][TM_VP8_PRED] = tm_##sz##_c; \ 1090cabdff1aSopenharmony_ci dsp->intra_pred[tx][DC_128_PRED] = dc_128_##sz##_c; \ 1091cabdff1aSopenharmony_ci dsp->intra_pred[tx][DC_127_PRED] = dc_127_##sz##_c; \ 1092cabdff1aSopenharmony_ci dsp->intra_pred[tx][DC_129_PRED] = dc_129_##sz##_c 1093cabdff1aSopenharmony_ci 1094cabdff1aSopenharmony_ci#if BIT_DEPTH == 12 1095cabdff1aSopenharmony_ci ff_vp9dsp_intrapred_init_10(dsp); 1096cabdff1aSopenharmony_ci#define init_intra_pred(tx, sz) \ 1097cabdff1aSopenharmony_ci init_intra_pred_bd_aware(tx, sz) 1098cabdff1aSopenharmony_ci#else 1099cabdff1aSopenharmony_ci #define init_intra_pred(tx, sz) \ 1100cabdff1aSopenharmony_ci dsp->intra_pred[tx][VERT_PRED] = vert_##sz##_c; \ 1101cabdff1aSopenharmony_ci dsp->intra_pred[tx][HOR_PRED] = hor_##sz##_c; \ 1102cabdff1aSopenharmony_ci dsp->intra_pred[tx][DC_PRED] = dc_##sz##_c; \ 1103cabdff1aSopenharmony_ci dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED] = diag_downleft_##sz##_c; \ 1104cabdff1aSopenharmony_ci dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \ 1105cabdff1aSopenharmony_ci dsp->intra_pred[tx][VERT_RIGHT_PRED] = vert_right_##sz##_c; \ 1106cabdff1aSopenharmony_ci dsp->intra_pred[tx][HOR_DOWN_PRED] = hor_down_##sz##_c; \ 1107cabdff1aSopenharmony_ci dsp->intra_pred[tx][VERT_LEFT_PRED] = vert_left_##sz##_c; \ 1108cabdff1aSopenharmony_ci dsp->intra_pred[tx][HOR_UP_PRED] = hor_up_##sz##_c; \ 1109cabdff1aSopenharmony_ci dsp->intra_pred[tx][LEFT_DC_PRED] = dc_left_##sz##_c; \ 1110cabdff1aSopenharmony_ci dsp->intra_pred[tx][TOP_DC_PRED] = dc_top_##sz##_c; \ 1111cabdff1aSopenharmony_ci init_intra_pred_bd_aware(tx, sz) 1112cabdff1aSopenharmony_ci#endif 1113cabdff1aSopenharmony_ci 1114cabdff1aSopenharmony_ci init_intra_pred(TX_4X4, 4x4); 1115cabdff1aSopenharmony_ci init_intra_pred(TX_8X8, 8x8); 1116cabdff1aSopenharmony_ci init_intra_pred(TX_16X16, 16x16); 1117cabdff1aSopenharmony_ci init_intra_pred(TX_32X32, 32x32); 1118cabdff1aSopenharmony_ci 1119cabdff1aSopenharmony_ci#undef init_intra_pred 1120cabdff1aSopenharmony_ci#undef init_intra_pred_bd_aware 1121cabdff1aSopenharmony_ci} 1122cabdff1aSopenharmony_ci 1123cabdff1aSopenharmony_ci#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \ 1124cabdff1aSopenharmony_cistatic void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \ 1125cabdff1aSopenharmony_ci ptrdiff_t stride, \ 1126cabdff1aSopenharmony_ci int16_t *_block, int eob) \ 1127cabdff1aSopenharmony_ci{ \ 1128cabdff1aSopenharmony_ci int i, j; \ 1129cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 1130cabdff1aSopenharmony_ci dctcoef *block = (dctcoef *) _block, tmp[sz * sz], out[sz]; \ 1131cabdff1aSopenharmony_ci\ 1132cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 1133cabdff1aSopenharmony_ci if (has_dconly && eob == 1) { \ 1134cabdff1aSopenharmony_ci const int t = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \ 1135cabdff1aSopenharmony_ci * 11585 + (1 << 13)) >> 14; \ 1136cabdff1aSopenharmony_ci block[0] = 0; \ 1137cabdff1aSopenharmony_ci for (i = 0; i < sz; i++) { \ 1138cabdff1aSopenharmony_ci for (j = 0; j < sz; j++) \ 1139cabdff1aSopenharmony_ci dst[j * stride] = av_clip_pixel(dst[j * stride] + \ 1140cabdff1aSopenharmony_ci (bits ? \ 1141cabdff1aSopenharmony_ci (int)(t + (1U << (bits - 1))) >> bits : \ 1142cabdff1aSopenharmony_ci t)); \ 1143cabdff1aSopenharmony_ci dst++; \ 1144cabdff1aSopenharmony_ci } \ 1145cabdff1aSopenharmony_ci return; \ 1146cabdff1aSopenharmony_ci } \ 1147cabdff1aSopenharmony_ci\ 1148cabdff1aSopenharmony_ci for (i = 0; i < sz; i++) \ 1149cabdff1aSopenharmony_ci type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \ 1150cabdff1aSopenharmony_ci memset(block, 0, sz * sz * sizeof(*block)); \ 1151cabdff1aSopenharmony_ci for (i = 0; i < sz; i++) { \ 1152cabdff1aSopenharmony_ci type_b##sz##_1d(tmp + i, sz, out, 1); \ 1153cabdff1aSopenharmony_ci for (j = 0; j < sz; j++) \ 1154cabdff1aSopenharmony_ci dst[j * stride] = av_clip_pixel(dst[j * stride] + \ 1155cabdff1aSopenharmony_ci (bits ? \ 1156cabdff1aSopenharmony_ci (int)(out[j] + (1U << (bits - 1))) >> bits : \ 1157cabdff1aSopenharmony_ci out[j])); \ 1158cabdff1aSopenharmony_ci dst++; \ 1159cabdff1aSopenharmony_ci } \ 1160cabdff1aSopenharmony_ci} 1161cabdff1aSopenharmony_ci 1162cabdff1aSopenharmony_ci#define itxfm_wrap(sz, bits) \ 1163cabdff1aSopenharmony_ciitxfm_wrapper(idct, idct, sz, bits, 1) \ 1164cabdff1aSopenharmony_ciitxfm_wrapper(iadst, idct, sz, bits, 0) \ 1165cabdff1aSopenharmony_ciitxfm_wrapper(idct, iadst, sz, bits, 0) \ 1166cabdff1aSopenharmony_ciitxfm_wrapper(iadst, iadst, sz, bits, 0) 1167cabdff1aSopenharmony_ci 1168cabdff1aSopenharmony_ci#define IN(x) ((dctint) in[(x) * stride]) 1169cabdff1aSopenharmony_ci 1170cabdff1aSopenharmony_cistatic av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride, 1171cabdff1aSopenharmony_ci dctcoef *out, int pass) 1172cabdff1aSopenharmony_ci{ 1173cabdff1aSopenharmony_ci dctint t0, t1, t2, t3; 1174cabdff1aSopenharmony_ci 1175cabdff1aSopenharmony_ci t0 = ((IN(0) + IN(2)) * 11585 + (1 << 13)) >> 14; 1176cabdff1aSopenharmony_ci t1 = ((IN(0) - IN(2)) * 11585 + (1 << 13)) >> 14; 1177cabdff1aSopenharmony_ci t2 = (IN(1) * 6270 - IN(3) * 15137 + (1 << 13)) >> 14; 1178cabdff1aSopenharmony_ci t3 = (IN(1) * 15137 + IN(3) * 6270 + (1 << 13)) >> 14; 1179cabdff1aSopenharmony_ci 1180cabdff1aSopenharmony_ci out[0] = t0 + t3; 1181cabdff1aSopenharmony_ci out[1] = t1 + t2; 1182cabdff1aSopenharmony_ci out[2] = t1 - t2; 1183cabdff1aSopenharmony_ci out[3] = t0 - t3; 1184cabdff1aSopenharmony_ci} 1185cabdff1aSopenharmony_ci 1186cabdff1aSopenharmony_cistatic av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride, 1187cabdff1aSopenharmony_ci dctcoef *out, int pass) 1188cabdff1aSopenharmony_ci{ 1189cabdff1aSopenharmony_ci dctint t0, t1, t2, t3; 1190cabdff1aSopenharmony_ci 1191cabdff1aSopenharmony_ci t0 = 5283 * IN(0) + 15212 * IN(2) + 9929 * IN(3); 1192cabdff1aSopenharmony_ci t1 = 9929 * IN(0) - 5283 * IN(2) - 15212 * IN(3); 1193cabdff1aSopenharmony_ci t2 = 13377 * (IN(0) - IN(2) + IN(3)); 1194cabdff1aSopenharmony_ci t3 = 13377 * IN(1); 1195cabdff1aSopenharmony_ci 1196cabdff1aSopenharmony_ci out[0] = (t0 + t3 + (1 << 13)) >> 14; 1197cabdff1aSopenharmony_ci out[1] = (t1 + t3 + (1 << 13)) >> 14; 1198cabdff1aSopenharmony_ci out[2] = (t2 + (1 << 13)) >> 14; 1199cabdff1aSopenharmony_ci out[3] = (t0 + t1 - t3 + (1 << 13)) >> 14; 1200cabdff1aSopenharmony_ci} 1201cabdff1aSopenharmony_ci 1202cabdff1aSopenharmony_ciitxfm_wrap(4, 4) 1203cabdff1aSopenharmony_ci 1204cabdff1aSopenharmony_cistatic av_always_inline void idct8_1d(const dctcoef *in, ptrdiff_t stride, 1205cabdff1aSopenharmony_ci dctcoef *out, int pass) 1206cabdff1aSopenharmony_ci{ 1207cabdff1aSopenharmony_ci dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a; 1208cabdff1aSopenharmony_ci 1209cabdff1aSopenharmony_ci t0a = ((IN(0) + IN(4)) * 11585 + (1 << 13)) >> 14; 1210cabdff1aSopenharmony_ci t1a = ((IN(0) - IN(4)) * 11585 + (1 << 13)) >> 14; 1211cabdff1aSopenharmony_ci t2a = (IN(2) * 6270 - IN(6) * 15137 + (1 << 13)) >> 14; 1212cabdff1aSopenharmony_ci t3a = (IN(2) * 15137 + IN(6) * 6270 + (1 << 13)) >> 14; 1213cabdff1aSopenharmony_ci t4a = (IN(1) * 3196 - IN(7) * 16069 + (1 << 13)) >> 14; 1214cabdff1aSopenharmony_ci t5a = (IN(5) * 13623 - IN(3) * 9102 + (1 << 13)) >> 14; 1215cabdff1aSopenharmony_ci t6a = (IN(5) * 9102 + IN(3) * 13623 + (1 << 13)) >> 14; 1216cabdff1aSopenharmony_ci t7a = (IN(1) * 16069 + IN(7) * 3196 + (1 << 13)) >> 14; 1217cabdff1aSopenharmony_ci 1218cabdff1aSopenharmony_ci t0 = t0a + t3a; 1219cabdff1aSopenharmony_ci t1 = t1a + t2a; 1220cabdff1aSopenharmony_ci t2 = t1a - t2a; 1221cabdff1aSopenharmony_ci t3 = t0a - t3a; 1222cabdff1aSopenharmony_ci t4 = t4a + t5a; 1223cabdff1aSopenharmony_ci t5a = t4a - t5a; 1224cabdff1aSopenharmony_ci t7 = t7a + t6a; 1225cabdff1aSopenharmony_ci t6a = t7a - t6a; 1226cabdff1aSopenharmony_ci 1227cabdff1aSopenharmony_ci t5 = ((t6a - t5a) * 11585 + (1 << 13)) >> 14; 1228cabdff1aSopenharmony_ci t6 = ((t6a + t5a) * 11585 + (1 << 13)) >> 14; 1229cabdff1aSopenharmony_ci 1230cabdff1aSopenharmony_ci out[0] = t0 + t7; 1231cabdff1aSopenharmony_ci out[1] = t1 + t6; 1232cabdff1aSopenharmony_ci out[2] = t2 + t5; 1233cabdff1aSopenharmony_ci out[3] = t3 + t4; 1234cabdff1aSopenharmony_ci out[4] = t3 - t4; 1235cabdff1aSopenharmony_ci out[5] = t2 - t5; 1236cabdff1aSopenharmony_ci out[6] = t1 - t6; 1237cabdff1aSopenharmony_ci out[7] = t0 - t7; 1238cabdff1aSopenharmony_ci} 1239cabdff1aSopenharmony_ci 1240cabdff1aSopenharmony_cistatic av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride, 1241cabdff1aSopenharmony_ci dctcoef *out, int pass) 1242cabdff1aSopenharmony_ci{ 1243cabdff1aSopenharmony_ci dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a; 1244cabdff1aSopenharmony_ci 1245cabdff1aSopenharmony_ci t0a = 16305 * IN(7) + 1606 * IN(0); 1246cabdff1aSopenharmony_ci t1a = 1606 * IN(7) - 16305 * IN(0); 1247cabdff1aSopenharmony_ci t2a = 14449 * IN(5) + 7723 * IN(2); 1248cabdff1aSopenharmony_ci t3a = 7723 * IN(5) - 14449 * IN(2); 1249cabdff1aSopenharmony_ci t4a = 10394 * IN(3) + 12665 * IN(4); 1250cabdff1aSopenharmony_ci t5a = 12665 * IN(3) - 10394 * IN(4); 1251cabdff1aSopenharmony_ci t6a = 4756 * IN(1) + 15679 * IN(6); 1252cabdff1aSopenharmony_ci t7a = 15679 * IN(1) - 4756 * IN(6); 1253cabdff1aSopenharmony_ci 1254cabdff1aSopenharmony_ci t0 = (t0a + t4a + (1 << 13)) >> 14; 1255cabdff1aSopenharmony_ci t1 = (t1a + t5a + (1 << 13)) >> 14; 1256cabdff1aSopenharmony_ci t2 = (t2a + t6a + (1 << 13)) >> 14; 1257cabdff1aSopenharmony_ci t3 = (t3a + t7a + (1 << 13)) >> 14; 1258cabdff1aSopenharmony_ci t4 = (t0a - t4a + (1 << 13)) >> 14; 1259cabdff1aSopenharmony_ci t5 = (t1a - t5a + (1 << 13)) >> 14; 1260cabdff1aSopenharmony_ci t6 = (t2a - t6a + (1 << 13)) >> 14; 1261cabdff1aSopenharmony_ci t7 = (t3a - t7a + (1 << 13)) >> 14; 1262cabdff1aSopenharmony_ci 1263cabdff1aSopenharmony_ci t4a = 15137U * t4 + 6270U * t5; 1264cabdff1aSopenharmony_ci t5a = 6270U * t4 - 15137U * t5; 1265cabdff1aSopenharmony_ci t6a = 15137U * t7 - 6270U * t6; 1266cabdff1aSopenharmony_ci t7a = 6270U * t7 + 15137U * t6; 1267cabdff1aSopenharmony_ci 1268cabdff1aSopenharmony_ci out[0] = t0 + t2; 1269cabdff1aSopenharmony_ci out[7] = -(t1 + t3); 1270cabdff1aSopenharmony_ci t2 = t0 - t2; 1271cabdff1aSopenharmony_ci t3 = t1 - t3; 1272cabdff1aSopenharmony_ci 1273cabdff1aSopenharmony_ci out[1] = -((dctint)((1U << 13) + t4a + t6a) >> 14); 1274cabdff1aSopenharmony_ci out[6] = (dctint)((1U << 13) + t5a + t7a) >> 14; 1275cabdff1aSopenharmony_ci t6 = (dctint)((1U << 13) + t4a - t6a) >> 14; 1276cabdff1aSopenharmony_ci t7 = (dctint)((1U << 13) + t5a - t7a) >> 14; 1277cabdff1aSopenharmony_ci 1278cabdff1aSopenharmony_ci out[3] = -((dctint)((t2 + t3) * 11585U + (1 << 13)) >> 14); 1279cabdff1aSopenharmony_ci out[4] = (dctint)((t2 - t3) * 11585U + (1 << 13)) >> 14; 1280cabdff1aSopenharmony_ci out[2] = (dctint)((t6 + t7) * 11585U + (1 << 13)) >> 14; 1281cabdff1aSopenharmony_ci out[5] = -((dctint)((t6 - t7) * 11585U + (1 << 13)) >> 14); 1282cabdff1aSopenharmony_ci} 1283cabdff1aSopenharmony_ci 1284cabdff1aSopenharmony_ciitxfm_wrap(8, 5) 1285cabdff1aSopenharmony_ci 1286cabdff1aSopenharmony_cistatic av_always_inline void idct16_1d(const dctcoef *in, ptrdiff_t stride, 1287cabdff1aSopenharmony_ci dctcoef *out, int pass) 1288cabdff1aSopenharmony_ci{ 1289cabdff1aSopenharmony_ci dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15; 1290cabdff1aSopenharmony_ci dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a; 1291cabdff1aSopenharmony_ci dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a; 1292cabdff1aSopenharmony_ci 1293cabdff1aSopenharmony_ci t0a = (dctint)((IN(0) + IN(8)) * 11585U + (1 << 13)) >> 14; 1294cabdff1aSopenharmony_ci t1a = (dctint)((IN(0) - IN(8)) * 11585U + (1 << 13)) >> 14; 1295cabdff1aSopenharmony_ci t2a = (dctint)(IN(4) * 6270U - IN(12) * 15137U + (1 << 13)) >> 14; 1296cabdff1aSopenharmony_ci t3a = (dctint)(IN(4) * 15137U + IN(12) * 6270U + (1 << 13)) >> 14; 1297cabdff1aSopenharmony_ci t4a = (dctint)(IN(2) * 3196U - IN(14) * 16069U + (1 << 13)) >> 14; 1298cabdff1aSopenharmony_ci t7a = (dctint)(IN(2) * 16069U + IN(14) * 3196U + (1 << 13)) >> 14; 1299cabdff1aSopenharmony_ci t5a = (dctint)(IN(10) * 13623U - IN(6) * 9102U + (1 << 13)) >> 14; 1300cabdff1aSopenharmony_ci t6a = (dctint)(IN(10) * 9102U + IN(6) * 13623U + (1 << 13)) >> 14; 1301cabdff1aSopenharmony_ci t8a = (dctint)(IN(1) * 1606U - IN(15) * 16305U + (1 << 13)) >> 14; 1302cabdff1aSopenharmony_ci t15a = (dctint)(IN(1) * 16305U + IN(15) * 1606U + (1 << 13)) >> 14; 1303cabdff1aSopenharmony_ci t9a = (dctint)(IN(9) * 12665U - IN(7) * 10394U + (1 << 13)) >> 14; 1304cabdff1aSopenharmony_ci t14a = (dctint)(IN(9) * 10394U + IN(7) * 12665U + (1 << 13)) >> 14; 1305cabdff1aSopenharmony_ci t10a = (dctint)(IN(5) * 7723U - IN(11) * 14449U + (1 << 13)) >> 14; 1306cabdff1aSopenharmony_ci t13a = (dctint)(IN(5) * 14449U + IN(11) * 7723U + (1 << 13)) >> 14; 1307cabdff1aSopenharmony_ci t11a = (dctint)(IN(13) * 15679U - IN(3) * 4756U + (1 << 13)) >> 14; 1308cabdff1aSopenharmony_ci t12a = (dctint)(IN(13) * 4756U + IN(3) * 15679U + (1 << 13)) >> 14; 1309cabdff1aSopenharmony_ci 1310cabdff1aSopenharmony_ci t0 = t0a + t3a; 1311cabdff1aSopenharmony_ci t1 = t1a + t2a; 1312cabdff1aSopenharmony_ci t2 = t1a - t2a; 1313cabdff1aSopenharmony_ci t3 = t0a - t3a; 1314cabdff1aSopenharmony_ci t4 = t4a + t5a; 1315cabdff1aSopenharmony_ci t5 = t4a - t5a; 1316cabdff1aSopenharmony_ci t6 = t7a - t6a; 1317cabdff1aSopenharmony_ci t7 = t7a + t6a; 1318cabdff1aSopenharmony_ci t8 = t8a + t9a; 1319cabdff1aSopenharmony_ci t9 = t8a - t9a; 1320cabdff1aSopenharmony_ci t10 = t11a - t10a; 1321cabdff1aSopenharmony_ci t11 = t11a + t10a; 1322cabdff1aSopenharmony_ci t12 = t12a + t13a; 1323cabdff1aSopenharmony_ci t13 = t12a - t13a; 1324cabdff1aSopenharmony_ci t14 = t15a - t14a; 1325cabdff1aSopenharmony_ci t15 = t15a + t14a; 1326cabdff1aSopenharmony_ci 1327cabdff1aSopenharmony_ci t5a = (dctint)((t6 - t5) * 11585U + (1 << 13)) >> 14; 1328cabdff1aSopenharmony_ci t6a = (dctint)((t6 + t5) * 11585U + (1 << 13)) >> 14; 1329cabdff1aSopenharmony_ci t9a = (dctint)( t14 * 6270U - t9 * 15137U + (1 << 13)) >> 14; 1330cabdff1aSopenharmony_ci t14a = (dctint)( t14 * 15137U + t9 * 6270U + (1 << 13)) >> 14; 1331cabdff1aSopenharmony_ci t10a = (dctint)(-(t13 * 15137U + t10 * 6270U) + (1 << 13)) >> 14; 1332cabdff1aSopenharmony_ci t13a = (dctint)( t13 * 6270U - t10 * 15137U + (1 << 13)) >> 14; 1333cabdff1aSopenharmony_ci 1334cabdff1aSopenharmony_ci t0a = t0 + t7; 1335cabdff1aSopenharmony_ci t1a = t1 + t6a; 1336cabdff1aSopenharmony_ci t2a = t2 + t5a; 1337cabdff1aSopenharmony_ci t3a = t3 + t4; 1338cabdff1aSopenharmony_ci t4 = t3 - t4; 1339cabdff1aSopenharmony_ci t5 = t2 - t5a; 1340cabdff1aSopenharmony_ci t6 = t1 - t6a; 1341cabdff1aSopenharmony_ci t7 = t0 - t7; 1342cabdff1aSopenharmony_ci t8a = t8 + t11; 1343cabdff1aSopenharmony_ci t9 = t9a + t10a; 1344cabdff1aSopenharmony_ci t10 = t9a - t10a; 1345cabdff1aSopenharmony_ci t11a = t8 - t11; 1346cabdff1aSopenharmony_ci t12a = t15 - t12; 1347cabdff1aSopenharmony_ci t13 = t14a - t13a; 1348cabdff1aSopenharmony_ci t14 = t14a + t13a; 1349cabdff1aSopenharmony_ci t15a = t15 + t12; 1350cabdff1aSopenharmony_ci 1351cabdff1aSopenharmony_ci t10a = (dctint)((t13 - t10) * 11585U + (1 << 13)) >> 14; 1352cabdff1aSopenharmony_ci t13a = (dctint)((t13 + t10) * 11585U + (1 << 13)) >> 14; 1353cabdff1aSopenharmony_ci t11 = (dctint)((t12a - t11a) * 11585U + (1 << 13)) >> 14; 1354cabdff1aSopenharmony_ci t12 = (dctint)((t12a + t11a) * 11585U + (1 << 13)) >> 14; 1355cabdff1aSopenharmony_ci 1356cabdff1aSopenharmony_ci out[ 0] = t0a + t15a; 1357cabdff1aSopenharmony_ci out[ 1] = t1a + t14; 1358cabdff1aSopenharmony_ci out[ 2] = t2a + t13a; 1359cabdff1aSopenharmony_ci out[ 3] = t3a + t12; 1360cabdff1aSopenharmony_ci out[ 4] = t4 + t11; 1361cabdff1aSopenharmony_ci out[ 5] = t5 + t10a; 1362cabdff1aSopenharmony_ci out[ 6] = t6 + t9; 1363cabdff1aSopenharmony_ci out[ 7] = t7 + t8a; 1364cabdff1aSopenharmony_ci out[ 8] = t7 - t8a; 1365cabdff1aSopenharmony_ci out[ 9] = t6 - t9; 1366cabdff1aSopenharmony_ci out[10] = t5 - t10a; 1367cabdff1aSopenharmony_ci out[11] = t4 - t11; 1368cabdff1aSopenharmony_ci out[12] = t3a - t12; 1369cabdff1aSopenharmony_ci out[13] = t2a - t13a; 1370cabdff1aSopenharmony_ci out[14] = t1a - t14; 1371cabdff1aSopenharmony_ci out[15] = t0a - t15a; 1372cabdff1aSopenharmony_ci} 1373cabdff1aSopenharmony_ci 1374cabdff1aSopenharmony_cistatic av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride, 1375cabdff1aSopenharmony_ci dctcoef *out, int pass) 1376cabdff1aSopenharmony_ci{ 1377cabdff1aSopenharmony_ci dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15; 1378cabdff1aSopenharmony_ci dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a; 1379cabdff1aSopenharmony_ci dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a; 1380cabdff1aSopenharmony_ci 1381cabdff1aSopenharmony_ci t0 = IN(15) * 16364U + IN(0) * 804U; 1382cabdff1aSopenharmony_ci t1 = IN(15) * 804U - IN(0) * 16364U; 1383cabdff1aSopenharmony_ci t2 = IN(13) * 15893U + IN(2) * 3981U; 1384cabdff1aSopenharmony_ci t3 = IN(13) * 3981U - IN(2) * 15893U; 1385cabdff1aSopenharmony_ci t4 = IN(11) * 14811U + IN(4) * 7005U; 1386cabdff1aSopenharmony_ci t5 = IN(11) * 7005U - IN(4) * 14811U; 1387cabdff1aSopenharmony_ci t6 = IN(9) * 13160U + IN(6) * 9760U; 1388cabdff1aSopenharmony_ci t7 = IN(9) * 9760U - IN(6) * 13160U; 1389cabdff1aSopenharmony_ci t8 = IN(7) * 11003U + IN(8) * 12140U; 1390cabdff1aSopenharmony_ci t9 = IN(7) * 12140U - IN(8) * 11003U; 1391cabdff1aSopenharmony_ci t10 = IN(5) * 8423U + IN(10) * 14053U; 1392cabdff1aSopenharmony_ci t11 = IN(5) * 14053U - IN(10) * 8423U; 1393cabdff1aSopenharmony_ci t12 = IN(3) * 5520U + IN(12) * 15426U; 1394cabdff1aSopenharmony_ci t13 = IN(3) * 15426U - IN(12) * 5520U; 1395cabdff1aSopenharmony_ci t14 = IN(1) * 2404U + IN(14) * 16207U; 1396cabdff1aSopenharmony_ci t15 = IN(1) * 16207U - IN(14) * 2404U; 1397cabdff1aSopenharmony_ci 1398cabdff1aSopenharmony_ci t0a = (dctint)((1U << 13) + t0 + t8 ) >> 14; 1399cabdff1aSopenharmony_ci t1a = (dctint)((1U << 13) + t1 + t9 ) >> 14; 1400cabdff1aSopenharmony_ci t2a = (dctint)((1U << 13) + t2 + t10) >> 14; 1401cabdff1aSopenharmony_ci t3a = (dctint)((1U << 13) + t3 + t11) >> 14; 1402cabdff1aSopenharmony_ci t4a = (dctint)((1U << 13) + t4 + t12) >> 14; 1403cabdff1aSopenharmony_ci t5a = (dctint)((1U << 13) + t5 + t13) >> 14; 1404cabdff1aSopenharmony_ci t6a = (dctint)((1U << 13) + t6 + t14) >> 14; 1405cabdff1aSopenharmony_ci t7a = (dctint)((1U << 13) + t7 + t15) >> 14; 1406cabdff1aSopenharmony_ci t8a = (dctint)((1U << 13) + t0 - t8 ) >> 14; 1407cabdff1aSopenharmony_ci t9a = (dctint)((1U << 13) + t1 - t9 ) >> 14; 1408cabdff1aSopenharmony_ci t10a = (dctint)((1U << 13) + t2 - t10) >> 14; 1409cabdff1aSopenharmony_ci t11a = (dctint)((1U << 13) + t3 - t11) >> 14; 1410cabdff1aSopenharmony_ci t12a = (dctint)((1U << 13) + t4 - t12) >> 14; 1411cabdff1aSopenharmony_ci t13a = (dctint)((1U << 13) + t5 - t13) >> 14; 1412cabdff1aSopenharmony_ci t14a = (dctint)((1U << 13) + t6 - t14) >> 14; 1413cabdff1aSopenharmony_ci t15a = (dctint)((1U << 13) + t7 - t15) >> 14; 1414cabdff1aSopenharmony_ci 1415cabdff1aSopenharmony_ci t8 = t8a * 16069U + t9a * 3196U; 1416cabdff1aSopenharmony_ci t9 = t8a * 3196U - t9a * 16069U; 1417cabdff1aSopenharmony_ci t10 = t10a * 9102U + t11a * 13623U; 1418cabdff1aSopenharmony_ci t11 = t10a * 13623U - t11a * 9102U; 1419cabdff1aSopenharmony_ci t12 = t13a * 16069U - t12a * 3196U; 1420cabdff1aSopenharmony_ci t13 = t13a * 3196U + t12a * 16069U; 1421cabdff1aSopenharmony_ci t14 = t15a * 9102U - t14a * 13623U; 1422cabdff1aSopenharmony_ci t15 = t15a * 13623U + t14a * 9102U; 1423cabdff1aSopenharmony_ci 1424cabdff1aSopenharmony_ci t0 = t0a + t4a; 1425cabdff1aSopenharmony_ci t1 = t1a + t5a; 1426cabdff1aSopenharmony_ci t2 = t2a + t6a; 1427cabdff1aSopenharmony_ci t3 = t3a + t7a; 1428cabdff1aSopenharmony_ci t4 = t0a - t4a; 1429cabdff1aSopenharmony_ci t5 = t1a - t5a; 1430cabdff1aSopenharmony_ci t6 = t2a - t6a; 1431cabdff1aSopenharmony_ci t7 = t3a - t7a; 1432cabdff1aSopenharmony_ci t8a = (dctint)((1U << 13) + t8 + t12) >> 14; 1433cabdff1aSopenharmony_ci t9a = (dctint)((1U << 13) + t9 + t13) >> 14; 1434cabdff1aSopenharmony_ci t10a = (dctint)((1U << 13) + t10 + t14) >> 14; 1435cabdff1aSopenharmony_ci t11a = (dctint)((1U << 13) + t11 + t15) >> 14; 1436cabdff1aSopenharmony_ci t12a = (dctint)((1U << 13) + t8 - t12) >> 14; 1437cabdff1aSopenharmony_ci t13a = (dctint)((1U << 13) + t9 - t13) >> 14; 1438cabdff1aSopenharmony_ci t14a = (dctint)((1U << 13) + t10 - t14) >> 14; 1439cabdff1aSopenharmony_ci t15a = (dctint)((1U << 13) + t11 - t15) >> 14; 1440cabdff1aSopenharmony_ci 1441cabdff1aSopenharmony_ci t4a = t4 * 15137U + t5 * 6270U; 1442cabdff1aSopenharmony_ci t5a = t4 * 6270U - t5 * 15137U; 1443cabdff1aSopenharmony_ci t6a = t7 * 15137U - t6 * 6270U; 1444cabdff1aSopenharmony_ci t7a = t7 * 6270U + t6 * 15137U; 1445cabdff1aSopenharmony_ci t12 = t12a * 15137U + t13a * 6270U; 1446cabdff1aSopenharmony_ci t13 = t12a * 6270U - t13a * 15137U; 1447cabdff1aSopenharmony_ci t14 = t15a * 15137U - t14a * 6270U; 1448cabdff1aSopenharmony_ci t15 = t15a * 6270U + t14a * 15137U; 1449cabdff1aSopenharmony_ci 1450cabdff1aSopenharmony_ci out[ 0] = t0 + t2; 1451cabdff1aSopenharmony_ci out[15] = -(t1 + t3); 1452cabdff1aSopenharmony_ci t2a = t0 - t2; 1453cabdff1aSopenharmony_ci t3a = t1 - t3; 1454cabdff1aSopenharmony_ci out[ 3] = -((dctint)((1U << 13) + t4a + t6a) >> 14); 1455cabdff1aSopenharmony_ci out[12] = (dctint)((1U << 13) + t5a + t7a) >> 14; 1456cabdff1aSopenharmony_ci t6 = (dctint)((1U << 13) + t4a - t6a) >> 14; 1457cabdff1aSopenharmony_ci t7 = (dctint)((1U << 13) + t5a - t7a) >> 14; 1458cabdff1aSopenharmony_ci out[ 1] = -(t8a + t10a); 1459cabdff1aSopenharmony_ci out[14] = t9a + t11a; 1460cabdff1aSopenharmony_ci t10 = t8a - t10a; 1461cabdff1aSopenharmony_ci t11 = t9a - t11a; 1462cabdff1aSopenharmony_ci out[ 2] = (dctint)((1U << 13) + t12 + t14) >> 14; 1463cabdff1aSopenharmony_ci out[13] = -((dctint)((1U << 13) + t13 + t15) >> 14); 1464cabdff1aSopenharmony_ci t14a = (dctint)((1U << 13) + t12 - t14) >> 14; 1465cabdff1aSopenharmony_ci t15a = (dctint)((1U << 13) + t13 - t15) >> 14; 1466cabdff1aSopenharmony_ci 1467cabdff1aSopenharmony_ci out[ 7] = (dctint)(-(t2a + t3a) * 11585U + (1 << 13)) >> 14; 1468cabdff1aSopenharmony_ci out[ 8] = (dctint)( (t2a - t3a) * 11585U + (1 << 13)) >> 14; 1469cabdff1aSopenharmony_ci out[ 4] = (dctint)( (t7 + t6) * 11585U + (1 << 13)) >> 14; 1470cabdff1aSopenharmony_ci out[11] = (dctint)( (t7 - t6) * 11585U + (1 << 13)) >> 14; 1471cabdff1aSopenharmony_ci out[ 6] = (dctint)( (t11 + t10) * 11585U + (1 << 13)) >> 14; 1472cabdff1aSopenharmony_ci out[ 9] = (dctint)( (t11 - t10) * 11585U + (1 << 13)) >> 14; 1473cabdff1aSopenharmony_ci out[ 5] = (dctint)(-(t14a + t15a) * 11585U + (1 << 13)) >> 14; 1474cabdff1aSopenharmony_ci out[10] = (dctint)( (t14a - t15a) * 11585U + (1 << 13)) >> 14; 1475cabdff1aSopenharmony_ci} 1476cabdff1aSopenharmony_ci 1477cabdff1aSopenharmony_ciitxfm_wrap(16, 6) 1478cabdff1aSopenharmony_ci 1479cabdff1aSopenharmony_cistatic av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride, 1480cabdff1aSopenharmony_ci dctcoef *out, int pass) 1481cabdff1aSopenharmony_ci{ 1482cabdff1aSopenharmony_ci dctint t0a = (dctint)((IN(0) + IN(16)) * 11585U + (1 << 13)) >> 14; 1483cabdff1aSopenharmony_ci dctint t1a = (dctint)((IN(0) - IN(16)) * 11585U + (1 << 13)) >> 14; 1484cabdff1aSopenharmony_ci dctint t2a = (dctint)(IN( 8) * 6270U - IN(24) * 15137U + (1 << 13)) >> 14; 1485cabdff1aSopenharmony_ci dctint t3a = (dctint)(IN( 8) * 15137U + IN(24) * 6270U + (1 << 13)) >> 14; 1486cabdff1aSopenharmony_ci dctint t4a = (dctint)(IN( 4) * 3196U - IN(28) * 16069U + (1 << 13)) >> 14; 1487cabdff1aSopenharmony_ci dctint t7a = (dctint)(IN( 4) * 16069U + IN(28) * 3196U + (1 << 13)) >> 14; 1488cabdff1aSopenharmony_ci dctint t5a = (dctint)(IN(20) * 13623U - IN(12) * 9102U + (1 << 13)) >> 14; 1489cabdff1aSopenharmony_ci dctint t6a = (dctint)(IN(20) * 9102U + IN(12) * 13623U + (1 << 13)) >> 14; 1490cabdff1aSopenharmony_ci dctint t8a = (dctint)(IN( 2) * 1606U - IN(30) * 16305U + (1 << 13)) >> 14; 1491cabdff1aSopenharmony_ci dctint t15a = (dctint)(IN( 2) * 16305U + IN(30) * 1606U + (1 << 13)) >> 14; 1492cabdff1aSopenharmony_ci dctint t9a = (dctint)(IN(18) * 12665U - IN(14) * 10394U + (1 << 13)) >> 14; 1493cabdff1aSopenharmony_ci dctint t14a = (dctint)(IN(18) * 10394U + IN(14) * 12665U + (1 << 13)) >> 14; 1494cabdff1aSopenharmony_ci dctint t10a = (dctint)(IN(10) * 7723U - IN(22) * 14449U + (1 << 13)) >> 14; 1495cabdff1aSopenharmony_ci dctint t13a = (dctint)(IN(10) * 14449U + IN(22) * 7723U + (1 << 13)) >> 14; 1496cabdff1aSopenharmony_ci dctint t11a = (dctint)(IN(26) * 15679U - IN( 6) * 4756U + (1 << 13)) >> 14; 1497cabdff1aSopenharmony_ci dctint t12a = (dctint)(IN(26) * 4756U + IN( 6) * 15679U + (1 << 13)) >> 14; 1498cabdff1aSopenharmony_ci dctint t16a = (dctint)(IN( 1) * 804U - IN(31) * 16364U + (1 << 13)) >> 14; 1499cabdff1aSopenharmony_ci dctint t31a = (dctint)(IN( 1) * 16364U + IN(31) * 804U + (1 << 13)) >> 14; 1500cabdff1aSopenharmony_ci dctint t17a = (dctint)(IN(17) * 12140U - IN(15) * 11003U + (1 << 13)) >> 14; 1501cabdff1aSopenharmony_ci dctint t30a = (dctint)(IN(17) * 11003U + IN(15) * 12140U + (1 << 13)) >> 14; 1502cabdff1aSopenharmony_ci dctint t18a = (dctint)(IN( 9) * 7005U - IN(23) * 14811U + (1 << 13)) >> 14; 1503cabdff1aSopenharmony_ci dctint t29a = (dctint)(IN( 9) * 14811U + IN(23) * 7005U + (1 << 13)) >> 14; 1504cabdff1aSopenharmony_ci dctint t19a = (dctint)(IN(25) * 15426U - IN( 7) * 5520U + (1 << 13)) >> 14; 1505cabdff1aSopenharmony_ci dctint t28a = (dctint)(IN(25) * 5520U + IN( 7) * 15426U + (1 << 13)) >> 14; 1506cabdff1aSopenharmony_ci dctint t20a = (dctint)(IN( 5) * 3981U - IN(27) * 15893U + (1 << 13)) >> 14; 1507cabdff1aSopenharmony_ci dctint t27a = (dctint)(IN( 5) * 15893U + IN(27) * 3981U + (1 << 13)) >> 14; 1508cabdff1aSopenharmony_ci dctint t21a = (dctint)(IN(21) * 14053U - IN(11) * 8423U + (1 << 13)) >> 14; 1509cabdff1aSopenharmony_ci dctint t26a = (dctint)(IN(21) * 8423U + IN(11) * 14053U + (1 << 13)) >> 14; 1510cabdff1aSopenharmony_ci dctint t22a = (dctint)(IN(13) * 9760U - IN(19) * 13160U + (1 << 13)) >> 14; 1511cabdff1aSopenharmony_ci dctint t25a = (dctint)(IN(13) * 13160U + IN(19) * 9760U + (1 << 13)) >> 14; 1512cabdff1aSopenharmony_ci dctint t23a = (dctint)(IN(29) * 16207U - IN( 3) * 2404U + (1 << 13)) >> 14; 1513cabdff1aSopenharmony_ci dctint t24a = (dctint)(IN(29) * 2404U + IN( 3) * 16207U + (1 << 13)) >> 14; 1514cabdff1aSopenharmony_ci 1515cabdff1aSopenharmony_ci dctint t0 = t0a + t3a; 1516cabdff1aSopenharmony_ci dctint t1 = t1a + t2a; 1517cabdff1aSopenharmony_ci dctint t2 = t1a - t2a; 1518cabdff1aSopenharmony_ci dctint t3 = t0a - t3a; 1519cabdff1aSopenharmony_ci dctint t4 = t4a + t5a; 1520cabdff1aSopenharmony_ci dctint t5 = t4a - t5a; 1521cabdff1aSopenharmony_ci dctint t6 = t7a - t6a; 1522cabdff1aSopenharmony_ci dctint t7 = t7a + t6a; 1523cabdff1aSopenharmony_ci dctint t8 = t8a + t9a; 1524cabdff1aSopenharmony_ci dctint t9 = t8a - t9a; 1525cabdff1aSopenharmony_ci dctint t10 = t11a - t10a; 1526cabdff1aSopenharmony_ci dctint t11 = t11a + t10a; 1527cabdff1aSopenharmony_ci dctint t12 = t12a + t13a; 1528cabdff1aSopenharmony_ci dctint t13 = t12a - t13a; 1529cabdff1aSopenharmony_ci dctint t14 = t15a - t14a; 1530cabdff1aSopenharmony_ci dctint t15 = t15a + t14a; 1531cabdff1aSopenharmony_ci dctint t16 = t16a + t17a; 1532cabdff1aSopenharmony_ci dctint t17 = t16a - t17a; 1533cabdff1aSopenharmony_ci dctint t18 = t19a - t18a; 1534cabdff1aSopenharmony_ci dctint t19 = t19a + t18a; 1535cabdff1aSopenharmony_ci dctint t20 = t20a + t21a; 1536cabdff1aSopenharmony_ci dctint t21 = t20a - t21a; 1537cabdff1aSopenharmony_ci dctint t22 = t23a - t22a; 1538cabdff1aSopenharmony_ci dctint t23 = t23a + t22a; 1539cabdff1aSopenharmony_ci dctint t24 = t24a + t25a; 1540cabdff1aSopenharmony_ci dctint t25 = t24a - t25a; 1541cabdff1aSopenharmony_ci dctint t26 = t27a - t26a; 1542cabdff1aSopenharmony_ci dctint t27 = t27a + t26a; 1543cabdff1aSopenharmony_ci dctint t28 = t28a + t29a; 1544cabdff1aSopenharmony_ci dctint t29 = t28a - t29a; 1545cabdff1aSopenharmony_ci dctint t30 = t31a - t30a; 1546cabdff1aSopenharmony_ci dctint t31 = t31a + t30a; 1547cabdff1aSopenharmony_ci 1548cabdff1aSopenharmony_ci t5a = (dctint)((t6 - t5) * 11585U + (1 << 13)) >> 14; 1549cabdff1aSopenharmony_ci t6a = (dctint)((t6 + t5) * 11585U + (1 << 13)) >> 14; 1550cabdff1aSopenharmony_ci t9a = (dctint)( t14 * 6270U - t9 * 15137U + (1 << 13)) >> 14; 1551cabdff1aSopenharmony_ci t14a = (dctint)( t14 * 15137U + t9 * 6270U + (1 << 13)) >> 14; 1552cabdff1aSopenharmony_ci t10a = (dctint)(-(t13 * 15137U + t10 * 6270U) + (1 << 13)) >> 14; 1553cabdff1aSopenharmony_ci t13a = (dctint)( t13 * 6270U - t10 * 15137U + (1 << 13)) >> 14; 1554cabdff1aSopenharmony_ci t17a = (dctint)( t30 * 3196U - t17 * 16069U + (1 << 13)) >> 14; 1555cabdff1aSopenharmony_ci t30a = (dctint)( t30 * 16069U + t17 * 3196U + (1 << 13)) >> 14; 1556cabdff1aSopenharmony_ci t18a = (dctint)(-(t29 * 16069U + t18 * 3196U) + (1 << 13)) >> 14; 1557cabdff1aSopenharmony_ci t29a = (dctint)( t29 * 3196U - t18 * 16069U + (1 << 13)) >> 14; 1558cabdff1aSopenharmony_ci t21a = (dctint)( t26 * 13623U - t21 * 9102U + (1 << 13)) >> 14; 1559cabdff1aSopenharmony_ci t26a = (dctint)( t26 * 9102U + t21 * 13623U + (1 << 13)) >> 14; 1560cabdff1aSopenharmony_ci t22a = (dctint)(-(t25 * 9102U + t22 * 13623U) + (1 << 13)) >> 14; 1561cabdff1aSopenharmony_ci t25a = (dctint)( t25 * 13623U - t22 * 9102U + (1 << 13)) >> 14; 1562cabdff1aSopenharmony_ci 1563cabdff1aSopenharmony_ci t0a = t0 + t7; 1564cabdff1aSopenharmony_ci t1a = t1 + t6a; 1565cabdff1aSopenharmony_ci t2a = t2 + t5a; 1566cabdff1aSopenharmony_ci t3a = t3 + t4; 1567cabdff1aSopenharmony_ci t4a = t3 - t4; 1568cabdff1aSopenharmony_ci t5 = t2 - t5a; 1569cabdff1aSopenharmony_ci t6 = t1 - t6a; 1570cabdff1aSopenharmony_ci t7a = t0 - t7; 1571cabdff1aSopenharmony_ci t8a = t8 + t11; 1572cabdff1aSopenharmony_ci t9 = t9a + t10a; 1573cabdff1aSopenharmony_ci t10 = t9a - t10a; 1574cabdff1aSopenharmony_ci t11a = t8 - t11; 1575cabdff1aSopenharmony_ci t12a = t15 - t12; 1576cabdff1aSopenharmony_ci t13 = t14a - t13a; 1577cabdff1aSopenharmony_ci t14 = t14a + t13a; 1578cabdff1aSopenharmony_ci t15a = t15 + t12; 1579cabdff1aSopenharmony_ci t16a = t16 + t19; 1580cabdff1aSopenharmony_ci t17 = t17a + t18a; 1581cabdff1aSopenharmony_ci t18 = t17a - t18a; 1582cabdff1aSopenharmony_ci t19a = t16 - t19; 1583cabdff1aSopenharmony_ci t20a = t23 - t20; 1584cabdff1aSopenharmony_ci t21 = t22a - t21a; 1585cabdff1aSopenharmony_ci t22 = t22a + t21a; 1586cabdff1aSopenharmony_ci t23a = t23 + t20; 1587cabdff1aSopenharmony_ci t24a = t24 + t27; 1588cabdff1aSopenharmony_ci t25 = t25a + t26a; 1589cabdff1aSopenharmony_ci t26 = t25a - t26a; 1590cabdff1aSopenharmony_ci t27a = t24 - t27; 1591cabdff1aSopenharmony_ci t28a = t31 - t28; 1592cabdff1aSopenharmony_ci t29 = t30a - t29a; 1593cabdff1aSopenharmony_ci t30 = t30a + t29a; 1594cabdff1aSopenharmony_ci t31a = t31 + t28; 1595cabdff1aSopenharmony_ci 1596cabdff1aSopenharmony_ci t10a = (dctint)((t13 - t10) * 11585U + (1 << 13)) >> 14; 1597cabdff1aSopenharmony_ci t13a = (dctint)((t13 + t10) * 11585U + (1 << 13)) >> 14; 1598cabdff1aSopenharmony_ci t11 = (dctint)((t12a - t11a) * 11585U + (1 << 13)) >> 14; 1599cabdff1aSopenharmony_ci t12 = (dctint)((t12a + t11a) * 11585U + (1 << 13)) >> 14; 1600cabdff1aSopenharmony_ci t18a = (dctint)( t29 * 6270U - t18 * 15137U + (1 << 13)) >> 14; 1601cabdff1aSopenharmony_ci t29a = (dctint)( t29 * 15137U + t18 * 6270U + (1 << 13)) >> 14; 1602cabdff1aSopenharmony_ci t19 = (dctint)( t28a * 6270U - t19a * 15137U + (1 << 13)) >> 14; 1603cabdff1aSopenharmony_ci t28 = (dctint)( t28a * 15137U + t19a * 6270U + (1 << 13)) >> 14; 1604cabdff1aSopenharmony_ci t20 = (dctint)(-(t27a * 15137U + t20a * 6270U) + (1 << 13)) >> 14; 1605cabdff1aSopenharmony_ci t27 = (dctint)( t27a * 6270U - t20a * 15137U + (1 << 13)) >> 14; 1606cabdff1aSopenharmony_ci t21a = (dctint)(-(t26 * 15137U + t21 * 6270U) + (1 << 13)) >> 14; 1607cabdff1aSopenharmony_ci t26a = (dctint)( t26 * 6270U - t21 * 15137U + (1 << 13)) >> 14; 1608cabdff1aSopenharmony_ci 1609cabdff1aSopenharmony_ci t0 = t0a + t15a; 1610cabdff1aSopenharmony_ci t1 = t1a + t14; 1611cabdff1aSopenharmony_ci t2 = t2a + t13a; 1612cabdff1aSopenharmony_ci t3 = t3a + t12; 1613cabdff1aSopenharmony_ci t4 = t4a + t11; 1614cabdff1aSopenharmony_ci t5a = t5 + t10a; 1615cabdff1aSopenharmony_ci t6a = t6 + t9; 1616cabdff1aSopenharmony_ci t7 = t7a + t8a; 1617cabdff1aSopenharmony_ci t8 = t7a - t8a; 1618cabdff1aSopenharmony_ci t9a = t6 - t9; 1619cabdff1aSopenharmony_ci t10 = t5 - t10a; 1620cabdff1aSopenharmony_ci t11a = t4a - t11; 1621cabdff1aSopenharmony_ci t12a = t3a - t12; 1622cabdff1aSopenharmony_ci t13 = t2a - t13a; 1623cabdff1aSopenharmony_ci t14a = t1a - t14; 1624cabdff1aSopenharmony_ci t15 = t0a - t15a; 1625cabdff1aSopenharmony_ci t16 = t16a + t23a; 1626cabdff1aSopenharmony_ci t17a = t17 + t22; 1627cabdff1aSopenharmony_ci t18 = t18a + t21a; 1628cabdff1aSopenharmony_ci t19a = t19 + t20; 1629cabdff1aSopenharmony_ci t20a = t19 - t20; 1630cabdff1aSopenharmony_ci t21 = t18a - t21a; 1631cabdff1aSopenharmony_ci t22a = t17 - t22; 1632cabdff1aSopenharmony_ci t23 = t16a - t23a; 1633cabdff1aSopenharmony_ci t24 = t31a - t24a; 1634cabdff1aSopenharmony_ci t25a = t30 - t25; 1635cabdff1aSopenharmony_ci t26 = t29a - t26a; 1636cabdff1aSopenharmony_ci t27a = t28 - t27; 1637cabdff1aSopenharmony_ci t28a = t28 + t27; 1638cabdff1aSopenharmony_ci t29 = t29a + t26a; 1639cabdff1aSopenharmony_ci t30a = t30 + t25; 1640cabdff1aSopenharmony_ci t31 = t31a + t24a; 1641cabdff1aSopenharmony_ci 1642cabdff1aSopenharmony_ci t20 = (dctint)((t27a - t20a) * 11585U + (1 << 13)) >> 14; 1643cabdff1aSopenharmony_ci t27 = (dctint)((t27a + t20a) * 11585U + (1 << 13)) >> 14; 1644cabdff1aSopenharmony_ci t21a = (dctint)((t26 - t21 ) * 11585U + (1 << 13)) >> 14; 1645cabdff1aSopenharmony_ci t26a = (dctint)((t26 + t21 ) * 11585U + (1 << 13)) >> 14; 1646cabdff1aSopenharmony_ci t22 = (dctint)((t25a - t22a) * 11585U + (1 << 13)) >> 14; 1647cabdff1aSopenharmony_ci t25 = (dctint)((t25a + t22a) * 11585U + (1 << 13)) >> 14; 1648cabdff1aSopenharmony_ci t23a = (dctint)((t24 - t23 ) * 11585U + (1 << 13)) >> 14; 1649cabdff1aSopenharmony_ci t24a = (dctint)((t24 + t23 ) * 11585U + (1 << 13)) >> 14; 1650cabdff1aSopenharmony_ci 1651cabdff1aSopenharmony_ci out[ 0] = t0 + t31; 1652cabdff1aSopenharmony_ci out[ 1] = t1 + t30a; 1653cabdff1aSopenharmony_ci out[ 2] = t2 + t29; 1654cabdff1aSopenharmony_ci out[ 3] = t3 + t28a; 1655cabdff1aSopenharmony_ci out[ 4] = t4 + t27; 1656cabdff1aSopenharmony_ci out[ 5] = t5a + t26a; 1657cabdff1aSopenharmony_ci out[ 6] = t6a + t25; 1658cabdff1aSopenharmony_ci out[ 7] = t7 + t24a; 1659cabdff1aSopenharmony_ci out[ 8] = t8 + t23a; 1660cabdff1aSopenharmony_ci out[ 9] = t9a + t22; 1661cabdff1aSopenharmony_ci out[10] = t10 + t21a; 1662cabdff1aSopenharmony_ci out[11] = t11a + t20; 1663cabdff1aSopenharmony_ci out[12] = t12a + t19a; 1664cabdff1aSopenharmony_ci out[13] = t13 + t18; 1665cabdff1aSopenharmony_ci out[14] = t14a + t17a; 1666cabdff1aSopenharmony_ci out[15] = t15 + t16; 1667cabdff1aSopenharmony_ci out[16] = t15 - t16; 1668cabdff1aSopenharmony_ci out[17] = t14a - t17a; 1669cabdff1aSopenharmony_ci out[18] = t13 - t18; 1670cabdff1aSopenharmony_ci out[19] = t12a - t19a; 1671cabdff1aSopenharmony_ci out[20] = t11a - t20; 1672cabdff1aSopenharmony_ci out[21] = t10 - t21a; 1673cabdff1aSopenharmony_ci out[22] = t9a - t22; 1674cabdff1aSopenharmony_ci out[23] = t8 - t23a; 1675cabdff1aSopenharmony_ci out[24] = t7 - t24a; 1676cabdff1aSopenharmony_ci out[25] = t6a - t25; 1677cabdff1aSopenharmony_ci out[26] = t5a - t26a; 1678cabdff1aSopenharmony_ci out[27] = t4 - t27; 1679cabdff1aSopenharmony_ci out[28] = t3 - t28a; 1680cabdff1aSopenharmony_ci out[29] = t2 - t29; 1681cabdff1aSopenharmony_ci out[30] = t1 - t30a; 1682cabdff1aSopenharmony_ci out[31] = t0 - t31; 1683cabdff1aSopenharmony_ci} 1684cabdff1aSopenharmony_ci 1685cabdff1aSopenharmony_ciitxfm_wrapper(idct, idct, 32, 6, 1) 1686cabdff1aSopenharmony_ci 1687cabdff1aSopenharmony_cistatic av_always_inline void iwht4_1d(const dctcoef *in, ptrdiff_t stride, 1688cabdff1aSopenharmony_ci dctcoef *out, int pass) 1689cabdff1aSopenharmony_ci{ 1690cabdff1aSopenharmony_ci int t0, t1, t2, t3, t4; 1691cabdff1aSopenharmony_ci 1692cabdff1aSopenharmony_ci if (pass == 0) { 1693cabdff1aSopenharmony_ci t0 = IN(0) >> 2; 1694cabdff1aSopenharmony_ci t1 = IN(3) >> 2; 1695cabdff1aSopenharmony_ci t2 = IN(1) >> 2; 1696cabdff1aSopenharmony_ci t3 = IN(2) >> 2; 1697cabdff1aSopenharmony_ci } else { 1698cabdff1aSopenharmony_ci t0 = IN(0); 1699cabdff1aSopenharmony_ci t1 = IN(3); 1700cabdff1aSopenharmony_ci t2 = IN(1); 1701cabdff1aSopenharmony_ci t3 = IN(2); 1702cabdff1aSopenharmony_ci } 1703cabdff1aSopenharmony_ci 1704cabdff1aSopenharmony_ci t0 += t2; 1705cabdff1aSopenharmony_ci t3 -= t1; 1706cabdff1aSopenharmony_ci t4 = (t0 - t3) >> 1; 1707cabdff1aSopenharmony_ci t1 = t4 - t1; 1708cabdff1aSopenharmony_ci t2 = t4 - t2; 1709cabdff1aSopenharmony_ci t0 -= t1; 1710cabdff1aSopenharmony_ci t3 += t2; 1711cabdff1aSopenharmony_ci 1712cabdff1aSopenharmony_ci out[0] = t0; 1713cabdff1aSopenharmony_ci out[1] = t1; 1714cabdff1aSopenharmony_ci out[2] = t2; 1715cabdff1aSopenharmony_ci out[3] = t3; 1716cabdff1aSopenharmony_ci} 1717cabdff1aSopenharmony_ci 1718cabdff1aSopenharmony_ciitxfm_wrapper(iwht, iwht, 4, 0, 0) 1719cabdff1aSopenharmony_ci 1720cabdff1aSopenharmony_ci#undef IN 1721cabdff1aSopenharmony_ci#undef itxfm_wrapper 1722cabdff1aSopenharmony_ci#undef itxfm_wrap 1723cabdff1aSopenharmony_ci 1724cabdff1aSopenharmony_cistatic av_cold void vp9dsp_itxfm_init(VP9DSPContext *dsp) 1725cabdff1aSopenharmony_ci{ 1726cabdff1aSopenharmony_ci#define init_itxfm(tx, sz) \ 1727cabdff1aSopenharmony_ci dsp->itxfm_add[tx][DCT_DCT] = idct_idct_##sz##_add_c; \ 1728cabdff1aSopenharmony_ci dsp->itxfm_add[tx][DCT_ADST] = iadst_idct_##sz##_add_c; \ 1729cabdff1aSopenharmony_ci dsp->itxfm_add[tx][ADST_DCT] = idct_iadst_##sz##_add_c; \ 1730cabdff1aSopenharmony_ci dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c 1731cabdff1aSopenharmony_ci 1732cabdff1aSopenharmony_ci#define init_idct(tx, nm) \ 1733cabdff1aSopenharmony_ci dsp->itxfm_add[tx][DCT_DCT] = \ 1734cabdff1aSopenharmony_ci dsp->itxfm_add[tx][ADST_DCT] = \ 1735cabdff1aSopenharmony_ci dsp->itxfm_add[tx][DCT_ADST] = \ 1736cabdff1aSopenharmony_ci dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c 1737cabdff1aSopenharmony_ci 1738cabdff1aSopenharmony_ci init_itxfm(TX_4X4, 4x4); 1739cabdff1aSopenharmony_ci init_itxfm(TX_8X8, 8x8); 1740cabdff1aSopenharmony_ci init_itxfm(TX_16X16, 16x16); 1741cabdff1aSopenharmony_ci init_idct(TX_32X32, idct_idct_32x32); 1742cabdff1aSopenharmony_ci init_idct(4 /* lossless */, iwht_iwht_4x4); 1743cabdff1aSopenharmony_ci 1744cabdff1aSopenharmony_ci#undef init_itxfm 1745cabdff1aSopenharmony_ci#undef init_idct 1746cabdff1aSopenharmony_ci} 1747cabdff1aSopenharmony_ci 1748cabdff1aSopenharmony_cistatic av_always_inline void loop_filter(pixel *dst, int E, int I, int H, 1749cabdff1aSopenharmony_ci ptrdiff_t stridea, ptrdiff_t strideb, 1750cabdff1aSopenharmony_ci int wd) 1751cabdff1aSopenharmony_ci{ 1752cabdff1aSopenharmony_ci int i, F = 1 << (BIT_DEPTH - 8); 1753cabdff1aSopenharmony_ci 1754cabdff1aSopenharmony_ci E <<= (BIT_DEPTH - 8); 1755cabdff1aSopenharmony_ci I <<= (BIT_DEPTH - 8); 1756cabdff1aSopenharmony_ci H <<= (BIT_DEPTH - 8); 1757cabdff1aSopenharmony_ci for (i = 0; i < 8; i++, dst += stridea) { 1758cabdff1aSopenharmony_ci int p7, p6, p5, p4; 1759cabdff1aSopenharmony_ci int p3 = dst[strideb * -4], p2 = dst[strideb * -3]; 1760cabdff1aSopenharmony_ci int p1 = dst[strideb * -2], p0 = dst[strideb * -1]; 1761cabdff1aSopenharmony_ci int q0 = dst[strideb * +0], q1 = dst[strideb * +1]; 1762cabdff1aSopenharmony_ci int q2 = dst[strideb * +2], q3 = dst[strideb * +3]; 1763cabdff1aSopenharmony_ci int q4, q5, q6, q7; 1764cabdff1aSopenharmony_ci int fm = FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && 1765cabdff1aSopenharmony_ci FFABS(p1 - p0) <= I && FFABS(q1 - q0) <= I && 1766cabdff1aSopenharmony_ci FFABS(q2 - q1) <= I && FFABS(q3 - q2) <= I && 1767cabdff1aSopenharmony_ci FFABS(p0 - q0) * 2 + (FFABS(p1 - q1) >> 1) <= E; 1768cabdff1aSopenharmony_ci int flat8out, flat8in; 1769cabdff1aSopenharmony_ci 1770cabdff1aSopenharmony_ci if (!fm) 1771cabdff1aSopenharmony_ci continue; 1772cabdff1aSopenharmony_ci 1773cabdff1aSopenharmony_ci if (wd >= 16) { 1774cabdff1aSopenharmony_ci p7 = dst[strideb * -8]; 1775cabdff1aSopenharmony_ci p6 = dst[strideb * -7]; 1776cabdff1aSopenharmony_ci p5 = dst[strideb * -6]; 1777cabdff1aSopenharmony_ci p4 = dst[strideb * -5]; 1778cabdff1aSopenharmony_ci q4 = dst[strideb * +4]; 1779cabdff1aSopenharmony_ci q5 = dst[strideb * +5]; 1780cabdff1aSopenharmony_ci q6 = dst[strideb * +6]; 1781cabdff1aSopenharmony_ci q7 = dst[strideb * +7]; 1782cabdff1aSopenharmony_ci 1783cabdff1aSopenharmony_ci flat8out = FFABS(p7 - p0) <= F && FFABS(p6 - p0) <= F && 1784cabdff1aSopenharmony_ci FFABS(p5 - p0) <= F && FFABS(p4 - p0) <= F && 1785cabdff1aSopenharmony_ci FFABS(q4 - q0) <= F && FFABS(q5 - q0) <= F && 1786cabdff1aSopenharmony_ci FFABS(q6 - q0) <= F && FFABS(q7 - q0) <= F; 1787cabdff1aSopenharmony_ci } 1788cabdff1aSopenharmony_ci 1789cabdff1aSopenharmony_ci if (wd >= 8) 1790cabdff1aSopenharmony_ci flat8in = FFABS(p3 - p0) <= F && FFABS(p2 - p0) <= F && 1791cabdff1aSopenharmony_ci FFABS(p1 - p0) <= F && FFABS(q1 - q0) <= F && 1792cabdff1aSopenharmony_ci FFABS(q2 - q0) <= F && FFABS(q3 - q0) <= F; 1793cabdff1aSopenharmony_ci 1794cabdff1aSopenharmony_ci if (wd >= 16 && flat8out && flat8in) { 1795cabdff1aSopenharmony_ci dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 + 1796cabdff1aSopenharmony_ci p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4; 1797cabdff1aSopenharmony_ci dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 + 1798cabdff1aSopenharmony_ci p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4; 1799cabdff1aSopenharmony_ci dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 + 1800cabdff1aSopenharmony_ci p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4; 1801cabdff1aSopenharmony_ci dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 + 1802cabdff1aSopenharmony_ci p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4; 1803cabdff1aSopenharmony_ci dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 + 1804cabdff1aSopenharmony_ci p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4; 1805cabdff1aSopenharmony_ci dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + 1806cabdff1aSopenharmony_ci p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4; 1807cabdff1aSopenharmony_ci dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + 1808cabdff1aSopenharmony_ci q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4; 1809cabdff1aSopenharmony_ci dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 + 1810cabdff1aSopenharmony_ci q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4; 1811cabdff1aSopenharmony_ci dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 + 1812cabdff1aSopenharmony_ci q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4; 1813cabdff1aSopenharmony_ci dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 + 1814cabdff1aSopenharmony_ci q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4; 1815cabdff1aSopenharmony_ci dst[strideb * +3] = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 + 1816cabdff1aSopenharmony_ci q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4; 1817cabdff1aSopenharmony_ci dst[strideb * +4] = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 + 1818cabdff1aSopenharmony_ci q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4; 1819cabdff1aSopenharmony_ci dst[strideb * +5] = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 + 1820cabdff1aSopenharmony_ci q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4; 1821cabdff1aSopenharmony_ci dst[strideb * +6] = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + 1822cabdff1aSopenharmony_ci q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4; 1823cabdff1aSopenharmony_ci } else if (wd >= 8 && flat8in) { 1824cabdff1aSopenharmony_ci dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3; 1825cabdff1aSopenharmony_ci dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3; 1826cabdff1aSopenharmony_ci dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3; 1827cabdff1aSopenharmony_ci dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3; 1828cabdff1aSopenharmony_ci dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3; 1829cabdff1aSopenharmony_ci dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3; 1830cabdff1aSopenharmony_ci } else { 1831cabdff1aSopenharmony_ci int hev = FFABS(p1 - p0) > H || FFABS(q1 - q0) > H; 1832cabdff1aSopenharmony_ci 1833cabdff1aSopenharmony_ci if (hev) { 1834cabdff1aSopenharmony_ci int f = av_clip_intp2(p1 - q1, BIT_DEPTH - 1), f1, f2; 1835cabdff1aSopenharmony_ci f = av_clip_intp2(3 * (q0 - p0) + f, BIT_DEPTH - 1); 1836cabdff1aSopenharmony_ci 1837cabdff1aSopenharmony_ci f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3; 1838cabdff1aSopenharmony_ci f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3; 1839cabdff1aSopenharmony_ci 1840cabdff1aSopenharmony_ci dst[strideb * -1] = av_clip_pixel(p0 + f2); 1841cabdff1aSopenharmony_ci dst[strideb * +0] = av_clip_pixel(q0 - f1); 1842cabdff1aSopenharmony_ci } else { 1843cabdff1aSopenharmony_ci int f = av_clip_intp2(3 * (q0 - p0), BIT_DEPTH - 1), f1, f2; 1844cabdff1aSopenharmony_ci 1845cabdff1aSopenharmony_ci f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3; 1846cabdff1aSopenharmony_ci f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3; 1847cabdff1aSopenharmony_ci 1848cabdff1aSopenharmony_ci dst[strideb * -1] = av_clip_pixel(p0 + f2); 1849cabdff1aSopenharmony_ci dst[strideb * +0] = av_clip_pixel(q0 - f1); 1850cabdff1aSopenharmony_ci 1851cabdff1aSopenharmony_ci f = (f1 + 1) >> 1; 1852cabdff1aSopenharmony_ci dst[strideb * -2] = av_clip_pixel(p1 + f); 1853cabdff1aSopenharmony_ci dst[strideb * +1] = av_clip_pixel(q1 - f); 1854cabdff1aSopenharmony_ci } 1855cabdff1aSopenharmony_ci } 1856cabdff1aSopenharmony_ci } 1857cabdff1aSopenharmony_ci} 1858cabdff1aSopenharmony_ci 1859cabdff1aSopenharmony_ci#define lf_8_fn(dir, wd, stridea, strideb) \ 1860cabdff1aSopenharmony_cistatic void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \ 1861cabdff1aSopenharmony_ci ptrdiff_t stride, \ 1862cabdff1aSopenharmony_ci int E, int I, int H) \ 1863cabdff1aSopenharmony_ci{ \ 1864cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; \ 1865cabdff1aSopenharmony_ci stride /= sizeof(pixel); \ 1866cabdff1aSopenharmony_ci loop_filter(dst, E, I, H, stridea, strideb, wd); \ 1867cabdff1aSopenharmony_ci} 1868cabdff1aSopenharmony_ci 1869cabdff1aSopenharmony_ci#define lf_8_fns(wd) \ 1870cabdff1aSopenharmony_cilf_8_fn(h, wd, stride, 1) \ 1871cabdff1aSopenharmony_cilf_8_fn(v, wd, 1, stride) 1872cabdff1aSopenharmony_ci 1873cabdff1aSopenharmony_cilf_8_fns(4) 1874cabdff1aSopenharmony_cilf_8_fns(8) 1875cabdff1aSopenharmony_cilf_8_fns(16) 1876cabdff1aSopenharmony_ci 1877cabdff1aSopenharmony_ci#undef lf_8_fn 1878cabdff1aSopenharmony_ci#undef lf_8_fns 1879cabdff1aSopenharmony_ci 1880cabdff1aSopenharmony_ci#define lf_16_fn(dir, stridea) \ 1881cabdff1aSopenharmony_cistatic void loop_filter_##dir##_16_16_c(uint8_t *dst, \ 1882cabdff1aSopenharmony_ci ptrdiff_t stride, \ 1883cabdff1aSopenharmony_ci int E, int I, int H) \ 1884cabdff1aSopenharmony_ci{ \ 1885cabdff1aSopenharmony_ci loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \ 1886cabdff1aSopenharmony_ci loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \ 1887cabdff1aSopenharmony_ci} 1888cabdff1aSopenharmony_ci 1889cabdff1aSopenharmony_cilf_16_fn(h, stride) 1890cabdff1aSopenharmony_cilf_16_fn(v, sizeof(pixel)) 1891cabdff1aSopenharmony_ci 1892cabdff1aSopenharmony_ci#undef lf_16_fn 1893cabdff1aSopenharmony_ci 1894cabdff1aSopenharmony_ci#define lf_mix_fn(dir, wd1, wd2, stridea) \ 1895cabdff1aSopenharmony_cistatic void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \ 1896cabdff1aSopenharmony_ci ptrdiff_t stride, \ 1897cabdff1aSopenharmony_ci int E, int I, int H) \ 1898cabdff1aSopenharmony_ci{ \ 1899cabdff1aSopenharmony_ci loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \ 1900cabdff1aSopenharmony_ci loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \ 1901cabdff1aSopenharmony_ci} 1902cabdff1aSopenharmony_ci 1903cabdff1aSopenharmony_ci#define lf_mix_fns(wd1, wd2) \ 1904cabdff1aSopenharmony_cilf_mix_fn(h, wd1, wd2, stride) \ 1905cabdff1aSopenharmony_cilf_mix_fn(v, wd1, wd2, sizeof(pixel)) 1906cabdff1aSopenharmony_ci 1907cabdff1aSopenharmony_cilf_mix_fns(4, 4) 1908cabdff1aSopenharmony_cilf_mix_fns(4, 8) 1909cabdff1aSopenharmony_cilf_mix_fns(8, 4) 1910cabdff1aSopenharmony_cilf_mix_fns(8, 8) 1911cabdff1aSopenharmony_ci 1912cabdff1aSopenharmony_ci#undef lf_mix_fn 1913cabdff1aSopenharmony_ci#undef lf_mix_fns 1914cabdff1aSopenharmony_ci 1915cabdff1aSopenharmony_cistatic av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp) 1916cabdff1aSopenharmony_ci{ 1917cabdff1aSopenharmony_ci dsp->loop_filter_8[0][0] = loop_filter_h_4_8_c; 1918cabdff1aSopenharmony_ci dsp->loop_filter_8[0][1] = loop_filter_v_4_8_c; 1919cabdff1aSopenharmony_ci dsp->loop_filter_8[1][0] = loop_filter_h_8_8_c; 1920cabdff1aSopenharmony_ci dsp->loop_filter_8[1][1] = loop_filter_v_8_8_c; 1921cabdff1aSopenharmony_ci dsp->loop_filter_8[2][0] = loop_filter_h_16_8_c; 1922cabdff1aSopenharmony_ci dsp->loop_filter_8[2][1] = loop_filter_v_16_8_c; 1923cabdff1aSopenharmony_ci 1924cabdff1aSopenharmony_ci dsp->loop_filter_16[0] = loop_filter_h_16_16_c; 1925cabdff1aSopenharmony_ci dsp->loop_filter_16[1] = loop_filter_v_16_16_c; 1926cabdff1aSopenharmony_ci 1927cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_c; 1928cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_c; 1929cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_c; 1930cabdff1aSopenharmony_ci dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_c; 1931cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_c; 1932cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][0][1] = loop_filter_v_84_16_c; 1933cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][1][0] = loop_filter_h_88_16_c; 1934cabdff1aSopenharmony_ci dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c; 1935cabdff1aSopenharmony_ci} 1936cabdff1aSopenharmony_ci 1937cabdff1aSopenharmony_ci#if BIT_DEPTH != 12 1938cabdff1aSopenharmony_ci 1939cabdff1aSopenharmony_cistatic av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride, 1940cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, 1941cabdff1aSopenharmony_ci int w, int h) 1942cabdff1aSopenharmony_ci{ 1943cabdff1aSopenharmony_ci do { 1944cabdff1aSopenharmony_ci memcpy(dst, src, w * sizeof(pixel)); 1945cabdff1aSopenharmony_ci 1946cabdff1aSopenharmony_ci dst += dst_stride; 1947cabdff1aSopenharmony_ci src += src_stride; 1948cabdff1aSopenharmony_ci } while (--h); 1949cabdff1aSopenharmony_ci} 1950cabdff1aSopenharmony_ci 1951cabdff1aSopenharmony_cistatic av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride, 1952cabdff1aSopenharmony_ci const uint8_t *_src, ptrdiff_t src_stride, 1953cabdff1aSopenharmony_ci int w, int h) 1954cabdff1aSopenharmony_ci{ 1955cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 1956cabdff1aSopenharmony_ci const pixel *src = (const pixel *) _src; 1957cabdff1aSopenharmony_ci 1958cabdff1aSopenharmony_ci dst_stride /= sizeof(pixel); 1959cabdff1aSopenharmony_ci src_stride /= sizeof(pixel); 1960cabdff1aSopenharmony_ci do { 1961cabdff1aSopenharmony_ci int x; 1962cabdff1aSopenharmony_ci 1963cabdff1aSopenharmony_ci for (x = 0; x < w; x += 4) 1964cabdff1aSopenharmony_ci AV_WN4PA(&dst[x], rnd_avg_pixel4(AV_RN4PA(&dst[x]), AV_RN4P(&src[x]))); 1965cabdff1aSopenharmony_ci 1966cabdff1aSopenharmony_ci dst += dst_stride; 1967cabdff1aSopenharmony_ci src += src_stride; 1968cabdff1aSopenharmony_ci } while (--h); 1969cabdff1aSopenharmony_ci} 1970cabdff1aSopenharmony_ci 1971cabdff1aSopenharmony_ci#define fpel_fn(type, sz) \ 1972cabdff1aSopenharmony_cistatic void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 1973cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 1974cabdff1aSopenharmony_ci int h, int mx, int my) \ 1975cabdff1aSopenharmony_ci{ \ 1976cabdff1aSopenharmony_ci type##_c(dst, dst_stride, src, src_stride, sz, h); \ 1977cabdff1aSopenharmony_ci} 1978cabdff1aSopenharmony_ci 1979cabdff1aSopenharmony_ci#define copy_avg_fn(sz) \ 1980cabdff1aSopenharmony_cifpel_fn(copy, sz) \ 1981cabdff1aSopenharmony_cifpel_fn(avg, sz) 1982cabdff1aSopenharmony_ci 1983cabdff1aSopenharmony_cicopy_avg_fn(64) 1984cabdff1aSopenharmony_cicopy_avg_fn(32) 1985cabdff1aSopenharmony_cicopy_avg_fn(16) 1986cabdff1aSopenharmony_cicopy_avg_fn(8) 1987cabdff1aSopenharmony_cicopy_avg_fn(4) 1988cabdff1aSopenharmony_ci 1989cabdff1aSopenharmony_ci#undef fpel_fn 1990cabdff1aSopenharmony_ci#undef copy_avg_fn 1991cabdff1aSopenharmony_ci 1992cabdff1aSopenharmony_ci#endif /* BIT_DEPTH != 12 */ 1993cabdff1aSopenharmony_ci 1994cabdff1aSopenharmony_ci#define FILTER_8TAP(src, x, F, stride) \ 1995cabdff1aSopenharmony_ci av_clip_pixel((F[0] * src[x + -3 * stride] + \ 1996cabdff1aSopenharmony_ci F[1] * src[x + -2 * stride] + \ 1997cabdff1aSopenharmony_ci F[2] * src[x + -1 * stride] + \ 1998cabdff1aSopenharmony_ci F[3] * src[x + +0 * stride] + \ 1999cabdff1aSopenharmony_ci F[4] * src[x + +1 * stride] + \ 2000cabdff1aSopenharmony_ci F[5] * src[x + +2 * stride] + \ 2001cabdff1aSopenharmony_ci F[6] * src[x + +3 * stride] + \ 2002cabdff1aSopenharmony_ci F[7] * src[x + +4 * stride] + 64) >> 7) 2003cabdff1aSopenharmony_ci 2004cabdff1aSopenharmony_cistatic av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride, 2005cabdff1aSopenharmony_ci const uint8_t *_src, ptrdiff_t src_stride, 2006cabdff1aSopenharmony_ci int w, int h, ptrdiff_t ds, 2007cabdff1aSopenharmony_ci const int16_t *filter, int avg) 2008cabdff1aSopenharmony_ci{ 2009cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 2010cabdff1aSopenharmony_ci const pixel *src = (const pixel *) _src; 2011cabdff1aSopenharmony_ci 2012cabdff1aSopenharmony_ci dst_stride /= sizeof(pixel); 2013cabdff1aSopenharmony_ci src_stride /= sizeof(pixel); 2014cabdff1aSopenharmony_ci do { 2015cabdff1aSopenharmony_ci int x; 2016cabdff1aSopenharmony_ci 2017cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2018cabdff1aSopenharmony_ci if (avg) { 2019cabdff1aSopenharmony_ci dst[x] = (dst[x] + FILTER_8TAP(src, x, filter, ds) + 1) >> 1; 2020cabdff1aSopenharmony_ci } else { 2021cabdff1aSopenharmony_ci dst[x] = FILTER_8TAP(src, x, filter, ds); 2022cabdff1aSopenharmony_ci } 2023cabdff1aSopenharmony_ci 2024cabdff1aSopenharmony_ci dst += dst_stride; 2025cabdff1aSopenharmony_ci src += src_stride; 2026cabdff1aSopenharmony_ci } while (--h); 2027cabdff1aSopenharmony_ci} 2028cabdff1aSopenharmony_ci 2029cabdff1aSopenharmony_ci#define filter_8tap_1d_fn(opn, opa, dir, ds) \ 2030cabdff1aSopenharmony_cistatic av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2031cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2032cabdff1aSopenharmony_ci int w, int h, const int16_t *filter) \ 2033cabdff1aSopenharmony_ci{ \ 2034cabdff1aSopenharmony_ci do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ 2035cabdff1aSopenharmony_ci} 2036cabdff1aSopenharmony_ci 2037cabdff1aSopenharmony_cifilter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel)) 2038cabdff1aSopenharmony_cifilter_8tap_1d_fn(put, 0, h, 1) 2039cabdff1aSopenharmony_cifilter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel)) 2040cabdff1aSopenharmony_cifilter_8tap_1d_fn(avg, 1, h, 1) 2041cabdff1aSopenharmony_ci 2042cabdff1aSopenharmony_ci#undef filter_8tap_1d_fn 2043cabdff1aSopenharmony_ci 2044cabdff1aSopenharmony_cistatic av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, 2045cabdff1aSopenharmony_ci const uint8_t *_src, ptrdiff_t src_stride, 2046cabdff1aSopenharmony_ci int w, int h, const int16_t *filterx, 2047cabdff1aSopenharmony_ci const int16_t *filtery, int avg) 2048cabdff1aSopenharmony_ci{ 2049cabdff1aSopenharmony_ci int tmp_h = h + 7; 2050cabdff1aSopenharmony_ci pixel tmp[64 * 71], *tmp_ptr = tmp; 2051cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 2052cabdff1aSopenharmony_ci const pixel *src = (const pixel *) _src; 2053cabdff1aSopenharmony_ci 2054cabdff1aSopenharmony_ci dst_stride /= sizeof(pixel); 2055cabdff1aSopenharmony_ci src_stride /= sizeof(pixel); 2056cabdff1aSopenharmony_ci src -= src_stride * 3; 2057cabdff1aSopenharmony_ci do { 2058cabdff1aSopenharmony_ci int x; 2059cabdff1aSopenharmony_ci 2060cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2061cabdff1aSopenharmony_ci tmp_ptr[x] = FILTER_8TAP(src, x, filterx, 1); 2062cabdff1aSopenharmony_ci 2063cabdff1aSopenharmony_ci tmp_ptr += 64; 2064cabdff1aSopenharmony_ci src += src_stride; 2065cabdff1aSopenharmony_ci } while (--tmp_h); 2066cabdff1aSopenharmony_ci 2067cabdff1aSopenharmony_ci tmp_ptr = tmp + 64 * 3; 2068cabdff1aSopenharmony_ci do { 2069cabdff1aSopenharmony_ci int x; 2070cabdff1aSopenharmony_ci 2071cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2072cabdff1aSopenharmony_ci if (avg) { 2073cabdff1aSopenharmony_ci dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1; 2074cabdff1aSopenharmony_ci } else { 2075cabdff1aSopenharmony_ci dst[x] = FILTER_8TAP(tmp_ptr, x, filtery, 64); 2076cabdff1aSopenharmony_ci } 2077cabdff1aSopenharmony_ci 2078cabdff1aSopenharmony_ci tmp_ptr += 64; 2079cabdff1aSopenharmony_ci dst += dst_stride; 2080cabdff1aSopenharmony_ci } while (--h); 2081cabdff1aSopenharmony_ci} 2082cabdff1aSopenharmony_ci 2083cabdff1aSopenharmony_ci#define filter_8tap_2d_fn(opn, opa) \ 2084cabdff1aSopenharmony_cistatic av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2085cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2086cabdff1aSopenharmony_ci int w, int h, const int16_t *filterx, \ 2087cabdff1aSopenharmony_ci const int16_t *filtery) \ 2088cabdff1aSopenharmony_ci{ \ 2089cabdff1aSopenharmony_ci do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \ 2090cabdff1aSopenharmony_ci} 2091cabdff1aSopenharmony_ci 2092cabdff1aSopenharmony_cifilter_8tap_2d_fn(put, 0) 2093cabdff1aSopenharmony_cifilter_8tap_2d_fn(avg, 1) 2094cabdff1aSopenharmony_ci 2095cabdff1aSopenharmony_ci#undef filter_8tap_2d_fn 2096cabdff1aSopenharmony_ci 2097cabdff1aSopenharmony_ci#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \ 2098cabdff1aSopenharmony_cistatic void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2099cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2100cabdff1aSopenharmony_ci int h, int mx, int my) \ 2101cabdff1aSopenharmony_ci{ \ 2102cabdff1aSopenharmony_ci avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \ 2103cabdff1aSopenharmony_ci ff_vp9_subpel_filters[type_idx][dir_m]); \ 2104cabdff1aSopenharmony_ci} 2105cabdff1aSopenharmony_ci 2106cabdff1aSopenharmony_ci#define filter_fn_2d(sz, type, type_idx, avg) \ 2107cabdff1aSopenharmony_cistatic void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2108cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2109cabdff1aSopenharmony_ci int h, int mx, int my) \ 2110cabdff1aSopenharmony_ci{ \ 2111cabdff1aSopenharmony_ci avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \ 2112cabdff1aSopenharmony_ci ff_vp9_subpel_filters[type_idx][mx], \ 2113cabdff1aSopenharmony_ci ff_vp9_subpel_filters[type_idx][my]); \ 2114cabdff1aSopenharmony_ci} 2115cabdff1aSopenharmony_ci 2116cabdff1aSopenharmony_ci#if BIT_DEPTH != 12 2117cabdff1aSopenharmony_ci 2118cabdff1aSopenharmony_ci#define FILTER_BILIN(src, x, mxy, stride) \ 2119cabdff1aSopenharmony_ci (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4)) 2120cabdff1aSopenharmony_ci 2121cabdff1aSopenharmony_cistatic av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride, 2122cabdff1aSopenharmony_ci const uint8_t *_src, ptrdiff_t src_stride, 2123cabdff1aSopenharmony_ci int w, int h, ptrdiff_t ds, int mxy, int avg) 2124cabdff1aSopenharmony_ci{ 2125cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 2126cabdff1aSopenharmony_ci const pixel *src = (const pixel *) _src; 2127cabdff1aSopenharmony_ci 2128cabdff1aSopenharmony_ci dst_stride /= sizeof(pixel); 2129cabdff1aSopenharmony_ci src_stride /= sizeof(pixel); 2130cabdff1aSopenharmony_ci do { 2131cabdff1aSopenharmony_ci int x; 2132cabdff1aSopenharmony_ci 2133cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2134cabdff1aSopenharmony_ci if (avg) { 2135cabdff1aSopenharmony_ci dst[x] = (dst[x] + FILTER_BILIN(src, x, mxy, ds) + 1) >> 1; 2136cabdff1aSopenharmony_ci } else { 2137cabdff1aSopenharmony_ci dst[x] = FILTER_BILIN(src, x, mxy, ds); 2138cabdff1aSopenharmony_ci } 2139cabdff1aSopenharmony_ci 2140cabdff1aSopenharmony_ci dst += dst_stride; 2141cabdff1aSopenharmony_ci src += src_stride; 2142cabdff1aSopenharmony_ci } while (--h); 2143cabdff1aSopenharmony_ci} 2144cabdff1aSopenharmony_ci 2145cabdff1aSopenharmony_ci#define bilin_1d_fn(opn, opa, dir, ds) \ 2146cabdff1aSopenharmony_cistatic av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2147cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2148cabdff1aSopenharmony_ci int w, int h, int mxy) \ 2149cabdff1aSopenharmony_ci{ \ 2150cabdff1aSopenharmony_ci do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \ 2151cabdff1aSopenharmony_ci} 2152cabdff1aSopenharmony_ci 2153cabdff1aSopenharmony_cibilin_1d_fn(put, 0, v, src_stride / sizeof(pixel)) 2154cabdff1aSopenharmony_cibilin_1d_fn(put, 0, h, 1) 2155cabdff1aSopenharmony_cibilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel)) 2156cabdff1aSopenharmony_cibilin_1d_fn(avg, 1, h, 1) 2157cabdff1aSopenharmony_ci 2158cabdff1aSopenharmony_ci#undef bilin_1d_fn 2159cabdff1aSopenharmony_ci 2160cabdff1aSopenharmony_cistatic av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride, 2161cabdff1aSopenharmony_ci const uint8_t *_src, ptrdiff_t src_stride, 2162cabdff1aSopenharmony_ci int w, int h, int mx, int my, int avg) 2163cabdff1aSopenharmony_ci{ 2164cabdff1aSopenharmony_ci pixel tmp[64 * 65], *tmp_ptr = tmp; 2165cabdff1aSopenharmony_ci int tmp_h = h + 1; 2166cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 2167cabdff1aSopenharmony_ci const pixel *src = (const pixel *) _src; 2168cabdff1aSopenharmony_ci 2169cabdff1aSopenharmony_ci dst_stride /= sizeof(pixel); 2170cabdff1aSopenharmony_ci src_stride /= sizeof(pixel); 2171cabdff1aSopenharmony_ci do { 2172cabdff1aSopenharmony_ci int x; 2173cabdff1aSopenharmony_ci 2174cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2175cabdff1aSopenharmony_ci tmp_ptr[x] = FILTER_BILIN(src, x, mx, 1); 2176cabdff1aSopenharmony_ci 2177cabdff1aSopenharmony_ci tmp_ptr += 64; 2178cabdff1aSopenharmony_ci src += src_stride; 2179cabdff1aSopenharmony_ci } while (--tmp_h); 2180cabdff1aSopenharmony_ci 2181cabdff1aSopenharmony_ci tmp_ptr = tmp; 2182cabdff1aSopenharmony_ci do { 2183cabdff1aSopenharmony_ci int x; 2184cabdff1aSopenharmony_ci 2185cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2186cabdff1aSopenharmony_ci if (avg) { 2187cabdff1aSopenharmony_ci dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1; 2188cabdff1aSopenharmony_ci } else { 2189cabdff1aSopenharmony_ci dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64); 2190cabdff1aSopenharmony_ci } 2191cabdff1aSopenharmony_ci 2192cabdff1aSopenharmony_ci tmp_ptr += 64; 2193cabdff1aSopenharmony_ci dst += dst_stride; 2194cabdff1aSopenharmony_ci } while (--h); 2195cabdff1aSopenharmony_ci} 2196cabdff1aSopenharmony_ci 2197cabdff1aSopenharmony_ci#define bilin_2d_fn(opn, opa) \ 2198cabdff1aSopenharmony_cistatic av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2199cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2200cabdff1aSopenharmony_ci int w, int h, int mx, int my) \ 2201cabdff1aSopenharmony_ci{ \ 2202cabdff1aSopenharmony_ci do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \ 2203cabdff1aSopenharmony_ci} 2204cabdff1aSopenharmony_ci 2205cabdff1aSopenharmony_cibilin_2d_fn(put, 0) 2206cabdff1aSopenharmony_cibilin_2d_fn(avg, 1) 2207cabdff1aSopenharmony_ci 2208cabdff1aSopenharmony_ci#undef bilin_2d_fn 2209cabdff1aSopenharmony_ci 2210cabdff1aSopenharmony_ci#define bilinf_fn_1d(sz, dir, dir_m, avg) \ 2211cabdff1aSopenharmony_cistatic void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2212cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2213cabdff1aSopenharmony_ci int h, int mx, int my) \ 2214cabdff1aSopenharmony_ci{ \ 2215cabdff1aSopenharmony_ci avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \ 2216cabdff1aSopenharmony_ci} 2217cabdff1aSopenharmony_ci 2218cabdff1aSopenharmony_ci#define bilinf_fn_2d(sz, avg) \ 2219cabdff1aSopenharmony_cistatic void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2220cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2221cabdff1aSopenharmony_ci int h, int mx, int my) \ 2222cabdff1aSopenharmony_ci{ \ 2223cabdff1aSopenharmony_ci avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \ 2224cabdff1aSopenharmony_ci} 2225cabdff1aSopenharmony_ci 2226cabdff1aSopenharmony_ci#else 2227cabdff1aSopenharmony_ci 2228cabdff1aSopenharmony_ci#define bilinf_fn_1d(a, b, c, d) 2229cabdff1aSopenharmony_ci#define bilinf_fn_2d(a, b) 2230cabdff1aSopenharmony_ci 2231cabdff1aSopenharmony_ci#endif 2232cabdff1aSopenharmony_ci 2233cabdff1aSopenharmony_ci#define filter_fn(sz, avg) \ 2234cabdff1aSopenharmony_cifilter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \ 2235cabdff1aSopenharmony_cifilter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \ 2236cabdff1aSopenharmony_cifilter_fn_2d(sz, regular, FILTER_8TAP_REGULAR, avg) \ 2237cabdff1aSopenharmony_cifilter_fn_1d(sz, h, mx, smooth, FILTER_8TAP_SMOOTH, avg) \ 2238cabdff1aSopenharmony_cifilter_fn_1d(sz, v, my, smooth, FILTER_8TAP_SMOOTH, avg) \ 2239cabdff1aSopenharmony_cifilter_fn_2d(sz, smooth, FILTER_8TAP_SMOOTH, avg) \ 2240cabdff1aSopenharmony_cifilter_fn_1d(sz, h, mx, sharp, FILTER_8TAP_SHARP, avg) \ 2241cabdff1aSopenharmony_cifilter_fn_1d(sz, v, my, sharp, FILTER_8TAP_SHARP, avg) \ 2242cabdff1aSopenharmony_cifilter_fn_2d(sz, sharp, FILTER_8TAP_SHARP, avg) \ 2243cabdff1aSopenharmony_cibilinf_fn_1d(sz, h, mx, avg) \ 2244cabdff1aSopenharmony_cibilinf_fn_1d(sz, v, my, avg) \ 2245cabdff1aSopenharmony_cibilinf_fn_2d(sz, avg) 2246cabdff1aSopenharmony_ci 2247cabdff1aSopenharmony_ci#define filter_fn_set(avg) \ 2248cabdff1aSopenharmony_cifilter_fn(64, avg) \ 2249cabdff1aSopenharmony_cifilter_fn(32, avg) \ 2250cabdff1aSopenharmony_cifilter_fn(16, avg) \ 2251cabdff1aSopenharmony_cifilter_fn(8, avg) \ 2252cabdff1aSopenharmony_cifilter_fn(4, avg) 2253cabdff1aSopenharmony_ci 2254cabdff1aSopenharmony_cifilter_fn_set(put) 2255cabdff1aSopenharmony_cifilter_fn_set(avg) 2256cabdff1aSopenharmony_ci 2257cabdff1aSopenharmony_ci#undef filter_fn 2258cabdff1aSopenharmony_ci#undef filter_fn_set 2259cabdff1aSopenharmony_ci#undef filter_fn_1d 2260cabdff1aSopenharmony_ci#undef filter_fn_2d 2261cabdff1aSopenharmony_ci#undef bilinf_fn_1d 2262cabdff1aSopenharmony_ci#undef bilinf_fn_2d 2263cabdff1aSopenharmony_ci 2264cabdff1aSopenharmony_ci#if BIT_DEPTH != 8 2265cabdff1aSopenharmony_civoid ff_vp9dsp_mc_init_10(VP9DSPContext *dsp); 2266cabdff1aSopenharmony_ci#endif 2267cabdff1aSopenharmony_ci#if BIT_DEPTH != 10 2268cabdff1aSopenharmony_cistatic 2269cabdff1aSopenharmony_ci#endif 2270cabdff1aSopenharmony_ciav_cold void FUNC(ff_vp9dsp_mc_init)(VP9DSPContext *dsp) 2271cabdff1aSopenharmony_ci{ 2272cabdff1aSopenharmony_ci#if BIT_DEPTH == 12 2273cabdff1aSopenharmony_ci ff_vp9dsp_mc_init_10(dsp); 2274cabdff1aSopenharmony_ci#else /* BIT_DEPTH == 12 */ 2275cabdff1aSopenharmony_ci 2276cabdff1aSopenharmony_ci#define init_fpel(idx1, idx2, sz, type) \ 2277cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \ 2278cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \ 2279cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = type##sz##_c; \ 2280cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = type##sz##_c 2281cabdff1aSopenharmony_ci 2282cabdff1aSopenharmony_ci#define init_copy_avg(idx, sz) \ 2283cabdff1aSopenharmony_ci init_fpel(idx, 0, sz, copy); \ 2284cabdff1aSopenharmony_ci init_fpel(idx, 1, sz, avg) 2285cabdff1aSopenharmony_ci 2286cabdff1aSopenharmony_ci init_copy_avg(0, 64); 2287cabdff1aSopenharmony_ci init_copy_avg(1, 32); 2288cabdff1aSopenharmony_ci init_copy_avg(2, 16); 2289cabdff1aSopenharmony_ci init_copy_avg(3, 8); 2290cabdff1aSopenharmony_ci init_copy_avg(4, 4); 2291cabdff1aSopenharmony_ci 2292cabdff1aSopenharmony_ci#undef init_copy_avg 2293cabdff1aSopenharmony_ci#undef init_fpel 2294cabdff1aSopenharmony_ci 2295cabdff1aSopenharmony_ci#endif /* BIT_DEPTH == 12 */ 2296cabdff1aSopenharmony_ci 2297cabdff1aSopenharmony_ci#define init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type) \ 2298cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \ 2299cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \ 2300cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c 2301cabdff1aSopenharmony_ci 2302cabdff1aSopenharmony_ci#if BIT_DEPTH == 12 2303cabdff1aSopenharmony_ci#define init_subpel1 init_subpel1_bd_aware 2304cabdff1aSopenharmony_ci#else 2305cabdff1aSopenharmony_ci#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \ 2306cabdff1aSopenharmony_ci init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type); \ 2307cabdff1aSopenharmony_ci dsp->mc[idx1][FILTER_BILINEAR ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c 2308cabdff1aSopenharmony_ci#endif 2309cabdff1aSopenharmony_ci 2310cabdff1aSopenharmony_ci#define init_subpel2(idx, idxh, idxv, dir, type) \ 2311cabdff1aSopenharmony_ci init_subpel1(0, idx, idxh, idxv, 64, dir, type); \ 2312cabdff1aSopenharmony_ci init_subpel1(1, idx, idxh, idxv, 32, dir, type); \ 2313cabdff1aSopenharmony_ci init_subpel1(2, idx, idxh, idxv, 16, dir, type); \ 2314cabdff1aSopenharmony_ci init_subpel1(3, idx, idxh, idxv, 8, dir, type); \ 2315cabdff1aSopenharmony_ci init_subpel1(4, idx, idxh, idxv, 4, dir, type) 2316cabdff1aSopenharmony_ci 2317cabdff1aSopenharmony_ci#define init_subpel3(idx, type) \ 2318cabdff1aSopenharmony_ci init_subpel2(idx, 1, 1, hv, type); \ 2319cabdff1aSopenharmony_ci init_subpel2(idx, 0, 1, v, type); \ 2320cabdff1aSopenharmony_ci init_subpel2(idx, 1, 0, h, type) 2321cabdff1aSopenharmony_ci 2322cabdff1aSopenharmony_ci init_subpel3(0, put); 2323cabdff1aSopenharmony_ci init_subpel3(1, avg); 2324cabdff1aSopenharmony_ci 2325cabdff1aSopenharmony_ci#undef init_subpel1 2326cabdff1aSopenharmony_ci#undef init_subpel2 2327cabdff1aSopenharmony_ci#undef init_subpel3 2328cabdff1aSopenharmony_ci#undef init_subpel1_bd_aware 2329cabdff1aSopenharmony_ci} 2330cabdff1aSopenharmony_ci 2331cabdff1aSopenharmony_cistatic av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride, 2332cabdff1aSopenharmony_ci const uint8_t *_src, ptrdiff_t src_stride, 2333cabdff1aSopenharmony_ci int w, int h, int mx, int my, 2334cabdff1aSopenharmony_ci int dx, int dy, int avg, 2335cabdff1aSopenharmony_ci const int16_t (*filters)[8]) 2336cabdff1aSopenharmony_ci{ 2337cabdff1aSopenharmony_ci int tmp_h = (((h - 1) * dy + my) >> 4) + 8; 2338cabdff1aSopenharmony_ci pixel tmp[64 * 135], *tmp_ptr = tmp; 2339cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 2340cabdff1aSopenharmony_ci const pixel *src = (const pixel *) _src; 2341cabdff1aSopenharmony_ci 2342cabdff1aSopenharmony_ci dst_stride /= sizeof(pixel); 2343cabdff1aSopenharmony_ci src_stride /= sizeof(pixel); 2344cabdff1aSopenharmony_ci src -= src_stride * 3; 2345cabdff1aSopenharmony_ci do { 2346cabdff1aSopenharmony_ci int x; 2347cabdff1aSopenharmony_ci int imx = mx, ioff = 0; 2348cabdff1aSopenharmony_ci 2349cabdff1aSopenharmony_ci for (x = 0; x < w; x++) { 2350cabdff1aSopenharmony_ci tmp_ptr[x] = FILTER_8TAP(src, ioff, filters[imx], 1); 2351cabdff1aSopenharmony_ci imx += dx; 2352cabdff1aSopenharmony_ci ioff += imx >> 4; 2353cabdff1aSopenharmony_ci imx &= 0xf; 2354cabdff1aSopenharmony_ci } 2355cabdff1aSopenharmony_ci 2356cabdff1aSopenharmony_ci tmp_ptr += 64; 2357cabdff1aSopenharmony_ci src += src_stride; 2358cabdff1aSopenharmony_ci } while (--tmp_h); 2359cabdff1aSopenharmony_ci 2360cabdff1aSopenharmony_ci tmp_ptr = tmp + 64 * 3; 2361cabdff1aSopenharmony_ci do { 2362cabdff1aSopenharmony_ci int x; 2363cabdff1aSopenharmony_ci const int16_t *filter = filters[my]; 2364cabdff1aSopenharmony_ci 2365cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2366cabdff1aSopenharmony_ci if (avg) { 2367cabdff1aSopenharmony_ci dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filter, 64) + 1) >> 1; 2368cabdff1aSopenharmony_ci } else { 2369cabdff1aSopenharmony_ci dst[x] = FILTER_8TAP(tmp_ptr, x, filter, 64); 2370cabdff1aSopenharmony_ci } 2371cabdff1aSopenharmony_ci 2372cabdff1aSopenharmony_ci my += dy; 2373cabdff1aSopenharmony_ci tmp_ptr += (my >> 4) * 64; 2374cabdff1aSopenharmony_ci my &= 0xf; 2375cabdff1aSopenharmony_ci dst += dst_stride; 2376cabdff1aSopenharmony_ci } while (--h); 2377cabdff1aSopenharmony_ci} 2378cabdff1aSopenharmony_ci 2379cabdff1aSopenharmony_ci#define scaled_filter_8tap_fn(opn, opa) \ 2380cabdff1aSopenharmony_cistatic av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2381cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2382cabdff1aSopenharmony_ci int w, int h, int mx, int my, int dx, int dy, \ 2383cabdff1aSopenharmony_ci const int16_t (*filters)[8]) \ 2384cabdff1aSopenharmony_ci{ \ 2385cabdff1aSopenharmony_ci do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \ 2386cabdff1aSopenharmony_ci opa, filters); \ 2387cabdff1aSopenharmony_ci} 2388cabdff1aSopenharmony_ci 2389cabdff1aSopenharmony_ciscaled_filter_8tap_fn(put, 0) 2390cabdff1aSopenharmony_ciscaled_filter_8tap_fn(avg, 1) 2391cabdff1aSopenharmony_ci 2392cabdff1aSopenharmony_ci#undef scaled_filter_8tap_fn 2393cabdff1aSopenharmony_ci 2394cabdff1aSopenharmony_ci#undef FILTER_8TAP 2395cabdff1aSopenharmony_ci 2396cabdff1aSopenharmony_ci#define scaled_filter_fn(sz, type, type_idx, avg) \ 2397cabdff1aSopenharmony_cistatic void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2398cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2399cabdff1aSopenharmony_ci int h, int mx, int my, int dx, int dy) \ 2400cabdff1aSopenharmony_ci{ \ 2401cabdff1aSopenharmony_ci avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \ 2402cabdff1aSopenharmony_ci ff_vp9_subpel_filters[type_idx]); \ 2403cabdff1aSopenharmony_ci} 2404cabdff1aSopenharmony_ci 2405cabdff1aSopenharmony_ci#if BIT_DEPTH != 12 2406cabdff1aSopenharmony_ci 2407cabdff1aSopenharmony_cistatic av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride, 2408cabdff1aSopenharmony_ci const uint8_t *_src, ptrdiff_t src_stride, 2409cabdff1aSopenharmony_ci int w, int h, int mx, int my, 2410cabdff1aSopenharmony_ci int dx, int dy, int avg) 2411cabdff1aSopenharmony_ci{ 2412cabdff1aSopenharmony_ci pixel tmp[64 * 129], *tmp_ptr = tmp; 2413cabdff1aSopenharmony_ci int tmp_h = (((h - 1) * dy + my) >> 4) + 2; 2414cabdff1aSopenharmony_ci pixel *dst = (pixel *) _dst; 2415cabdff1aSopenharmony_ci const pixel *src = (const pixel *) _src; 2416cabdff1aSopenharmony_ci 2417cabdff1aSopenharmony_ci dst_stride /= sizeof(pixel); 2418cabdff1aSopenharmony_ci src_stride /= sizeof(pixel); 2419cabdff1aSopenharmony_ci do { 2420cabdff1aSopenharmony_ci int x; 2421cabdff1aSopenharmony_ci int imx = mx, ioff = 0; 2422cabdff1aSopenharmony_ci 2423cabdff1aSopenharmony_ci for (x = 0; x < w; x++) { 2424cabdff1aSopenharmony_ci tmp_ptr[x] = FILTER_BILIN(src, ioff, imx, 1); 2425cabdff1aSopenharmony_ci imx += dx; 2426cabdff1aSopenharmony_ci ioff += imx >> 4; 2427cabdff1aSopenharmony_ci imx &= 0xf; 2428cabdff1aSopenharmony_ci } 2429cabdff1aSopenharmony_ci 2430cabdff1aSopenharmony_ci tmp_ptr += 64; 2431cabdff1aSopenharmony_ci src += src_stride; 2432cabdff1aSopenharmony_ci } while (--tmp_h); 2433cabdff1aSopenharmony_ci 2434cabdff1aSopenharmony_ci tmp_ptr = tmp; 2435cabdff1aSopenharmony_ci do { 2436cabdff1aSopenharmony_ci int x; 2437cabdff1aSopenharmony_ci 2438cabdff1aSopenharmony_ci for (x = 0; x < w; x++) 2439cabdff1aSopenharmony_ci if (avg) { 2440cabdff1aSopenharmony_ci dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1; 2441cabdff1aSopenharmony_ci } else { 2442cabdff1aSopenharmony_ci dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64); 2443cabdff1aSopenharmony_ci } 2444cabdff1aSopenharmony_ci 2445cabdff1aSopenharmony_ci my += dy; 2446cabdff1aSopenharmony_ci tmp_ptr += (my >> 4) * 64; 2447cabdff1aSopenharmony_ci my &= 0xf; 2448cabdff1aSopenharmony_ci dst += dst_stride; 2449cabdff1aSopenharmony_ci } while (--h); 2450cabdff1aSopenharmony_ci} 2451cabdff1aSopenharmony_ci 2452cabdff1aSopenharmony_ci#define scaled_bilin_fn(opn, opa) \ 2453cabdff1aSopenharmony_cistatic av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2454cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2455cabdff1aSopenharmony_ci int w, int h, int mx, int my, int dx, int dy) \ 2456cabdff1aSopenharmony_ci{ \ 2457cabdff1aSopenharmony_ci do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \ 2458cabdff1aSopenharmony_ci} 2459cabdff1aSopenharmony_ci 2460cabdff1aSopenharmony_ciscaled_bilin_fn(put, 0) 2461cabdff1aSopenharmony_ciscaled_bilin_fn(avg, 1) 2462cabdff1aSopenharmony_ci 2463cabdff1aSopenharmony_ci#undef scaled_bilin_fn 2464cabdff1aSopenharmony_ci 2465cabdff1aSopenharmony_ci#undef FILTER_BILIN 2466cabdff1aSopenharmony_ci 2467cabdff1aSopenharmony_ci#define scaled_bilinf_fn(sz, avg) \ 2468cabdff1aSopenharmony_cistatic void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \ 2469cabdff1aSopenharmony_ci const uint8_t *src, ptrdiff_t src_stride, \ 2470cabdff1aSopenharmony_ci int h, int mx, int my, int dx, int dy) \ 2471cabdff1aSopenharmony_ci{ \ 2472cabdff1aSopenharmony_ci avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \ 2473cabdff1aSopenharmony_ci} 2474cabdff1aSopenharmony_ci 2475cabdff1aSopenharmony_ci#else 2476cabdff1aSopenharmony_ci 2477cabdff1aSopenharmony_ci#define scaled_bilinf_fn(a, b) 2478cabdff1aSopenharmony_ci 2479cabdff1aSopenharmony_ci#endif 2480cabdff1aSopenharmony_ci 2481cabdff1aSopenharmony_ci#define scaled_filter_fns(sz, avg) \ 2482cabdff1aSopenharmony_ciscaled_filter_fn(sz, regular, FILTER_8TAP_REGULAR, avg) \ 2483cabdff1aSopenharmony_ciscaled_filter_fn(sz, smooth, FILTER_8TAP_SMOOTH, avg) \ 2484cabdff1aSopenharmony_ciscaled_filter_fn(sz, sharp, FILTER_8TAP_SHARP, avg) \ 2485cabdff1aSopenharmony_ciscaled_bilinf_fn(sz, avg) 2486cabdff1aSopenharmony_ci 2487cabdff1aSopenharmony_ci#define scaled_filter_fn_set(avg) \ 2488cabdff1aSopenharmony_ciscaled_filter_fns(64, avg) \ 2489cabdff1aSopenharmony_ciscaled_filter_fns(32, avg) \ 2490cabdff1aSopenharmony_ciscaled_filter_fns(16, avg) \ 2491cabdff1aSopenharmony_ciscaled_filter_fns(8, avg) \ 2492cabdff1aSopenharmony_ciscaled_filter_fns(4, avg) 2493cabdff1aSopenharmony_ci 2494cabdff1aSopenharmony_ciscaled_filter_fn_set(put) 2495cabdff1aSopenharmony_ciscaled_filter_fn_set(avg) 2496cabdff1aSopenharmony_ci 2497cabdff1aSopenharmony_ci#undef scaled_filter_fns 2498cabdff1aSopenharmony_ci#undef scaled_filter_fn_set 2499cabdff1aSopenharmony_ci#undef scaled_filter_fn 2500cabdff1aSopenharmony_ci#undef scaled_bilinf_fn 2501cabdff1aSopenharmony_ci 2502cabdff1aSopenharmony_ci#if BIT_DEPTH != 8 2503cabdff1aSopenharmony_civoid ff_vp9dsp_scaled_mc_init_10(VP9DSPContext *dsp); 2504cabdff1aSopenharmony_ci#endif 2505cabdff1aSopenharmony_ci#if BIT_DEPTH != 10 2506cabdff1aSopenharmony_cistatic 2507cabdff1aSopenharmony_ci#endif 2508cabdff1aSopenharmony_ciav_cold void FUNC(ff_vp9dsp_scaled_mc_init)(VP9DSPContext *dsp) 2509cabdff1aSopenharmony_ci{ 2510cabdff1aSopenharmony_ci#define init_scaled_bd_aware(idx1, idx2, sz, type) \ 2511cabdff1aSopenharmony_ci dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \ 2512cabdff1aSopenharmony_ci dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \ 2513cabdff1aSopenharmony_ci dsp->smc[idx1][FILTER_8TAP_SHARP ][idx2] = type##_scaled_sharp_##sz##_c 2514cabdff1aSopenharmony_ci 2515cabdff1aSopenharmony_ci#if BIT_DEPTH == 12 2516cabdff1aSopenharmony_ci ff_vp9dsp_scaled_mc_init_10(dsp); 2517cabdff1aSopenharmony_ci#define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d) 2518cabdff1aSopenharmony_ci#else 2519cabdff1aSopenharmony_ci#define init_scaled(idx1, idx2, sz, type) \ 2520cabdff1aSopenharmony_ci init_scaled_bd_aware(idx1, idx2, sz, type); \ 2521cabdff1aSopenharmony_ci dsp->smc[idx1][FILTER_BILINEAR ][idx2] = type##_scaled_bilin_##sz##_c 2522cabdff1aSopenharmony_ci#endif 2523cabdff1aSopenharmony_ci 2524cabdff1aSopenharmony_ci#define init_scaled_put_avg(idx, sz) \ 2525cabdff1aSopenharmony_ci init_scaled(idx, 0, sz, put); \ 2526cabdff1aSopenharmony_ci init_scaled(idx, 1, sz, avg) 2527cabdff1aSopenharmony_ci 2528cabdff1aSopenharmony_ci init_scaled_put_avg(0, 64); 2529cabdff1aSopenharmony_ci init_scaled_put_avg(1, 32); 2530cabdff1aSopenharmony_ci init_scaled_put_avg(2, 16); 2531cabdff1aSopenharmony_ci init_scaled_put_avg(3, 8); 2532cabdff1aSopenharmony_ci init_scaled_put_avg(4, 4); 2533cabdff1aSopenharmony_ci 2534cabdff1aSopenharmony_ci#undef init_scaled_put_avg 2535cabdff1aSopenharmony_ci#undef init_scaled 2536cabdff1aSopenharmony_ci#undef init_scaled_bd_aware 2537cabdff1aSopenharmony_ci} 2538cabdff1aSopenharmony_ci 2539cabdff1aSopenharmony_ciav_cold void FUNC(ff_vp9dsp_init)(VP9DSPContext *dsp) 2540cabdff1aSopenharmony_ci{ 2541cabdff1aSopenharmony_ci FUNC(ff_vp9dsp_intrapred_init)(dsp); 2542cabdff1aSopenharmony_ci vp9dsp_itxfm_init(dsp); 2543cabdff1aSopenharmony_ci vp9dsp_loopfilter_init(dsp); 2544cabdff1aSopenharmony_ci FUNC(ff_vp9dsp_mc_init)(dsp); 2545cabdff1aSopenharmony_ci FUNC(ff_vp9dsp_scaled_mc_init)(dsp); 2546cabdff1aSopenharmony_ci} 2547