1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * Copyright (C) 2004 The FFmpeg project 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * This file is part of FFmpeg. 5cabdff1aSopenharmony_ci * 6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 10cabdff1aSopenharmony_ci * 11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14cabdff1aSopenharmony_ci * Lesser General Public License for more details. 15cabdff1aSopenharmony_ci * 16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19cabdff1aSopenharmony_ci */ 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci/** 22cabdff1aSopenharmony_ci * @file 23cabdff1aSopenharmony_ci * Standard C DSP-oriented functions cribbed from the original VP3 24cabdff1aSopenharmony_ci * source code. 25cabdff1aSopenharmony_ci */ 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_ci#include <string.h> 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ci#include "config.h" 30cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 31cabdff1aSopenharmony_ci#include "libavutil/common.h" 32cabdff1aSopenharmony_ci#include "libavutil/internal.h" 33cabdff1aSopenharmony_ci#include "libavutil/intreadwrite.h" 34cabdff1aSopenharmony_ci#include "libavutil/avassert.h" 35cabdff1aSopenharmony_ci 36cabdff1aSopenharmony_ci#include "rnd_avg.h" 37cabdff1aSopenharmony_ci#include "vp3dsp.h" 38cabdff1aSopenharmony_ci 39cabdff1aSopenharmony_ci#define IdctAdjustBeforeShift 8 40cabdff1aSopenharmony_ci#define xC1S7 64277 41cabdff1aSopenharmony_ci#define xC2S6 60547 42cabdff1aSopenharmony_ci#define xC3S5 54491 43cabdff1aSopenharmony_ci#define xC4S4 46341 44cabdff1aSopenharmony_ci#define xC5S3 36410 45cabdff1aSopenharmony_ci#define xC6S2 25080 46cabdff1aSopenharmony_ci#define xC7S1 12785 47cabdff1aSopenharmony_ci 48cabdff1aSopenharmony_ci#define M(a, b) ((int)((SUINT)(a) * (b)) >> 16) 49cabdff1aSopenharmony_ci 50cabdff1aSopenharmony_cistatic av_always_inline void idct(uint8_t *dst, ptrdiff_t stride, 51cabdff1aSopenharmony_ci int16_t *input, int type) 52cabdff1aSopenharmony_ci{ 53cabdff1aSopenharmony_ci int16_t *ip = input; 54cabdff1aSopenharmony_ci 55cabdff1aSopenharmony_ci int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; 56cabdff1aSopenharmony_ci int Ed, Gd, Add, Bdd, Fd, Hd; 57cabdff1aSopenharmony_ci 58cabdff1aSopenharmony_ci int i; 59cabdff1aSopenharmony_ci 60cabdff1aSopenharmony_ci /* Inverse DCT on the rows now */ 61cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 62cabdff1aSopenharmony_ci /* Check for non-zero values */ 63cabdff1aSopenharmony_ci if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | 64cabdff1aSopenharmony_ci ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) { 65cabdff1aSopenharmony_ci A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]); 66cabdff1aSopenharmony_ci B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]); 67cabdff1aSopenharmony_ci C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]); 68cabdff1aSopenharmony_ci D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]); 69cabdff1aSopenharmony_ci 70cabdff1aSopenharmony_ci Ad = M(xC4S4, (A - C)); 71cabdff1aSopenharmony_ci Bd = M(xC4S4, (B - D)); 72cabdff1aSopenharmony_ci 73cabdff1aSopenharmony_ci Cd = A + C; 74cabdff1aSopenharmony_ci Dd = B + D; 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ci E = M(xC4S4, (ip[0 * 8] + ip[4 * 8])); 77cabdff1aSopenharmony_ci F = M(xC4S4, (ip[0 * 8] - ip[4 * 8])); 78cabdff1aSopenharmony_ci 79cabdff1aSopenharmony_ci G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]); 80cabdff1aSopenharmony_ci H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]); 81cabdff1aSopenharmony_ci 82cabdff1aSopenharmony_ci Ed = E - G; 83cabdff1aSopenharmony_ci Gd = E + G; 84cabdff1aSopenharmony_ci 85cabdff1aSopenharmony_ci Add = F + Ad; 86cabdff1aSopenharmony_ci Bdd = Bd - H; 87cabdff1aSopenharmony_ci 88cabdff1aSopenharmony_ci Fd = F - Ad; 89cabdff1aSopenharmony_ci Hd = Bd + H; 90cabdff1aSopenharmony_ci 91cabdff1aSopenharmony_ci /* Final sequence of operations over-write original inputs. */ 92cabdff1aSopenharmony_ci ip[0 * 8] = Gd + Cd; 93cabdff1aSopenharmony_ci ip[7 * 8] = Gd - Cd; 94cabdff1aSopenharmony_ci 95cabdff1aSopenharmony_ci ip[1 * 8] = Add + Hd; 96cabdff1aSopenharmony_ci ip[2 * 8] = Add - Hd; 97cabdff1aSopenharmony_ci 98cabdff1aSopenharmony_ci ip[3 * 8] = Ed + Dd; 99cabdff1aSopenharmony_ci ip[4 * 8] = Ed - Dd; 100cabdff1aSopenharmony_ci 101cabdff1aSopenharmony_ci ip[5 * 8] = Fd + Bdd; 102cabdff1aSopenharmony_ci ip[6 * 8] = Fd - Bdd; 103cabdff1aSopenharmony_ci } 104cabdff1aSopenharmony_ci 105cabdff1aSopenharmony_ci ip += 1; /* next row */ 106cabdff1aSopenharmony_ci } 107cabdff1aSopenharmony_ci 108cabdff1aSopenharmony_ci ip = input; 109cabdff1aSopenharmony_ci 110cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 111cabdff1aSopenharmony_ci /* Check for non-zero values (bitwise or faster than ||) */ 112cabdff1aSopenharmony_ci if (ip[1] | ip[2] | ip[3] | 113cabdff1aSopenharmony_ci ip[4] | ip[5] | ip[6] | ip[7]) { 114cabdff1aSopenharmony_ci A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); 115cabdff1aSopenharmony_ci B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); 116cabdff1aSopenharmony_ci C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); 117cabdff1aSopenharmony_ci D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); 118cabdff1aSopenharmony_ci 119cabdff1aSopenharmony_ci Ad = M(xC4S4, (A - C)); 120cabdff1aSopenharmony_ci Bd = M(xC4S4, (B - D)); 121cabdff1aSopenharmony_ci 122cabdff1aSopenharmony_ci Cd = A + C; 123cabdff1aSopenharmony_ci Dd = B + D; 124cabdff1aSopenharmony_ci 125cabdff1aSopenharmony_ci E = M(xC4S4, (ip[0] + ip[4])) + 8; 126cabdff1aSopenharmony_ci F = M(xC4S4, (ip[0] - ip[4])) + 8; 127cabdff1aSopenharmony_ci 128cabdff1aSopenharmony_ci if (type == 1) { // HACK 129cabdff1aSopenharmony_ci E += 16 * 128; 130cabdff1aSopenharmony_ci F += 16 * 128; 131cabdff1aSopenharmony_ci } 132cabdff1aSopenharmony_ci 133cabdff1aSopenharmony_ci G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); 134cabdff1aSopenharmony_ci H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); 135cabdff1aSopenharmony_ci 136cabdff1aSopenharmony_ci Ed = E - G; 137cabdff1aSopenharmony_ci Gd = E + G; 138cabdff1aSopenharmony_ci 139cabdff1aSopenharmony_ci Add = F + Ad; 140cabdff1aSopenharmony_ci Bdd = Bd - H; 141cabdff1aSopenharmony_ci 142cabdff1aSopenharmony_ci Fd = F - Ad; 143cabdff1aSopenharmony_ci Hd = Bd + H; 144cabdff1aSopenharmony_ci 145cabdff1aSopenharmony_ci /* Final sequence of operations over-write original inputs. */ 146cabdff1aSopenharmony_ci if (type == 1) { 147cabdff1aSopenharmony_ci dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); 148cabdff1aSopenharmony_ci dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_ci dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); 151cabdff1aSopenharmony_ci dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); 152cabdff1aSopenharmony_ci 153cabdff1aSopenharmony_ci dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); 154cabdff1aSopenharmony_ci dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); 155cabdff1aSopenharmony_ci 156cabdff1aSopenharmony_ci dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); 157cabdff1aSopenharmony_ci dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); 158cabdff1aSopenharmony_ci } else { 159cabdff1aSopenharmony_ci dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); 160cabdff1aSopenharmony_ci dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); 161cabdff1aSopenharmony_ci 162cabdff1aSopenharmony_ci dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); 163cabdff1aSopenharmony_ci dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); 164cabdff1aSopenharmony_ci 165cabdff1aSopenharmony_ci dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); 166cabdff1aSopenharmony_ci dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); 167cabdff1aSopenharmony_ci 168cabdff1aSopenharmony_ci dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); 169cabdff1aSopenharmony_ci dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); 170cabdff1aSopenharmony_ci } 171cabdff1aSopenharmony_ci } else { 172cabdff1aSopenharmony_ci if (type == 1) { 173cabdff1aSopenharmony_ci dst[0*stride] = 174cabdff1aSopenharmony_ci dst[1*stride] = 175cabdff1aSopenharmony_ci dst[2*stride] = 176cabdff1aSopenharmony_ci dst[3*stride] = 177cabdff1aSopenharmony_ci dst[4*stride] = 178cabdff1aSopenharmony_ci dst[5*stride] = 179cabdff1aSopenharmony_ci dst[6*stride] = 180cabdff1aSopenharmony_ci dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20)); 181cabdff1aSopenharmony_ci } else { 182cabdff1aSopenharmony_ci if (ip[0]) { 183cabdff1aSopenharmony_ci int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20; 184cabdff1aSopenharmony_ci dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v); 185cabdff1aSopenharmony_ci dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v); 186cabdff1aSopenharmony_ci dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v); 187cabdff1aSopenharmony_ci dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v); 188cabdff1aSopenharmony_ci dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v); 189cabdff1aSopenharmony_ci dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v); 190cabdff1aSopenharmony_ci dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v); 191cabdff1aSopenharmony_ci dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v); 192cabdff1aSopenharmony_ci } 193cabdff1aSopenharmony_ci } 194cabdff1aSopenharmony_ci } 195cabdff1aSopenharmony_ci 196cabdff1aSopenharmony_ci ip += 8; /* next column */ 197cabdff1aSopenharmony_ci dst++; 198cabdff1aSopenharmony_ci } 199cabdff1aSopenharmony_ci} 200cabdff1aSopenharmony_ci 201cabdff1aSopenharmony_cistatic av_always_inline void idct10(uint8_t *dst, ptrdiff_t stride, 202cabdff1aSopenharmony_ci int16_t *input, int type) 203cabdff1aSopenharmony_ci{ 204cabdff1aSopenharmony_ci int16_t *ip = input; 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; 207cabdff1aSopenharmony_ci int Ed, Gd, Add, Bdd, Fd, Hd; 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_ci int i; 210cabdff1aSopenharmony_ci 211cabdff1aSopenharmony_ci /* Inverse DCT on the rows now */ 212cabdff1aSopenharmony_ci for (i = 0; i < 4; i++) { 213cabdff1aSopenharmony_ci /* Check for non-zero values */ 214cabdff1aSopenharmony_ci if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8]) { 215cabdff1aSopenharmony_ci A = M(xC1S7, ip[1 * 8]); 216cabdff1aSopenharmony_ci B = M(xC7S1, ip[1 * 8]); 217cabdff1aSopenharmony_ci C = M(xC3S5, ip[3 * 8]); 218cabdff1aSopenharmony_ci D = -M(xC5S3, ip[3 * 8]); 219cabdff1aSopenharmony_ci 220cabdff1aSopenharmony_ci Ad = M(xC4S4, (A - C)); 221cabdff1aSopenharmony_ci Bd = M(xC4S4, (B - D)); 222cabdff1aSopenharmony_ci 223cabdff1aSopenharmony_ci Cd = A + C; 224cabdff1aSopenharmony_ci Dd = B + D; 225cabdff1aSopenharmony_ci 226cabdff1aSopenharmony_ci E = M(xC4S4, ip[0 * 8]); 227cabdff1aSopenharmony_ci F = E; 228cabdff1aSopenharmony_ci 229cabdff1aSopenharmony_ci G = M(xC2S6, ip[2 * 8]); 230cabdff1aSopenharmony_ci H = M(xC6S2, ip[2 * 8]); 231cabdff1aSopenharmony_ci 232cabdff1aSopenharmony_ci Ed = E - G; 233cabdff1aSopenharmony_ci Gd = E + G; 234cabdff1aSopenharmony_ci 235cabdff1aSopenharmony_ci Add = F + Ad; 236cabdff1aSopenharmony_ci Bdd = Bd - H; 237cabdff1aSopenharmony_ci 238cabdff1aSopenharmony_ci Fd = F - Ad; 239cabdff1aSopenharmony_ci Hd = Bd + H; 240cabdff1aSopenharmony_ci 241cabdff1aSopenharmony_ci /* Final sequence of operations over-write original inputs */ 242cabdff1aSopenharmony_ci ip[0 * 8] = Gd + Cd; 243cabdff1aSopenharmony_ci ip[7 * 8] = Gd - Cd; 244cabdff1aSopenharmony_ci 245cabdff1aSopenharmony_ci ip[1 * 8] = Add + Hd; 246cabdff1aSopenharmony_ci ip[2 * 8] = Add - Hd; 247cabdff1aSopenharmony_ci 248cabdff1aSopenharmony_ci ip[3 * 8] = Ed + Dd; 249cabdff1aSopenharmony_ci ip[4 * 8] = Ed - Dd; 250cabdff1aSopenharmony_ci 251cabdff1aSopenharmony_ci ip[5 * 8] = Fd + Bdd; 252cabdff1aSopenharmony_ci ip[6 * 8] = Fd - Bdd; 253cabdff1aSopenharmony_ci 254cabdff1aSopenharmony_ci } 255cabdff1aSopenharmony_ci 256cabdff1aSopenharmony_ci ip += 1; 257cabdff1aSopenharmony_ci } 258cabdff1aSopenharmony_ci 259cabdff1aSopenharmony_ci ip = input; 260cabdff1aSopenharmony_ci 261cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 262cabdff1aSopenharmony_ci /* Check for non-zero values (bitwise or faster than ||) */ 263cabdff1aSopenharmony_ci if (ip[0] | ip[1] | ip[2] | ip[3]) { 264cabdff1aSopenharmony_ci A = M(xC1S7, ip[1]); 265cabdff1aSopenharmony_ci B = M(xC7S1, ip[1]); 266cabdff1aSopenharmony_ci C = M(xC3S5, ip[3]); 267cabdff1aSopenharmony_ci D = -M(xC5S3, ip[3]); 268cabdff1aSopenharmony_ci 269cabdff1aSopenharmony_ci Ad = M(xC4S4, (A - C)); 270cabdff1aSopenharmony_ci Bd = M(xC4S4, (B - D)); 271cabdff1aSopenharmony_ci 272cabdff1aSopenharmony_ci Cd = A + C; 273cabdff1aSopenharmony_ci Dd = B + D; 274cabdff1aSopenharmony_ci 275cabdff1aSopenharmony_ci E = M(xC4S4, ip[0]); 276cabdff1aSopenharmony_ci if (type == 1) 277cabdff1aSopenharmony_ci E += 16 * 128; 278cabdff1aSopenharmony_ci F = E; 279cabdff1aSopenharmony_ci 280cabdff1aSopenharmony_ci G = M(xC2S6, ip[2]); 281cabdff1aSopenharmony_ci H = M(xC6S2, ip[2]); 282cabdff1aSopenharmony_ci 283cabdff1aSopenharmony_ci Ed = E - G; 284cabdff1aSopenharmony_ci Gd = E + G; 285cabdff1aSopenharmony_ci 286cabdff1aSopenharmony_ci Add = F + Ad; 287cabdff1aSopenharmony_ci Bdd = Bd - H; 288cabdff1aSopenharmony_ci 289cabdff1aSopenharmony_ci Fd = F - Ad; 290cabdff1aSopenharmony_ci Hd = Bd + H; 291cabdff1aSopenharmony_ci 292cabdff1aSopenharmony_ci Gd += 8; 293cabdff1aSopenharmony_ci Add += 8; 294cabdff1aSopenharmony_ci Ed += 8; 295cabdff1aSopenharmony_ci Fd += 8; 296cabdff1aSopenharmony_ci 297cabdff1aSopenharmony_ci /* Final sequence of operations over-write original inputs. */ 298cabdff1aSopenharmony_ci if (type == 1) { 299cabdff1aSopenharmony_ci dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4); 300cabdff1aSopenharmony_ci dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4); 301cabdff1aSopenharmony_ci 302cabdff1aSopenharmony_ci dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4); 303cabdff1aSopenharmony_ci dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4); 304cabdff1aSopenharmony_ci 305cabdff1aSopenharmony_ci dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4); 306cabdff1aSopenharmony_ci dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4); 307cabdff1aSopenharmony_ci 308cabdff1aSopenharmony_ci dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4); 309cabdff1aSopenharmony_ci dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4); 310cabdff1aSopenharmony_ci } else { 311cabdff1aSopenharmony_ci dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4)); 312cabdff1aSopenharmony_ci dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4)); 313cabdff1aSopenharmony_ci 314cabdff1aSopenharmony_ci dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4)); 315cabdff1aSopenharmony_ci dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4)); 316cabdff1aSopenharmony_ci 317cabdff1aSopenharmony_ci dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4)); 318cabdff1aSopenharmony_ci dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4)); 319cabdff1aSopenharmony_ci 320cabdff1aSopenharmony_ci dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4)); 321cabdff1aSopenharmony_ci dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4)); 322cabdff1aSopenharmony_ci } 323cabdff1aSopenharmony_ci } else { 324cabdff1aSopenharmony_ci if (type == 1) { 325cabdff1aSopenharmony_ci dst[0*stride] = 326cabdff1aSopenharmony_ci dst[1*stride] = 327cabdff1aSopenharmony_ci dst[2*stride] = 328cabdff1aSopenharmony_ci dst[3*stride] = 329cabdff1aSopenharmony_ci dst[4*stride] = 330cabdff1aSopenharmony_ci dst[5*stride] = 331cabdff1aSopenharmony_ci dst[6*stride] = 332cabdff1aSopenharmony_ci dst[7*stride] = 128; 333cabdff1aSopenharmony_ci } 334cabdff1aSopenharmony_ci } 335cabdff1aSopenharmony_ci 336cabdff1aSopenharmony_ci ip += 8; 337cabdff1aSopenharmony_ci dst++; 338cabdff1aSopenharmony_ci } 339cabdff1aSopenharmony_ci} 340cabdff1aSopenharmony_ci 341cabdff1aSopenharmony_civoid ff_vp3dsp_idct10_put(uint8_t *dest, ptrdiff_t stride, int16_t *block) 342cabdff1aSopenharmony_ci{ 343cabdff1aSopenharmony_ci idct10(dest, stride, block, 1); 344cabdff1aSopenharmony_ci memset(block, 0, sizeof(*block) * 64); 345cabdff1aSopenharmony_ci} 346cabdff1aSopenharmony_ci 347cabdff1aSopenharmony_civoid ff_vp3dsp_idct10_add(uint8_t *dest, ptrdiff_t stride, int16_t *block) 348cabdff1aSopenharmony_ci{ 349cabdff1aSopenharmony_ci idct10(dest, stride, block, 2); 350cabdff1aSopenharmony_ci memset(block, 0, sizeof(*block) * 64); 351cabdff1aSopenharmony_ci} 352cabdff1aSopenharmony_ci 353cabdff1aSopenharmony_cistatic void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, 354cabdff1aSopenharmony_ci int16_t *block /* align 16 */) 355cabdff1aSopenharmony_ci{ 356cabdff1aSopenharmony_ci idct(dest, stride, block, 1); 357cabdff1aSopenharmony_ci memset(block, 0, sizeof(*block) * 64); 358cabdff1aSopenharmony_ci} 359cabdff1aSopenharmony_ci 360cabdff1aSopenharmony_cistatic void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, 361cabdff1aSopenharmony_ci int16_t *block /* align 16 */) 362cabdff1aSopenharmony_ci{ 363cabdff1aSopenharmony_ci idct(dest, stride, block, 2); 364cabdff1aSopenharmony_ci memset(block, 0, sizeof(*block) * 64); 365cabdff1aSopenharmony_ci} 366cabdff1aSopenharmony_ci 367cabdff1aSopenharmony_cistatic void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride, 368cabdff1aSopenharmony_ci int16_t *block /* align 16 */) 369cabdff1aSopenharmony_ci{ 370cabdff1aSopenharmony_ci int i, dc = (block[0] + 15) >> 5; 371cabdff1aSopenharmony_ci 372cabdff1aSopenharmony_ci for (i = 0; i < 8; i++) { 373cabdff1aSopenharmony_ci dest[0] = av_clip_uint8(dest[0] + dc); 374cabdff1aSopenharmony_ci dest[1] = av_clip_uint8(dest[1] + dc); 375cabdff1aSopenharmony_ci dest[2] = av_clip_uint8(dest[2] + dc); 376cabdff1aSopenharmony_ci dest[3] = av_clip_uint8(dest[3] + dc); 377cabdff1aSopenharmony_ci dest[4] = av_clip_uint8(dest[4] + dc); 378cabdff1aSopenharmony_ci dest[5] = av_clip_uint8(dest[5] + dc); 379cabdff1aSopenharmony_ci dest[6] = av_clip_uint8(dest[6] + dc); 380cabdff1aSopenharmony_ci dest[7] = av_clip_uint8(dest[7] + dc); 381cabdff1aSopenharmony_ci dest += stride; 382cabdff1aSopenharmony_ci } 383cabdff1aSopenharmony_ci block[0] = 0; 384cabdff1aSopenharmony_ci} 385cabdff1aSopenharmony_ci 386cabdff1aSopenharmony_cistatic av_always_inline void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, 387cabdff1aSopenharmony_ci int *bounding_values, int count) 388cabdff1aSopenharmony_ci{ 389cabdff1aSopenharmony_ci unsigned char *end; 390cabdff1aSopenharmony_ci int filter_value; 391cabdff1aSopenharmony_ci const ptrdiff_t nstride = -stride; 392cabdff1aSopenharmony_ci 393cabdff1aSopenharmony_ci for (end = first_pixel + count; first_pixel < end; first_pixel++) { 394cabdff1aSopenharmony_ci filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) + 395cabdff1aSopenharmony_ci (first_pixel[0] - first_pixel[nstride]) * 3; 396cabdff1aSopenharmony_ci filter_value = bounding_values[(filter_value + 4) >> 3]; 397cabdff1aSopenharmony_ci 398cabdff1aSopenharmony_ci first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value); 399cabdff1aSopenharmony_ci first_pixel[0] = av_clip_uint8(first_pixel[0] - filter_value); 400cabdff1aSopenharmony_ci } 401cabdff1aSopenharmony_ci} 402cabdff1aSopenharmony_ci 403cabdff1aSopenharmony_cistatic av_always_inline void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride, 404cabdff1aSopenharmony_ci int *bounding_values, int count) 405cabdff1aSopenharmony_ci{ 406cabdff1aSopenharmony_ci unsigned char *end; 407cabdff1aSopenharmony_ci int filter_value; 408cabdff1aSopenharmony_ci 409cabdff1aSopenharmony_ci for (end = first_pixel + count * stride; first_pixel != end; first_pixel += stride) { 410cabdff1aSopenharmony_ci filter_value = (first_pixel[-2] - first_pixel[1]) + 411cabdff1aSopenharmony_ci (first_pixel[ 0] - first_pixel[-1]) * 3; 412cabdff1aSopenharmony_ci filter_value = bounding_values[(filter_value + 4) >> 3]; 413cabdff1aSopenharmony_ci 414cabdff1aSopenharmony_ci first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value); 415cabdff1aSopenharmony_ci first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); 416cabdff1aSopenharmony_ci } 417cabdff1aSopenharmony_ci} 418cabdff1aSopenharmony_ci 419cabdff1aSopenharmony_ci#define LOOP_FILTER(prefix, suffix, dim, count) \ 420cabdff1aSopenharmony_civoid prefix##_##dim##_loop_filter_##count##suffix(uint8_t *first_pixel, ptrdiff_t stride, \ 421cabdff1aSopenharmony_ci int *bounding_values) \ 422cabdff1aSopenharmony_ci{ \ 423cabdff1aSopenharmony_ci vp3_##dim##_loop_filter_c(first_pixel, stride, bounding_values, count); \ 424cabdff1aSopenharmony_ci} 425cabdff1aSopenharmony_ci 426cabdff1aSopenharmony_cistatic LOOP_FILTER(vp3,_c, v, 8) 427cabdff1aSopenharmony_cistatic LOOP_FILTER(vp3,_c, h, 8) 428cabdff1aSopenharmony_ciLOOP_FILTER(ff_vp3dsp, , v, 12) 429cabdff1aSopenharmony_ciLOOP_FILTER(ff_vp3dsp, , h, 12) 430cabdff1aSopenharmony_ci 431cabdff1aSopenharmony_cistatic void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1, 432cabdff1aSopenharmony_ci const uint8_t *src2, ptrdiff_t stride, int h) 433cabdff1aSopenharmony_ci{ 434cabdff1aSopenharmony_ci int i; 435cabdff1aSopenharmony_ci 436cabdff1aSopenharmony_ci for (i = 0; i < h; i++) { 437cabdff1aSopenharmony_ci uint32_t a, b; 438cabdff1aSopenharmony_ci 439cabdff1aSopenharmony_ci a = AV_RN32(&src1[i * stride]); 440cabdff1aSopenharmony_ci b = AV_RN32(&src2[i * stride]); 441cabdff1aSopenharmony_ci AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b)); 442cabdff1aSopenharmony_ci a = AV_RN32(&src1[i * stride + 4]); 443cabdff1aSopenharmony_ci b = AV_RN32(&src2[i * stride + 4]); 444cabdff1aSopenharmony_ci AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b)); 445cabdff1aSopenharmony_ci } 446cabdff1aSopenharmony_ci} 447cabdff1aSopenharmony_ci 448cabdff1aSopenharmony_ciav_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) 449cabdff1aSopenharmony_ci{ 450cabdff1aSopenharmony_ci c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2; 451cabdff1aSopenharmony_ci 452cabdff1aSopenharmony_ci c->idct_put = vp3_idct_put_c; 453cabdff1aSopenharmony_ci c->idct_add = vp3_idct_add_c; 454cabdff1aSopenharmony_ci c->idct_dc_add = vp3_idct_dc_add_c; 455cabdff1aSopenharmony_ci c->v_loop_filter = c->v_loop_filter_unaligned = vp3_v_loop_filter_8_c; 456cabdff1aSopenharmony_ci c->h_loop_filter = c->h_loop_filter_unaligned = vp3_h_loop_filter_8_c; 457cabdff1aSopenharmony_ci 458cabdff1aSopenharmony_ci#if ARCH_ARM 459cabdff1aSopenharmony_ci ff_vp3dsp_init_arm(c, flags); 460cabdff1aSopenharmony_ci#elif ARCH_PPC 461cabdff1aSopenharmony_ci ff_vp3dsp_init_ppc(c, flags); 462cabdff1aSopenharmony_ci#elif ARCH_X86 463cabdff1aSopenharmony_ci ff_vp3dsp_init_x86(c, flags); 464cabdff1aSopenharmony_ci#elif ARCH_MIPS 465cabdff1aSopenharmony_ci ff_vp3dsp_init_mips(c, flags); 466cabdff1aSopenharmony_ci#endif 467cabdff1aSopenharmony_ci} 468cabdff1aSopenharmony_ci 469cabdff1aSopenharmony_ci/* 470cabdff1aSopenharmony_ci * This function initializes the loop filter boundary limits if the frame's 471cabdff1aSopenharmony_ci * quality index is different from the previous frame's. 472cabdff1aSopenharmony_ci * 473cabdff1aSopenharmony_ci * where sizeof(bounding_values_array) is 256 * sizeof(int) 474cabdff1aSopenharmony_ci * 475cabdff1aSopenharmony_ci * The filter_limit_values may not be larger than 127. 476cabdff1aSopenharmony_ci */ 477cabdff1aSopenharmony_civoid ff_vp3dsp_set_bounding_values(int * bounding_values_array, int filter_limit) 478cabdff1aSopenharmony_ci{ 479cabdff1aSopenharmony_ci int *bounding_values = bounding_values_array + 127; 480cabdff1aSopenharmony_ci int x; 481cabdff1aSopenharmony_ci int value; 482cabdff1aSopenharmony_ci 483cabdff1aSopenharmony_ci av_assert0(filter_limit < 128U); 484cabdff1aSopenharmony_ci 485cabdff1aSopenharmony_ci /* set up the bounding values */ 486cabdff1aSopenharmony_ci memset(bounding_values_array, 0, 256 * sizeof(int)); 487cabdff1aSopenharmony_ci for (x = 0; x < filter_limit; x++) { 488cabdff1aSopenharmony_ci bounding_values[-x] = -x; 489cabdff1aSopenharmony_ci bounding_values[x] = x; 490cabdff1aSopenharmony_ci } 491cabdff1aSopenharmony_ci for (x = value = filter_limit; x < 128 && value; x++, value--) { 492cabdff1aSopenharmony_ci bounding_values[ x] = value; 493cabdff1aSopenharmony_ci bounding_values[-x] = -value; 494cabdff1aSopenharmony_ci } 495cabdff1aSopenharmony_ci if (value) 496cabdff1aSopenharmony_ci bounding_values[128] = value; 497cabdff1aSopenharmony_ci bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202U; 498cabdff1aSopenharmony_ci} 499