1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * AltiVec-enhanced yuv2yuvX 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> 5cabdff1aSopenharmony_ci * based on the equivalent C code in swscale.c 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * This file is part of FFmpeg. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17cabdff1aSopenharmony_ci * Lesser General Public License for more details. 18cabdff1aSopenharmony_ci * 19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 25cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 26cabdff1aSopenharmony_ci 27cabdff1aSopenharmony_cistatic void FUNC(yuv2planeX_8_16)(const int16_t *filter, int filterSize, 28cabdff1aSopenharmony_ci const int16_t **src, uint8_t *dest, 29cabdff1aSopenharmony_ci const uint8_t *dither, int offset, int x) 30cabdff1aSopenharmony_ci{ 31cabdff1aSopenharmony_ci register int i, j; 32cabdff1aSopenharmony_ci LOCAL_ALIGNED(16, int, val, [16]); 33cabdff1aSopenharmony_ci vector signed int vo1, vo2, vo3, vo4; 34cabdff1aSopenharmony_ci vector unsigned short vs1, vs2; 35cabdff1aSopenharmony_ci vector unsigned char vf; 36cabdff1aSopenharmony_ci vector unsigned int altivec_vectorShiftInt19 = 37cabdff1aSopenharmony_ci vec_add(vec_splat_u32(10), vec_splat_u32(9)); 38cabdff1aSopenharmony_ci 39cabdff1aSopenharmony_ci for (i = 0; i < 16; i++) 40cabdff1aSopenharmony_ci val[i] = dither[(x + i + offset) & 7] << 12; 41cabdff1aSopenharmony_ci 42cabdff1aSopenharmony_ci vo1 = vec_ld(0, val); 43cabdff1aSopenharmony_ci vo2 = vec_ld(16, val); 44cabdff1aSopenharmony_ci vo3 = vec_ld(32, val); 45cabdff1aSopenharmony_ci vo4 = vec_ld(48, val); 46cabdff1aSopenharmony_ci 47cabdff1aSopenharmony_ci for (j = 0; j < filterSize; j++) { 48cabdff1aSopenharmony_ci unsigned int joffset=j<<1; 49cabdff1aSopenharmony_ci unsigned int xoffset=x<<1; 50cabdff1aSopenharmony_ci vector unsigned char av_unused perm; 51cabdff1aSopenharmony_ci vector signed short l1,vLumFilter; 52cabdff1aSopenharmony_ci LOAD_FILTER(vLumFilter,filter); 53cabdff1aSopenharmony_ci vLumFilter = vec_splat(vLumFilter, 0); 54cabdff1aSopenharmony_ci LOAD_L1(l1,src[j],perm); 55cabdff1aSopenharmony_ci yuv2planeX_8(vo1, vo2, l1, src[j], x, perm, vLumFilter); 56cabdff1aSopenharmony_ci yuv2planeX_8(vo3, vo4, l1, src[j], x + 8, perm, vLumFilter); 57cabdff1aSopenharmony_ci } 58cabdff1aSopenharmony_ci 59cabdff1aSopenharmony_ci vo1 = vec_sra(vo1, altivec_vectorShiftInt19); 60cabdff1aSopenharmony_ci vo2 = vec_sra(vo2, altivec_vectorShiftInt19); 61cabdff1aSopenharmony_ci vo3 = vec_sra(vo3, altivec_vectorShiftInt19); 62cabdff1aSopenharmony_ci vo4 = vec_sra(vo4, altivec_vectorShiftInt19); 63cabdff1aSopenharmony_ci vs1 = vec_packsu(vo1, vo2); 64cabdff1aSopenharmony_ci vs2 = vec_packsu(vo3, vo4); 65cabdff1aSopenharmony_ci vf = vec_packsu(vs1, vs2); 66cabdff1aSopenharmony_ci VEC_ST(vf, 0, dest); 67cabdff1aSopenharmony_ci} 68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_ci 70cabdff1aSopenharmony_cistatic inline void yuv2planeX_u(const int16_t *filter, int filterSize, 71cabdff1aSopenharmony_ci const int16_t **src, uint8_t *dest, int dstW, 72cabdff1aSopenharmony_ci const uint8_t *dither, int offset, int x) 73cabdff1aSopenharmony_ci{ 74cabdff1aSopenharmony_ci int i, j; 75cabdff1aSopenharmony_ci 76cabdff1aSopenharmony_ci for (i = x; i < dstW; i++) { 77cabdff1aSopenharmony_ci int t = dither[(i + offset) & 7] << 12; 78cabdff1aSopenharmony_ci for (j = 0; j < filterSize; j++) 79cabdff1aSopenharmony_ci t += src[j][i] * filter[j]; 80cabdff1aSopenharmony_ci dest[i] = av_clip_uint8(t >> 19); 81cabdff1aSopenharmony_ci } 82cabdff1aSopenharmony_ci} 83cabdff1aSopenharmony_ci 84cabdff1aSopenharmony_cistatic void FUNC(yuv2planeX)(const int16_t *filter, int filterSize, 85cabdff1aSopenharmony_ci const int16_t **src, uint8_t *dest, int dstW, 86cabdff1aSopenharmony_ci const uint8_t *dither, int offset) 87cabdff1aSopenharmony_ci{ 88cabdff1aSopenharmony_ci int dst_u = -(uintptr_t)dest & 15; 89cabdff1aSopenharmony_ci int i; 90cabdff1aSopenharmony_ci 91cabdff1aSopenharmony_ci yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0); 92cabdff1aSopenharmony_ci 93cabdff1aSopenharmony_ci for (i = dst_u; i < dstW - 15; i += 16) 94cabdff1aSopenharmony_ci FUNC(yuv2planeX_8_16)(filter, filterSize, src, dest + i, dither, 95cabdff1aSopenharmony_ci offset, i); 96cabdff1aSopenharmony_ci 97cabdff1aSopenharmony_ci yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i); 98cabdff1aSopenharmony_ci} 99cabdff1aSopenharmony_ci 100cabdff1aSopenharmony_cistatic void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW, 101cabdff1aSopenharmony_ci const uint8_t *src, const int16_t *filter, 102cabdff1aSopenharmony_ci const int32_t *filterPos, int filterSize) 103cabdff1aSopenharmony_ci{ 104cabdff1aSopenharmony_ci register int i; 105cabdff1aSopenharmony_ci LOCAL_ALIGNED(16, int, tempo, [4]); 106cabdff1aSopenharmony_ci 107cabdff1aSopenharmony_ci if (filterSize % 4) { 108cabdff1aSopenharmony_ci for (i = 0; i < dstW; i++) { 109cabdff1aSopenharmony_ci register int j; 110cabdff1aSopenharmony_ci register int srcPos = filterPos[i]; 111cabdff1aSopenharmony_ci register int val = 0; 112cabdff1aSopenharmony_ci for (j = 0; j < filterSize; j++) 113cabdff1aSopenharmony_ci val += ((int)src[srcPos + j]) * filter[filterSize * i + j]; 114cabdff1aSopenharmony_ci dst[i] = FFMIN(val >> 7, (1 << 15) - 1); 115cabdff1aSopenharmony_ci } 116cabdff1aSopenharmony_ci } else 117cabdff1aSopenharmony_ci switch (filterSize) { 118cabdff1aSopenharmony_ci case 4: 119cabdff1aSopenharmony_ci for (i = 0; i < dstW; i++) { 120cabdff1aSopenharmony_ci register int srcPos = filterPos[i]; 121cabdff1aSopenharmony_ci 122cabdff1aSopenharmony_ci vector unsigned char src_vF = unaligned_load(srcPos, src); 123cabdff1aSopenharmony_ci vector signed short src_v, filter_v; 124cabdff1aSopenharmony_ci vector signed int val_vEven, val_s; 125cabdff1aSopenharmony_ci src_v = // vec_unpackh sign-extends... 126cabdff1aSopenharmony_ci (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); 127cabdff1aSopenharmony_ci // now put our elements in the even slots 128cabdff1aSopenharmony_ci src_v = vec_mergeh(src_v, (vector signed short)vzero); 129cabdff1aSopenharmony_ci GET_VF4(i, filter_v, filter); 130cabdff1aSopenharmony_ci val_vEven = vec_mule(src_v, filter_v); 131cabdff1aSopenharmony_ci val_s = vec_sums(val_vEven, vzero); 132cabdff1aSopenharmony_ci vec_st(val_s, 0, tempo); 133cabdff1aSopenharmony_ci dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); 134cabdff1aSopenharmony_ci } 135cabdff1aSopenharmony_ci break; 136cabdff1aSopenharmony_ci case 8: 137cabdff1aSopenharmony_ci for (i = 0; i < dstW; i++) { 138cabdff1aSopenharmony_ci register int srcPos = filterPos[i]; 139cabdff1aSopenharmony_ci vector unsigned char src_vF, av_unused src_v0, av_unused src_v1; 140cabdff1aSopenharmony_ci vector unsigned char av_unused permS; 141cabdff1aSopenharmony_ci vector signed short src_v, filter_v; 142cabdff1aSopenharmony_ci vector signed int val_v, val_s; 143cabdff1aSopenharmony_ci FIRST_LOAD(src_v0, srcPos, src, permS); 144cabdff1aSopenharmony_ci LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF); 145cabdff1aSopenharmony_ci src_v = // vec_unpackh sign-extends... 146cabdff1aSopenharmony_ci (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); 147cabdff1aSopenharmony_ci filter_v = vec_ld(i << 4, filter); 148cabdff1aSopenharmony_ci val_v = vec_msums(src_v, filter_v, (vector signed int)vzero); 149cabdff1aSopenharmony_ci val_s = vec_sums(val_v, vzero); 150cabdff1aSopenharmony_ci vec_st(val_s, 0, tempo); 151cabdff1aSopenharmony_ci dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); 152cabdff1aSopenharmony_ci } 153cabdff1aSopenharmony_ci break; 154cabdff1aSopenharmony_ci 155cabdff1aSopenharmony_ci case 16: 156cabdff1aSopenharmony_ci for (i = 0; i < dstW; i++) { 157cabdff1aSopenharmony_ci register int srcPos = filterPos[i]; 158cabdff1aSopenharmony_ci 159cabdff1aSopenharmony_ci vector unsigned char src_vF = unaligned_load(srcPos, src); 160cabdff1aSopenharmony_ci vector signed short src_vA = // vec_unpackh sign-extends... 161cabdff1aSopenharmony_ci (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); 162cabdff1aSopenharmony_ci vector signed short src_vB = // vec_unpackh sign-extends... 163cabdff1aSopenharmony_ci (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); 164cabdff1aSopenharmony_ci vector signed short filter_v0 = vec_ld(i << 5, filter); 165cabdff1aSopenharmony_ci vector signed short filter_v1 = vec_ld((i << 5) + 16, filter); 166cabdff1aSopenharmony_ci 167cabdff1aSopenharmony_ci vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero); 168cabdff1aSopenharmony_ci vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc); 169cabdff1aSopenharmony_ci 170cabdff1aSopenharmony_ci vector signed int val_s = vec_sums(val_v, vzero); 171cabdff1aSopenharmony_ci 172cabdff1aSopenharmony_ci VEC_ST(val_s, 0, tempo); 173cabdff1aSopenharmony_ci dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); 174cabdff1aSopenharmony_ci } 175cabdff1aSopenharmony_ci break; 176cabdff1aSopenharmony_ci 177cabdff1aSopenharmony_ci default: 178cabdff1aSopenharmony_ci for (i = 0; i < dstW; i++) { 179cabdff1aSopenharmony_ci register int j, av_unused offset = i * 2 * filterSize; 180cabdff1aSopenharmony_ci register int srcPos = filterPos[i]; 181cabdff1aSopenharmony_ci 182cabdff1aSopenharmony_ci vector signed int val_s, val_v = (vector signed int)vzero; 183cabdff1aSopenharmony_ci vector signed short av_unused filter_v0R; 184cabdff1aSopenharmony_ci vector unsigned char av_unused permF, av_unused src_v0, av_unused permS; 185cabdff1aSopenharmony_ci FIRST_LOAD(filter_v0R, offset, filter, permF); 186cabdff1aSopenharmony_ci FIRST_LOAD(src_v0, srcPos, src, permS); 187cabdff1aSopenharmony_ci 188cabdff1aSopenharmony_ci for (j = 0; j < filterSize - 15; j += 16) { 189cabdff1aSopenharmony_ci vector unsigned char av_unused src_v1, src_vF; 190cabdff1aSopenharmony_ci vector signed short av_unused filter_v1R, av_unused filter_v2R, 191cabdff1aSopenharmony_ci filter_v0, filter_v1, src_vA, src_vB; 192cabdff1aSopenharmony_ci vector signed int val_acc; 193cabdff1aSopenharmony_ci LOAD_SRCV(srcPos, j, src, permS, src_v0, src_v1, src_vF); 194cabdff1aSopenharmony_ci src_vA = // vec_unpackh sign-extends... 195cabdff1aSopenharmony_ci (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); 196cabdff1aSopenharmony_ci src_vB = // vec_unpackh sign-extends... 197cabdff1aSopenharmony_ci (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF)); 198cabdff1aSopenharmony_ci GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v0, 0); 199cabdff1aSopenharmony_ci GET_VFD(i, j, filter, filter_v1R, filter_v2R, permF, filter_v1, 16); 200cabdff1aSopenharmony_ci 201cabdff1aSopenharmony_ci val_acc = vec_msums(src_vA, filter_v0, val_v); 202cabdff1aSopenharmony_ci val_v = vec_msums(src_vB, filter_v1, val_acc); 203cabdff1aSopenharmony_ci UPDATE_PTR(filter_v2R, filter_v0R, src_v1, src_v0); 204cabdff1aSopenharmony_ci } 205cabdff1aSopenharmony_ci 206cabdff1aSopenharmony_ci if (j < filterSize - 7) { 207cabdff1aSopenharmony_ci // loading src_v0 is useless, it's already done above 208cabdff1aSopenharmony_ci vector unsigned char av_unused src_v1, src_vF; 209cabdff1aSopenharmony_ci vector signed short src_v, av_unused filter_v1R, filter_v; 210cabdff1aSopenharmony_ci LOAD_SRCV8(srcPos, j, src, permS, src_v0, src_v1, src_vF); 211cabdff1aSopenharmony_ci src_v = // vec_unpackh sign-extends... 212cabdff1aSopenharmony_ci (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF)); 213cabdff1aSopenharmony_ci GET_VFD(i, j, filter, filter_v0R, filter_v1R, permF, filter_v, 0); 214cabdff1aSopenharmony_ci val_v = vec_msums(src_v, filter_v, val_v); 215cabdff1aSopenharmony_ci } 216cabdff1aSopenharmony_ci val_s = vec_sums(val_v, vzero); 217cabdff1aSopenharmony_ci 218cabdff1aSopenharmony_ci VEC_ST(val_s, 0, tempo); 219cabdff1aSopenharmony_ci dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1); 220cabdff1aSopenharmony_ci } 221cabdff1aSopenharmony_ci } 222cabdff1aSopenharmony_ci} 223