1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * AltiVec-enhanced yuv-to-yuv conversion routines. 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> 5cabdff1aSopenharmony_ci * based on the equivalent C code in swscale.c 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * This file is part of FFmpeg. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 10cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 11cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 12cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 15cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 16cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17cabdff1aSopenharmony_ci * Lesser General Public License for more details. 18cabdff1aSopenharmony_ci * 19cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 20cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 21cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22cabdff1aSopenharmony_ci */ 23cabdff1aSopenharmony_ci 24cabdff1aSopenharmony_ci#include <inttypes.h> 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#include "config.h" 27cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 28cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 29cabdff1aSopenharmony_ci#include "libswscale/swscale.h" 30cabdff1aSopenharmony_ci#include "libswscale/swscale_internal.h" 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci#if HAVE_ALTIVEC 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_cistatic int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[], 35cabdff1aSopenharmony_ci int srcStride[], int srcSliceY, 36cabdff1aSopenharmony_ci int srcSliceH, uint8_t *dstParam[], 37cabdff1aSopenharmony_ci int dstStride_a[]) 38cabdff1aSopenharmony_ci{ 39cabdff1aSopenharmony_ci uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; 40cabdff1aSopenharmony_ci // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, 41cabdff1aSopenharmony_ci // srcStride[0], srcStride[1], dstStride[0]); 42cabdff1aSopenharmony_ci const uint8_t *ysrc = src[0]; 43cabdff1aSopenharmony_ci const uint8_t *usrc = src[1]; 44cabdff1aSopenharmony_ci const uint8_t *vsrc = src[2]; 45cabdff1aSopenharmony_ci const int width = c->srcW; 46cabdff1aSopenharmony_ci const int height = srcSliceH; 47cabdff1aSopenharmony_ci const int lumStride = srcStride[0]; 48cabdff1aSopenharmony_ci const int chromStride = srcStride[1]; 49cabdff1aSopenharmony_ci const int dstStride = dstStride_a[0]; 50cabdff1aSopenharmony_ci const vector unsigned char yperm = vec_lvsl(0, ysrc); 51cabdff1aSopenharmony_ci const int vertLumPerChroma = 2; 52cabdff1aSopenharmony_ci register unsigned int y; 53cabdff1aSopenharmony_ci 54cabdff1aSopenharmony_ci /* This code assumes: 55cabdff1aSopenharmony_ci * 56cabdff1aSopenharmony_ci * 1) dst is 16 bytes-aligned 57cabdff1aSopenharmony_ci * 2) dstStride is a multiple of 16 58cabdff1aSopenharmony_ci * 3) width is a multiple of 16 59cabdff1aSopenharmony_ci * 4) lum & chrom stride are multiples of 8 60cabdff1aSopenharmony_ci */ 61cabdff1aSopenharmony_ci 62cabdff1aSopenharmony_ci for (y = 0; y < height; y++) { 63cabdff1aSopenharmony_ci int i; 64cabdff1aSopenharmony_ci for (i = 0; i < width - 31; i += 32) { 65cabdff1aSopenharmony_ci const unsigned int j = i >> 1; 66cabdff1aSopenharmony_ci vector unsigned char v_yA = vec_ld(i, ysrc); 67cabdff1aSopenharmony_ci vector unsigned char v_yB = vec_ld(i + 16, ysrc); 68cabdff1aSopenharmony_ci vector unsigned char v_yC = vec_ld(i + 32, ysrc); 69cabdff1aSopenharmony_ci vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); 70cabdff1aSopenharmony_ci vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); 71cabdff1aSopenharmony_ci vector unsigned char v_uA = vec_ld(j, usrc); 72cabdff1aSopenharmony_ci vector unsigned char v_uB = vec_ld(j + 16, usrc); 73cabdff1aSopenharmony_ci vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); 74cabdff1aSopenharmony_ci vector unsigned char v_vA = vec_ld(j, vsrc); 75cabdff1aSopenharmony_ci vector unsigned char v_vB = vec_ld(j + 16, vsrc); 76cabdff1aSopenharmony_ci vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); 77cabdff1aSopenharmony_ci vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 78cabdff1aSopenharmony_ci vector unsigned char v_uv_b = vec_mergel(v_u, v_v); 79cabdff1aSopenharmony_ci vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); 80cabdff1aSopenharmony_ci vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); 81cabdff1aSopenharmony_ci vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b); 82cabdff1aSopenharmony_ci vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b); 83cabdff1aSopenharmony_ci vec_st(v_yuy2_0, (i << 1), dst); 84cabdff1aSopenharmony_ci vec_st(v_yuy2_1, (i << 1) + 16, dst); 85cabdff1aSopenharmony_ci vec_st(v_yuy2_2, (i << 1) + 32, dst); 86cabdff1aSopenharmony_ci vec_st(v_yuy2_3, (i << 1) + 48, dst); 87cabdff1aSopenharmony_ci } 88cabdff1aSopenharmony_ci if (i < width) { 89cabdff1aSopenharmony_ci const unsigned int j = i >> 1; 90cabdff1aSopenharmony_ci vector unsigned char v_y1 = vec_ld(i, ysrc); 91cabdff1aSopenharmony_ci vector unsigned char v_u = vec_ld(j, usrc); 92cabdff1aSopenharmony_ci vector unsigned char v_v = vec_ld(j, vsrc); 93cabdff1aSopenharmony_ci vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 94cabdff1aSopenharmony_ci vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a); 95cabdff1aSopenharmony_ci vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a); 96cabdff1aSopenharmony_ci vec_st(v_yuy2_0, (i << 1), dst); 97cabdff1aSopenharmony_ci vec_st(v_yuy2_1, (i << 1) + 16, dst); 98cabdff1aSopenharmony_ci } 99cabdff1aSopenharmony_ci if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { 100cabdff1aSopenharmony_ci usrc += chromStride; 101cabdff1aSopenharmony_ci vsrc += chromStride; 102cabdff1aSopenharmony_ci } 103cabdff1aSopenharmony_ci ysrc += lumStride; 104cabdff1aSopenharmony_ci dst += dstStride; 105cabdff1aSopenharmony_ci } 106cabdff1aSopenharmony_ci 107cabdff1aSopenharmony_ci return srcSliceH; 108cabdff1aSopenharmony_ci} 109cabdff1aSopenharmony_ci 110cabdff1aSopenharmony_cistatic int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[], 111cabdff1aSopenharmony_ci int srcStride[], int srcSliceY, 112cabdff1aSopenharmony_ci int srcSliceH, uint8_t *dstParam[], 113cabdff1aSopenharmony_ci int dstStride_a[]) 114cabdff1aSopenharmony_ci{ 115cabdff1aSopenharmony_ci uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY; 116cabdff1aSopenharmony_ci // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, 117cabdff1aSopenharmony_ci // srcStride[0], srcStride[1], dstStride[0]); 118cabdff1aSopenharmony_ci const uint8_t *ysrc = src[0]; 119cabdff1aSopenharmony_ci const uint8_t *usrc = src[1]; 120cabdff1aSopenharmony_ci const uint8_t *vsrc = src[2]; 121cabdff1aSopenharmony_ci const int width = c->srcW; 122cabdff1aSopenharmony_ci const int height = srcSliceH; 123cabdff1aSopenharmony_ci const int lumStride = srcStride[0]; 124cabdff1aSopenharmony_ci const int chromStride = srcStride[1]; 125cabdff1aSopenharmony_ci const int dstStride = dstStride_a[0]; 126cabdff1aSopenharmony_ci const int vertLumPerChroma = 2; 127cabdff1aSopenharmony_ci const vector unsigned char yperm = vec_lvsl(0, ysrc); 128cabdff1aSopenharmony_ci register unsigned int y; 129cabdff1aSopenharmony_ci 130cabdff1aSopenharmony_ci /* This code assumes: 131cabdff1aSopenharmony_ci * 132cabdff1aSopenharmony_ci * 1) dst is 16 bytes-aligned 133cabdff1aSopenharmony_ci * 2) dstStride is a multiple of 16 134cabdff1aSopenharmony_ci * 3) width is a multiple of 16 135cabdff1aSopenharmony_ci * 4) lum & chrom stride are multiples of 8 136cabdff1aSopenharmony_ci */ 137cabdff1aSopenharmony_ci 138cabdff1aSopenharmony_ci for (y = 0; y < height; y++) { 139cabdff1aSopenharmony_ci int i; 140cabdff1aSopenharmony_ci for (i = 0; i < width - 31; i += 32) { 141cabdff1aSopenharmony_ci const unsigned int j = i >> 1; 142cabdff1aSopenharmony_ci vector unsigned char v_yA = vec_ld(i, ysrc); 143cabdff1aSopenharmony_ci vector unsigned char v_yB = vec_ld(i + 16, ysrc); 144cabdff1aSopenharmony_ci vector unsigned char v_yC = vec_ld(i + 32, ysrc); 145cabdff1aSopenharmony_ci vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm); 146cabdff1aSopenharmony_ci vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm); 147cabdff1aSopenharmony_ci vector unsigned char v_uA = vec_ld(j, usrc); 148cabdff1aSopenharmony_ci vector unsigned char v_uB = vec_ld(j + 16, usrc); 149cabdff1aSopenharmony_ci vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc)); 150cabdff1aSopenharmony_ci vector unsigned char v_vA = vec_ld(j, vsrc); 151cabdff1aSopenharmony_ci vector unsigned char v_vB = vec_ld(j + 16, vsrc); 152cabdff1aSopenharmony_ci vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc)); 153cabdff1aSopenharmony_ci vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 154cabdff1aSopenharmony_ci vector unsigned char v_uv_b = vec_mergel(v_u, v_v); 155cabdff1aSopenharmony_ci vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); 156cabdff1aSopenharmony_ci vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); 157cabdff1aSopenharmony_ci vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2); 158cabdff1aSopenharmony_ci vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2); 159cabdff1aSopenharmony_ci vec_st(v_uyvy_0, (i << 1), dst); 160cabdff1aSopenharmony_ci vec_st(v_uyvy_1, (i << 1) + 16, dst); 161cabdff1aSopenharmony_ci vec_st(v_uyvy_2, (i << 1) + 32, dst); 162cabdff1aSopenharmony_ci vec_st(v_uyvy_3, (i << 1) + 48, dst); 163cabdff1aSopenharmony_ci } 164cabdff1aSopenharmony_ci if (i < width) { 165cabdff1aSopenharmony_ci const unsigned int j = i >> 1; 166cabdff1aSopenharmony_ci vector unsigned char v_y1 = vec_ld(i, ysrc); 167cabdff1aSopenharmony_ci vector unsigned char v_u = vec_ld(j, usrc); 168cabdff1aSopenharmony_ci vector unsigned char v_v = vec_ld(j, vsrc); 169cabdff1aSopenharmony_ci vector unsigned char v_uv_a = vec_mergeh(v_u, v_v); 170cabdff1aSopenharmony_ci vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1); 171cabdff1aSopenharmony_ci vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1); 172cabdff1aSopenharmony_ci vec_st(v_uyvy_0, (i << 1), dst); 173cabdff1aSopenharmony_ci vec_st(v_uyvy_1, (i << 1) + 16, dst); 174cabdff1aSopenharmony_ci } 175cabdff1aSopenharmony_ci if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) { 176cabdff1aSopenharmony_ci usrc += chromStride; 177cabdff1aSopenharmony_ci vsrc += chromStride; 178cabdff1aSopenharmony_ci } 179cabdff1aSopenharmony_ci ysrc += lumStride; 180cabdff1aSopenharmony_ci dst += dstStride; 181cabdff1aSopenharmony_ci } 182cabdff1aSopenharmony_ci return srcSliceH; 183cabdff1aSopenharmony_ci} 184cabdff1aSopenharmony_ci 185cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */ 186cabdff1aSopenharmony_ci 187cabdff1aSopenharmony_ciav_cold void ff_get_unscaled_swscale_ppc(SwsContext *c) 188cabdff1aSopenharmony_ci{ 189cabdff1aSopenharmony_ci#if HAVE_ALTIVEC 190cabdff1aSopenharmony_ci if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) 191cabdff1aSopenharmony_ci return; 192cabdff1aSopenharmony_ci 193cabdff1aSopenharmony_ci if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) && 194cabdff1aSopenharmony_ci c->srcFormat == AV_PIX_FMT_YUV420P) { 195cabdff1aSopenharmony_ci enum AVPixelFormat dstFormat = c->dstFormat; 196cabdff1aSopenharmony_ci 197cabdff1aSopenharmony_ci // unscaled YV12 -> packed YUV, we want speed 198cabdff1aSopenharmony_ci if (dstFormat == AV_PIX_FMT_YUYV422) 199cabdff1aSopenharmony_ci c->convert_unscaled = yv12toyuy2_unscaled_altivec; 200cabdff1aSopenharmony_ci else if (dstFormat == AV_PIX_FMT_UYVY422) 201cabdff1aSopenharmony_ci c->convert_unscaled = yv12touyvy_unscaled_altivec; 202cabdff1aSopenharmony_ci } 203cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */ 204cabdff1aSopenharmony_ci} 205