1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * This file is part of FFmpeg. 3cabdff1aSopenharmony_ci * 4cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 5cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 6cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 7cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 8cabdff1aSopenharmony_ci * 9cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 10cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 11cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12cabdff1aSopenharmony_ci * Lesser General Public License for more details. 13cabdff1aSopenharmony_ci * 14cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 15cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 16cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17cabdff1aSopenharmony_ci */ 18cabdff1aSopenharmony_ci 19cabdff1aSopenharmony_ci#include "config.h" 20cabdff1aSopenharmony_ci 21cabdff1aSopenharmony_ci#include <stdint.h> 22cabdff1aSopenharmony_ci 23cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 24cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 25cabdff1aSopenharmony_ci#include "libavutil/ppc/cpu.h" 26cabdff1aSopenharmony_ci#include "libavutil/ppc/util_altivec.h" 27cabdff1aSopenharmony_ci 28cabdff1aSopenharmony_ci#include "libavcodec/mpegvideoencdsp.h" 29cabdff1aSopenharmony_ci 30cabdff1aSopenharmony_ci#if HAVE_ALTIVEC 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci#if HAVE_VSX 33cabdff1aSopenharmony_cistatic int pix_norm1_altivec(uint8_t *pix, int line_size) 34cabdff1aSopenharmony_ci{ 35cabdff1aSopenharmony_ci int i, s = 0; 36cabdff1aSopenharmony_ci const vector unsigned int zero = 37cabdff1aSopenharmony_ci (const vector unsigned int) vec_splat_u32(0); 38cabdff1aSopenharmony_ci vector unsigned int sv = (vector unsigned int) vec_splat_u32(0); 39cabdff1aSopenharmony_ci vector signed int sum; 40cabdff1aSopenharmony_ci 41cabdff1aSopenharmony_ci for (i = 0; i < 16; i++) { 42cabdff1aSopenharmony_ci /* Read the potentially unaligned pixels. */ 43cabdff1aSopenharmony_ci //vector unsigned char pixl = vec_ld(0, pix); 44cabdff1aSopenharmony_ci //vector unsigned char pixr = vec_ld(15, pix); 45cabdff1aSopenharmony_ci //vector unsigned char pixv = vec_perm(pixl, pixr, perm); 46cabdff1aSopenharmony_ci vector unsigned char pixv = vec_vsx_ld(0, pix); 47cabdff1aSopenharmony_ci 48cabdff1aSopenharmony_ci /* Square the values, and add them to our sum. */ 49cabdff1aSopenharmony_ci sv = vec_msum(pixv, pixv, sv); 50cabdff1aSopenharmony_ci 51cabdff1aSopenharmony_ci pix += line_size; 52cabdff1aSopenharmony_ci } 53cabdff1aSopenharmony_ci /* Sum up the four partial sums, and put the result into s. */ 54cabdff1aSopenharmony_ci sum = vec_sums((vector signed int) sv, (vector signed int) zero); 55cabdff1aSopenharmony_ci sum = vec_splat(sum, 3); 56cabdff1aSopenharmony_ci vec_ste(sum, 0, &s); 57cabdff1aSopenharmony_ci return s; 58cabdff1aSopenharmony_ci} 59cabdff1aSopenharmony_ci#else 60cabdff1aSopenharmony_cistatic int pix_norm1_altivec(uint8_t *pix, int line_size) 61cabdff1aSopenharmony_ci{ 62cabdff1aSopenharmony_ci int i, s = 0; 63cabdff1aSopenharmony_ci const vector unsigned int zero = 64cabdff1aSopenharmony_ci (const vector unsigned int) vec_splat_u32(0); 65cabdff1aSopenharmony_ci vector unsigned char perm = vec_lvsl(0, pix); 66cabdff1aSopenharmony_ci vector unsigned int sv = (vector unsigned int) vec_splat_u32(0); 67cabdff1aSopenharmony_ci vector signed int sum; 68cabdff1aSopenharmony_ci 69cabdff1aSopenharmony_ci for (i = 0; i < 16; i++) { 70cabdff1aSopenharmony_ci /* Read the potentially unaligned pixels. */ 71cabdff1aSopenharmony_ci vector unsigned char pixl = vec_ld(0, pix); 72cabdff1aSopenharmony_ci vector unsigned char pixr = vec_ld(15, pix); 73cabdff1aSopenharmony_ci vector unsigned char pixv = vec_perm(pixl, pixr, perm); 74cabdff1aSopenharmony_ci 75cabdff1aSopenharmony_ci /* Square the values, and add them to our sum. */ 76cabdff1aSopenharmony_ci sv = vec_msum(pixv, pixv, sv); 77cabdff1aSopenharmony_ci 78cabdff1aSopenharmony_ci pix += line_size; 79cabdff1aSopenharmony_ci } 80cabdff1aSopenharmony_ci /* Sum up the four partial sums, and put the result into s. */ 81cabdff1aSopenharmony_ci sum = vec_sums((vector signed int) sv, (vector signed int) zero); 82cabdff1aSopenharmony_ci sum = vec_splat(sum, 3); 83cabdff1aSopenharmony_ci vec_ste(sum, 0, &s); 84cabdff1aSopenharmony_ci 85cabdff1aSopenharmony_ci return s; 86cabdff1aSopenharmony_ci} 87cabdff1aSopenharmony_ci#endif /* HAVE_VSX */ 88cabdff1aSopenharmony_ci 89cabdff1aSopenharmony_ci#if HAVE_VSX 90cabdff1aSopenharmony_cistatic int pix_sum_altivec(uint8_t *pix, int line_size) 91cabdff1aSopenharmony_ci{ 92cabdff1aSopenharmony_ci int i, s; 93cabdff1aSopenharmony_ci const vector unsigned int zero = 94cabdff1aSopenharmony_ci (const vector unsigned int) vec_splat_u32(0); 95cabdff1aSopenharmony_ci vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); 96cabdff1aSopenharmony_ci vector signed int sumdiffs; 97cabdff1aSopenharmony_ci 98cabdff1aSopenharmony_ci for (i = 0; i < 16; i++) { 99cabdff1aSopenharmony_ci /* Read the potentially unaligned 16 pixels into t1. */ 100cabdff1aSopenharmony_ci //vector unsigned char pixl = vec_ld(0, pix); 101cabdff1aSopenharmony_ci //vector unsigned char pixr = vec_ld(15, pix); 102cabdff1aSopenharmony_ci //vector unsigned char t1 = vec_perm(pixl, pixr, perm); 103cabdff1aSopenharmony_ci vector unsigned char t1 = vec_vsx_ld(0, pix); 104cabdff1aSopenharmony_ci 105cabdff1aSopenharmony_ci /* Add each 4 pixel group together and put 4 results into sad. */ 106cabdff1aSopenharmony_ci sad = vec_sum4s(t1, sad); 107cabdff1aSopenharmony_ci 108cabdff1aSopenharmony_ci pix += line_size; 109cabdff1aSopenharmony_ci } 110cabdff1aSopenharmony_ci 111cabdff1aSopenharmony_ci /* Sum up the four partial sums, and put the result into s. */ 112cabdff1aSopenharmony_ci sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 113cabdff1aSopenharmony_ci sumdiffs = vec_splat(sumdiffs, 3); 114cabdff1aSopenharmony_ci vec_ste(sumdiffs, 0, &s); 115cabdff1aSopenharmony_ci return s; 116cabdff1aSopenharmony_ci} 117cabdff1aSopenharmony_ci#else 118cabdff1aSopenharmony_cistatic int pix_sum_altivec(uint8_t *pix, int line_size) 119cabdff1aSopenharmony_ci{ 120cabdff1aSopenharmony_ci int i, s; 121cabdff1aSopenharmony_ci const vector unsigned int zero = 122cabdff1aSopenharmony_ci (const vector unsigned int) vec_splat_u32(0); 123cabdff1aSopenharmony_ci vector unsigned char perm = vec_lvsl(0, pix); 124cabdff1aSopenharmony_ci vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); 125cabdff1aSopenharmony_ci vector signed int sumdiffs; 126cabdff1aSopenharmony_ci 127cabdff1aSopenharmony_ci for (i = 0; i < 16; i++) { 128cabdff1aSopenharmony_ci /* Read the potentially unaligned 16 pixels into t1. */ 129cabdff1aSopenharmony_ci vector unsigned char pixl = vec_ld(0, pix); 130cabdff1aSopenharmony_ci vector unsigned char pixr = vec_ld(15, pix); 131cabdff1aSopenharmony_ci vector unsigned char t1 = vec_perm(pixl, pixr, perm); 132cabdff1aSopenharmony_ci 133cabdff1aSopenharmony_ci /* Add each 4 pixel group together and put 4 results into sad. */ 134cabdff1aSopenharmony_ci sad = vec_sum4s(t1, sad); 135cabdff1aSopenharmony_ci 136cabdff1aSopenharmony_ci pix += line_size; 137cabdff1aSopenharmony_ci } 138cabdff1aSopenharmony_ci 139cabdff1aSopenharmony_ci /* Sum up the four partial sums, and put the result into s. */ 140cabdff1aSopenharmony_ci sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 141cabdff1aSopenharmony_ci sumdiffs = vec_splat(sumdiffs, 3); 142cabdff1aSopenharmony_ci vec_ste(sumdiffs, 0, &s); 143cabdff1aSopenharmony_ci 144cabdff1aSopenharmony_ci return s; 145cabdff1aSopenharmony_ci} 146cabdff1aSopenharmony_ci 147cabdff1aSopenharmony_ci#endif /* HAVE_VSX */ 148cabdff1aSopenharmony_ci 149cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */ 150cabdff1aSopenharmony_ci 151cabdff1aSopenharmony_ciav_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, 152cabdff1aSopenharmony_ci AVCodecContext *avctx) 153cabdff1aSopenharmony_ci{ 154cabdff1aSopenharmony_ci#if HAVE_ALTIVEC 155cabdff1aSopenharmony_ci if (!PPC_ALTIVEC(av_get_cpu_flags())) 156cabdff1aSopenharmony_ci return; 157cabdff1aSopenharmony_ci 158cabdff1aSopenharmony_ci c->pix_norm1 = pix_norm1_altivec; 159cabdff1aSopenharmony_ci c->pix_sum = pix_sum_altivec; 160cabdff1aSopenharmony_ci#endif /* HAVE_ALTIVEC */ 161cabdff1aSopenharmony_ci} 162