1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "config.h" 20 21#include <stdint.h> 22 23#include "libavutil/attributes.h" 24#include "libavutil/cpu.h" 25#include "libavutil/ppc/cpu.h" 26#include "libavutil/ppc/util_altivec.h" 27 28#include "libavcodec/mpegvideoencdsp.h" 29 30#if HAVE_ALTIVEC 31 32#if HAVE_VSX 33static int pix_norm1_altivec(uint8_t *pix, int line_size) 34{ 35 int i, s = 0; 36 const vector unsigned int zero = 37 (const vector unsigned int) vec_splat_u32(0); 38 vector unsigned int sv = (vector unsigned int) vec_splat_u32(0); 39 vector signed int sum; 40 41 for (i = 0; i < 16; i++) { 42 /* Read the potentially unaligned pixels. */ 43 //vector unsigned char pixl = vec_ld(0, pix); 44 //vector unsigned char pixr = vec_ld(15, pix); 45 //vector unsigned char pixv = vec_perm(pixl, pixr, perm); 46 vector unsigned char pixv = vec_vsx_ld(0, pix); 47 48 /* Square the values, and add them to our sum. */ 49 sv = vec_msum(pixv, pixv, sv); 50 51 pix += line_size; 52 } 53 /* Sum up the four partial sums, and put the result into s. */ 54 sum = vec_sums((vector signed int) sv, (vector signed int) zero); 55 sum = vec_splat(sum, 3); 56 vec_ste(sum, 0, &s); 57 return s; 58} 59#else 60static int pix_norm1_altivec(uint8_t *pix, int line_size) 61{ 62 int i, s = 0; 63 const vector unsigned int zero = 64 (const vector unsigned int) vec_splat_u32(0); 65 vector unsigned char perm = vec_lvsl(0, pix); 66 vector unsigned int sv = (vector unsigned int) vec_splat_u32(0); 67 vector signed int sum; 68 69 for (i = 0; i < 16; i++) { 70 /* Read the potentially unaligned pixels. */ 71 vector unsigned char pixl = vec_ld(0, pix); 72 vector unsigned char pixr = vec_ld(15, pix); 73 vector unsigned char pixv = vec_perm(pixl, pixr, perm); 74 75 /* Square the values, and add them to our sum. */ 76 sv = vec_msum(pixv, pixv, sv); 77 78 pix += line_size; 79 } 80 /* Sum up the four partial sums, and put the result into s. */ 81 sum = vec_sums((vector signed int) sv, (vector signed int) zero); 82 sum = vec_splat(sum, 3); 83 vec_ste(sum, 0, &s); 84 85 return s; 86} 87#endif /* HAVE_VSX */ 88 89#if HAVE_VSX 90static int pix_sum_altivec(uint8_t *pix, int line_size) 91{ 92 int i, s; 93 const vector unsigned int zero = 94 (const vector unsigned int) vec_splat_u32(0); 95 vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); 96 vector signed int sumdiffs; 97 98 for (i = 0; i < 16; i++) { 99 /* Read the potentially unaligned 16 pixels into t1. */ 100 //vector unsigned char pixl = vec_ld(0, pix); 101 //vector unsigned char pixr = vec_ld(15, pix); 102 //vector unsigned char t1 = vec_perm(pixl, pixr, perm); 103 vector unsigned char t1 = vec_vsx_ld(0, pix); 104 105 /* Add each 4 pixel group together and put 4 results into sad. */ 106 sad = vec_sum4s(t1, sad); 107 108 pix += line_size; 109 } 110 111 /* Sum up the four partial sums, and put the result into s. */ 112 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 113 sumdiffs = vec_splat(sumdiffs, 3); 114 vec_ste(sumdiffs, 0, &s); 115 return s; 116} 117#else 118static int pix_sum_altivec(uint8_t *pix, int line_size) 119{ 120 int i, s; 121 const vector unsigned int zero = 122 (const vector unsigned int) vec_splat_u32(0); 123 vector unsigned char perm = vec_lvsl(0, pix); 124 vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); 125 vector signed int sumdiffs; 126 127 for (i = 0; i < 16; i++) { 128 /* Read the potentially unaligned 16 pixels into t1. */ 129 vector unsigned char pixl = vec_ld(0, pix); 130 vector unsigned char pixr = vec_ld(15, pix); 131 vector unsigned char t1 = vec_perm(pixl, pixr, perm); 132 133 /* Add each 4 pixel group together and put 4 results into sad. */ 134 sad = vec_sum4s(t1, sad); 135 136 pix += line_size; 137 } 138 139 /* Sum up the four partial sums, and put the result into s. */ 140 sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); 141 sumdiffs = vec_splat(sumdiffs, 3); 142 vec_ste(sumdiffs, 0, &s); 143 144 return s; 145} 146 147#endif /* HAVE_VSX */ 148 149#endif /* HAVE_ALTIVEC */ 150 151av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, 152 AVCodecContext *avctx) 153{ 154#if HAVE_ALTIVEC 155 if (!PPC_ALTIVEC(av_get_cpu_flags())) 156 return; 157 158 c->pix_norm1 = pix_norm1_altivec; 159 c->pix_sum = pix_sum_altivec; 160#endif /* HAVE_ALTIVEC */ 161} 162