1/* 2 * FFT transform, optimized with VSX built-in functions 3 * Copyright (c) 2014 Rong Yan 4 * 5 * This algorithm (though not any of the implementation details) is 6 * based on libdjbfft by D. J. Bernstein. 7 * 8 * This file is part of FFmpeg. 9 * 10 * FFmpeg is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * FFmpeg is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with FFmpeg; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25 26#include "config.h" 27#include "libavutil/cpu.h" 28#include "libavutil/ppc/util_altivec.h" 29#include "libavcodec/fft.h" 30#include "libavcodec/fft-internal.h" 31#include "fft_vsx.h" 32 33#if HAVE_VSX 34 35static void fft32_vsx_interleave(FFTComplex *z) 36{ 37 fft16_vsx_interleave(z); 38 fft8_vsx_interleave(z+16); 39 fft8_vsx_interleave(z+24); 40 pass_vsx_interleave(z,ff_cos_32,4); 41} 42 43static void fft64_vsx_interleave(FFTComplex *z) 44{ 45 fft32_vsx_interleave(z); 46 fft16_vsx_interleave(z+32); 47 fft16_vsx_interleave(z+48); 48 pass_vsx_interleave(z,ff_cos_64, 8); 49} 50static void fft128_vsx_interleave(FFTComplex *z) 51{ 52 fft64_vsx_interleave(z); 53 fft32_vsx_interleave(z+64); 54 fft32_vsx_interleave(z+96); 55 pass_vsx_interleave(z,ff_cos_128,16); 56} 57static void fft256_vsx_interleave(FFTComplex *z) 58{ 59 fft128_vsx_interleave(z); 60 fft64_vsx_interleave(z+128); 61 fft64_vsx_interleave(z+192); 62 pass_vsx_interleave(z,ff_cos_256,32); 63} 64static void fft512_vsx_interleave(FFTComplex *z) 65{ 66 fft256_vsx_interleave(z); 67 fft128_vsx_interleave(z+256); 68 fft128_vsx_interleave(z+384); 69 pass_vsx_interleave(z,ff_cos_512,64); 70} 71static void fft1024_vsx_interleave(FFTComplex *z) 72{ 73 fft512_vsx_interleave(z); 74 fft256_vsx_interleave(z+512); 75 fft256_vsx_interleave(z+768); 76 pass_vsx_interleave(z,ff_cos_1024,128); 77 78} 79static void fft2048_vsx_interleave(FFTComplex *z) 80{ 81 fft1024_vsx_interleave(z); 82 fft512_vsx_interleave(z+1024); 83 fft512_vsx_interleave(z+1536); 84 pass_vsx_interleave(z,ff_cos_2048,256); 85} 86static void fft4096_vsx_interleave(FFTComplex *z) 87{ 88 fft2048_vsx_interleave(z); 89 fft1024_vsx_interleave(z+2048); 90 fft1024_vsx_interleave(z+3072); 91 pass_vsx_interleave(z,ff_cos_4096, 512); 92} 93static void fft8192_vsx_interleave(FFTComplex *z) 94{ 95 fft4096_vsx_interleave(z); 96 fft2048_vsx_interleave(z+4096); 97 fft2048_vsx_interleave(z+6144); 98 pass_vsx_interleave(z,ff_cos_8192,1024); 99} 100static void fft16384_vsx_interleave(FFTComplex *z) 101{ 102 fft8192_vsx_interleave(z); 103 fft4096_vsx_interleave(z+8192); 104 fft4096_vsx_interleave(z+12288); 105 pass_vsx_interleave(z,ff_cos_16384,2048); 106} 107static void fft32768_vsx_interleave(FFTComplex *z) 108{ 109 fft16384_vsx_interleave(z); 110 fft8192_vsx_interleave(z+16384); 111 fft8192_vsx_interleave(z+24576); 112 pass_vsx_interleave(z,ff_cos_32768,4096); 113} 114static void fft65536_vsx_interleave(FFTComplex *z) 115{ 116 fft32768_vsx_interleave(z); 117 fft16384_vsx_interleave(z+32768); 118 fft16384_vsx_interleave(z+49152); 119 pass_vsx_interleave(z,ff_cos_65536,8192); 120} 121 122static void fft32_vsx(FFTComplex *z) 123{ 124 fft16_vsx(z); 125 fft8_vsx(z+16); 126 fft8_vsx(z+24); 127 pass_vsx(z,ff_cos_32,4); 128} 129 130static void fft64_vsx(FFTComplex *z) 131{ 132 fft32_vsx(z); 133 fft16_vsx(z+32); 134 fft16_vsx(z+48); 135 pass_vsx(z,ff_cos_64, 8); 136} 137static void fft128_vsx(FFTComplex *z) 138{ 139 fft64_vsx(z); 140 fft32_vsx(z+64); 141 fft32_vsx(z+96); 142 pass_vsx(z,ff_cos_128,16); 143} 144static void fft256_vsx(FFTComplex *z) 145{ 146 fft128_vsx(z); 147 fft64_vsx(z+128); 148 fft64_vsx(z+192); 149 pass_vsx(z,ff_cos_256,32); 150} 151static void fft512_vsx(FFTComplex *z) 152{ 153 fft256_vsx(z); 154 fft128_vsx(z+256); 155 fft128_vsx(z+384); 156 pass_vsx(z,ff_cos_512,64); 157} 158static void fft1024_vsx(FFTComplex *z) 159{ 160 fft512_vsx(z); 161 fft256_vsx(z+512); 162 fft256_vsx(z+768); 163 pass_vsx(z,ff_cos_1024,128); 164 165} 166static void fft2048_vsx(FFTComplex *z) 167{ 168 fft1024_vsx(z); 169 fft512_vsx(z+1024); 170 fft512_vsx(z+1536); 171 pass_vsx(z,ff_cos_2048,256); 172} 173static void fft4096_vsx(FFTComplex *z) 174{ 175 fft2048_vsx(z); 176 fft1024_vsx(z+2048); 177 fft1024_vsx(z+3072); 178 pass_vsx(z,ff_cos_4096, 512); 179} 180static void fft8192_vsx(FFTComplex *z) 181{ 182 fft4096_vsx(z); 183 fft2048_vsx(z+4096); 184 fft2048_vsx(z+6144); 185 pass_vsx(z,ff_cos_8192,1024); 186} 187static void fft16384_vsx(FFTComplex *z) 188{ 189 fft8192_vsx(z); 190 fft4096_vsx(z+8192); 191 fft4096_vsx(z+12288); 192 pass_vsx(z,ff_cos_16384,2048); 193} 194static void fft32768_vsx(FFTComplex *z) 195{ 196 fft16384_vsx(z); 197 fft8192_vsx(z+16384); 198 fft8192_vsx(z+24576); 199 pass_vsx(z,ff_cos_32768,4096); 200} 201static void fft65536_vsx(FFTComplex *z) 202{ 203 fft32768_vsx(z); 204 fft16384_vsx(z+32768); 205 fft16384_vsx(z+49152); 206 pass_vsx(z,ff_cos_65536,8192); 207} 208 209static void (* const fft_dispatch_vsx[])(FFTComplex*) = { 210 fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx, 211 fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx, 212}; 213static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = { 214 fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave, 215 fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave, 216 fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave, 217}; 218void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z) 219{ 220 fft_dispatch_vsx_interleave[s->nbits-2](z); 221} 222void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z) 223{ 224 fft_dispatch_vsx[s->nbits-2](z); 225} 226#endif /* HAVE_VSX */ 227