/*
 * FFT transform, optimized with VSX built-in functions
 * Copyright (c) 2014 Rong Yan
 *
 * This algorithm (though not any of the implementation details) is
 * based on libdjbfft by D. J. Bernstein.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
19cabdff1aSopenharmony_ci * 20cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 21cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 22cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23cabdff1aSopenharmony_ci */ 24cabdff1aSopenharmony_ci 25cabdff1aSopenharmony_ci 26cabdff1aSopenharmony_ci#include "config.h" 27cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 28cabdff1aSopenharmony_ci#include "libavutil/ppc/util_altivec.h" 29cabdff1aSopenharmony_ci#include "libavcodec/fft.h" 30cabdff1aSopenharmony_ci#include "libavcodec/fft-internal.h" 31cabdff1aSopenharmony_ci#include "fft_vsx.h" 32cabdff1aSopenharmony_ci 33cabdff1aSopenharmony_ci#if HAVE_VSX 34cabdff1aSopenharmony_ci 35cabdff1aSopenharmony_cistatic void fft32_vsx_interleave(FFTComplex *z) 36cabdff1aSopenharmony_ci{ 37cabdff1aSopenharmony_ci fft16_vsx_interleave(z); 38cabdff1aSopenharmony_ci fft8_vsx_interleave(z+16); 39cabdff1aSopenharmony_ci fft8_vsx_interleave(z+24); 40cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_32,4); 41cabdff1aSopenharmony_ci} 42cabdff1aSopenharmony_ci 43cabdff1aSopenharmony_cistatic void fft64_vsx_interleave(FFTComplex *z) 44cabdff1aSopenharmony_ci{ 45cabdff1aSopenharmony_ci fft32_vsx_interleave(z); 46cabdff1aSopenharmony_ci fft16_vsx_interleave(z+32); 47cabdff1aSopenharmony_ci fft16_vsx_interleave(z+48); 48cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_64, 8); 49cabdff1aSopenharmony_ci} 50cabdff1aSopenharmony_cistatic void fft128_vsx_interleave(FFTComplex *z) 51cabdff1aSopenharmony_ci{ 52cabdff1aSopenharmony_ci fft64_vsx_interleave(z); 53cabdff1aSopenharmony_ci fft32_vsx_interleave(z+64); 54cabdff1aSopenharmony_ci fft32_vsx_interleave(z+96); 55cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_128,16); 56cabdff1aSopenharmony_ci} 57cabdff1aSopenharmony_cistatic void fft256_vsx_interleave(FFTComplex *z) 58cabdff1aSopenharmony_ci{ 
59cabdff1aSopenharmony_ci fft128_vsx_interleave(z); 60cabdff1aSopenharmony_ci fft64_vsx_interleave(z+128); 61cabdff1aSopenharmony_ci fft64_vsx_interleave(z+192); 62cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_256,32); 63cabdff1aSopenharmony_ci} 64cabdff1aSopenharmony_cistatic void fft512_vsx_interleave(FFTComplex *z) 65cabdff1aSopenharmony_ci{ 66cabdff1aSopenharmony_ci fft256_vsx_interleave(z); 67cabdff1aSopenharmony_ci fft128_vsx_interleave(z+256); 68cabdff1aSopenharmony_ci fft128_vsx_interleave(z+384); 69cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_512,64); 70cabdff1aSopenharmony_ci} 71cabdff1aSopenharmony_cistatic void fft1024_vsx_interleave(FFTComplex *z) 72cabdff1aSopenharmony_ci{ 73cabdff1aSopenharmony_ci fft512_vsx_interleave(z); 74cabdff1aSopenharmony_ci fft256_vsx_interleave(z+512); 75cabdff1aSopenharmony_ci fft256_vsx_interleave(z+768); 76cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_1024,128); 77cabdff1aSopenharmony_ci 78cabdff1aSopenharmony_ci} 79cabdff1aSopenharmony_cistatic void fft2048_vsx_interleave(FFTComplex *z) 80cabdff1aSopenharmony_ci{ 81cabdff1aSopenharmony_ci fft1024_vsx_interleave(z); 82cabdff1aSopenharmony_ci fft512_vsx_interleave(z+1024); 83cabdff1aSopenharmony_ci fft512_vsx_interleave(z+1536); 84cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_2048,256); 85cabdff1aSopenharmony_ci} 86cabdff1aSopenharmony_cistatic void fft4096_vsx_interleave(FFTComplex *z) 87cabdff1aSopenharmony_ci{ 88cabdff1aSopenharmony_ci fft2048_vsx_interleave(z); 89cabdff1aSopenharmony_ci fft1024_vsx_interleave(z+2048); 90cabdff1aSopenharmony_ci fft1024_vsx_interleave(z+3072); 91cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_4096, 512); 92cabdff1aSopenharmony_ci} 93cabdff1aSopenharmony_cistatic void fft8192_vsx_interleave(FFTComplex *z) 94cabdff1aSopenharmony_ci{ 95cabdff1aSopenharmony_ci fft4096_vsx_interleave(z); 96cabdff1aSopenharmony_ci fft2048_vsx_interleave(z+4096); 97cabdff1aSopenharmony_ci fft2048_vsx_interleave(z+6144); 
98cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_8192,1024); 99cabdff1aSopenharmony_ci} 100cabdff1aSopenharmony_cistatic void fft16384_vsx_interleave(FFTComplex *z) 101cabdff1aSopenharmony_ci{ 102cabdff1aSopenharmony_ci fft8192_vsx_interleave(z); 103cabdff1aSopenharmony_ci fft4096_vsx_interleave(z+8192); 104cabdff1aSopenharmony_ci fft4096_vsx_interleave(z+12288); 105cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_16384,2048); 106cabdff1aSopenharmony_ci} 107cabdff1aSopenharmony_cistatic void fft32768_vsx_interleave(FFTComplex *z) 108cabdff1aSopenharmony_ci{ 109cabdff1aSopenharmony_ci fft16384_vsx_interleave(z); 110cabdff1aSopenharmony_ci fft8192_vsx_interleave(z+16384); 111cabdff1aSopenharmony_ci fft8192_vsx_interleave(z+24576); 112cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_32768,4096); 113cabdff1aSopenharmony_ci} 114cabdff1aSopenharmony_cistatic void fft65536_vsx_interleave(FFTComplex *z) 115cabdff1aSopenharmony_ci{ 116cabdff1aSopenharmony_ci fft32768_vsx_interleave(z); 117cabdff1aSopenharmony_ci fft16384_vsx_interleave(z+32768); 118cabdff1aSopenharmony_ci fft16384_vsx_interleave(z+49152); 119cabdff1aSopenharmony_ci pass_vsx_interleave(z,ff_cos_65536,8192); 120cabdff1aSopenharmony_ci} 121cabdff1aSopenharmony_ci 122cabdff1aSopenharmony_cistatic void fft32_vsx(FFTComplex *z) 123cabdff1aSopenharmony_ci{ 124cabdff1aSopenharmony_ci fft16_vsx(z); 125cabdff1aSopenharmony_ci fft8_vsx(z+16); 126cabdff1aSopenharmony_ci fft8_vsx(z+24); 127cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_32,4); 128cabdff1aSopenharmony_ci} 129cabdff1aSopenharmony_ci 130cabdff1aSopenharmony_cistatic void fft64_vsx(FFTComplex *z) 131cabdff1aSopenharmony_ci{ 132cabdff1aSopenharmony_ci fft32_vsx(z); 133cabdff1aSopenharmony_ci fft16_vsx(z+32); 134cabdff1aSopenharmony_ci fft16_vsx(z+48); 135cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_64, 8); 136cabdff1aSopenharmony_ci} 137cabdff1aSopenharmony_cistatic void fft128_vsx(FFTComplex *z) 138cabdff1aSopenharmony_ci{ 139cabdff1aSopenharmony_ci 
fft64_vsx(z); 140cabdff1aSopenharmony_ci fft32_vsx(z+64); 141cabdff1aSopenharmony_ci fft32_vsx(z+96); 142cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_128,16); 143cabdff1aSopenharmony_ci} 144cabdff1aSopenharmony_cistatic void fft256_vsx(FFTComplex *z) 145cabdff1aSopenharmony_ci{ 146cabdff1aSopenharmony_ci fft128_vsx(z); 147cabdff1aSopenharmony_ci fft64_vsx(z+128); 148cabdff1aSopenharmony_ci fft64_vsx(z+192); 149cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_256,32); 150cabdff1aSopenharmony_ci} 151cabdff1aSopenharmony_cistatic void fft512_vsx(FFTComplex *z) 152cabdff1aSopenharmony_ci{ 153cabdff1aSopenharmony_ci fft256_vsx(z); 154cabdff1aSopenharmony_ci fft128_vsx(z+256); 155cabdff1aSopenharmony_ci fft128_vsx(z+384); 156cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_512,64); 157cabdff1aSopenharmony_ci} 158cabdff1aSopenharmony_cistatic void fft1024_vsx(FFTComplex *z) 159cabdff1aSopenharmony_ci{ 160cabdff1aSopenharmony_ci fft512_vsx(z); 161cabdff1aSopenharmony_ci fft256_vsx(z+512); 162cabdff1aSopenharmony_ci fft256_vsx(z+768); 163cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_1024,128); 164cabdff1aSopenharmony_ci 165cabdff1aSopenharmony_ci} 166cabdff1aSopenharmony_cistatic void fft2048_vsx(FFTComplex *z) 167cabdff1aSopenharmony_ci{ 168cabdff1aSopenharmony_ci fft1024_vsx(z); 169cabdff1aSopenharmony_ci fft512_vsx(z+1024); 170cabdff1aSopenharmony_ci fft512_vsx(z+1536); 171cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_2048,256); 172cabdff1aSopenharmony_ci} 173cabdff1aSopenharmony_cistatic void fft4096_vsx(FFTComplex *z) 174cabdff1aSopenharmony_ci{ 175cabdff1aSopenharmony_ci fft2048_vsx(z); 176cabdff1aSopenharmony_ci fft1024_vsx(z+2048); 177cabdff1aSopenharmony_ci fft1024_vsx(z+3072); 178cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_4096, 512); 179cabdff1aSopenharmony_ci} 180cabdff1aSopenharmony_cistatic void fft8192_vsx(FFTComplex *z) 181cabdff1aSopenharmony_ci{ 182cabdff1aSopenharmony_ci fft4096_vsx(z); 183cabdff1aSopenharmony_ci fft2048_vsx(z+4096); 184cabdff1aSopenharmony_ci fft2048_vsx(z+6144); 
185cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_8192,1024); 186cabdff1aSopenharmony_ci} 187cabdff1aSopenharmony_cistatic void fft16384_vsx(FFTComplex *z) 188cabdff1aSopenharmony_ci{ 189cabdff1aSopenharmony_ci fft8192_vsx(z); 190cabdff1aSopenharmony_ci fft4096_vsx(z+8192); 191cabdff1aSopenharmony_ci fft4096_vsx(z+12288); 192cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_16384,2048); 193cabdff1aSopenharmony_ci} 194cabdff1aSopenharmony_cistatic void fft32768_vsx(FFTComplex *z) 195cabdff1aSopenharmony_ci{ 196cabdff1aSopenharmony_ci fft16384_vsx(z); 197cabdff1aSopenharmony_ci fft8192_vsx(z+16384); 198cabdff1aSopenharmony_ci fft8192_vsx(z+24576); 199cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_32768,4096); 200cabdff1aSopenharmony_ci} 201cabdff1aSopenharmony_cistatic void fft65536_vsx(FFTComplex *z) 202cabdff1aSopenharmony_ci{ 203cabdff1aSopenharmony_ci fft32768_vsx(z); 204cabdff1aSopenharmony_ci fft16384_vsx(z+32768); 205cabdff1aSopenharmony_ci fft16384_vsx(z+49152); 206cabdff1aSopenharmony_ci pass_vsx(z,ff_cos_65536,8192); 207cabdff1aSopenharmony_ci} 208cabdff1aSopenharmony_ci 209cabdff1aSopenharmony_cistatic void (* const fft_dispatch_vsx[])(FFTComplex*) = { 210cabdff1aSopenharmony_ci fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx, 211cabdff1aSopenharmony_ci fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx, 212cabdff1aSopenharmony_ci}; 213cabdff1aSopenharmony_cistatic void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = { 214cabdff1aSopenharmony_ci fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave, 215cabdff1aSopenharmony_ci fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave, 216cabdff1aSopenharmony_ci fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave, 217cabdff1aSopenharmony_ci}; 
218cabdff1aSopenharmony_civoid ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z) 219cabdff1aSopenharmony_ci{ 220cabdff1aSopenharmony_ci fft_dispatch_vsx_interleave[s->nbits-2](z); 221cabdff1aSopenharmony_ci} 222cabdff1aSopenharmony_civoid ff_fft_calc_vsx(FFTContext *s, FFTComplex *z) 223cabdff1aSopenharmony_ci{ 224cabdff1aSopenharmony_ci fft_dispatch_vsx[s->nbits-2](z); 225cabdff1aSopenharmony_ci} 226cabdff1aSopenharmony_ci#endif /* HAVE_VSX */ 227