/*
 * FFT transform, optimized with VSX built-in functions
 * Copyright (c) 2014 Rong Yan
 *
 * This algorithm (though not any of the implementation details) is
 * based on libdjbfft by D. J. Bernstein.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */


#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/fft.h"
#include "libavcodec/fft-internal.h"
#include "fft_vsx.h"

#if HAVE_VSX

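/*
 * Split-radix recursion: an N-point transform is computed as an N/2-point
 * FFT on the first half of z followed by two N/4-point FFTs on the
 * remaining quarters, combined by pass_vsx_interleave() with the ff_cos_N
 * twiddle-factor table; the last argument of the combine pass is N/8.
 * The 4-, 8- and 16-point base cases and the pass routines are provided
 * by fft_vsx.h.
 */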
static void fft32_vsx_interleave(FFTComplex *z)
{
    fft16_vsx_interleave(z);
    fft8_vsx_interleave(z+16);
    fft8_vsx_interleave(z+24);
    pass_vsx_interleave(z, ff_cos_32, 4);
}

static void fft64_vsx_interleave(FFTComplex *z)
{
    fft32_vsx_interleave(z);
    fft16_vsx_interleave(z+32);
    fft16_vsx_interleave(z+48);
    pass_vsx_interleave(z, ff_cos_64, 8);
}

static void fft128_vsx_interleave(FFTComplex *z)
{
    fft64_vsx_interleave(z);
    fft32_vsx_interleave(z+64);
    fft32_vsx_interleave(z+96);
    pass_vsx_interleave(z, ff_cos_128, 16);
}

static void fft256_vsx_interleave(FFTComplex *z)
{
    fft128_vsx_interleave(z);
    fft64_vsx_interleave(z+128);
    fft64_vsx_interleave(z+192);
    pass_vsx_interleave(z, ff_cos_256, 32);
}

static void fft512_vsx_interleave(FFTComplex *z)
{
    fft256_vsx_interleave(z);
    fft128_vsx_interleave(z+256);
    fft128_vsx_interleave(z+384);
    pass_vsx_interleave(z, ff_cos_512, 64);
}

static void fft1024_vsx_interleave(FFTComplex *z)
{
    fft512_vsx_interleave(z);
    fft256_vsx_interleave(z+512);
    fft256_vsx_interleave(z+768);
    pass_vsx_interleave(z, ff_cos_1024, 128);
}

static void fft2048_vsx_interleave(FFTComplex *z)
{
    fft1024_vsx_interleave(z);
    fft512_vsx_interleave(z+1024);
    fft512_vsx_interleave(z+1536);
    pass_vsx_interleave(z, ff_cos_2048, 256);
}

static void fft4096_vsx_interleave(FFTComplex *z)
{
    fft2048_vsx_interleave(z);
    fft1024_vsx_interleave(z+2048);
    fft1024_vsx_interleave(z+3072);
    pass_vsx_interleave(z, ff_cos_4096, 512);
}

static void fft8192_vsx_interleave(FFTComplex *z)
{
    fft4096_vsx_interleave(z);
    fft2048_vsx_interleave(z+4096);
    fft2048_vsx_interleave(z+6144);
    pass_vsx_interleave(z, ff_cos_8192, 1024);
}

static void fft16384_vsx_interleave(FFTComplex *z)
{
    fft8192_vsx_interleave(z);
    fft4096_vsx_interleave(z+8192);
    fft4096_vsx_interleave(z+12288);
    pass_vsx_interleave(z, ff_cos_16384, 2048);
}

static void fft32768_vsx_interleave(FFTComplex *z)
{
    fft16384_vsx_interleave(z);
    fft8192_vsx_interleave(z+16384);
    fft8192_vsx_interleave(z+24576);
    pass_vsx_interleave(z, ff_cos_32768, 4096);
}

static void fft65536_vsx_interleave(FFTComplex *z)
{
    fft32768_vsx_interleave(z);
    fft16384_vsx_interleave(z+32768);
    fft16384_vsx_interleave(z+49152);
    pass_vsx_interleave(z, ff_cos_65536, 8192);
}

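/*
 * The same split-radix recursion as above, built on the non-interleaved
 * fft*_vsx()/pass_vsx() primitives from fft_vsx.h.
 */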
static void fft32_vsx(FFTComplex *z)
{
    fft16_vsx(z);
    fft8_vsx(z+16);
    fft8_vsx(z+24);
    pass_vsx(z, ff_cos_32, 4);
}

static void fft64_vsx(FFTComplex *z)
{
    fft32_vsx(z);
    fft16_vsx(z+32);
    fft16_vsx(z+48);
    pass_vsx(z, ff_cos_64, 8);
}

static void fft128_vsx(FFTComplex *z)
{
    fft64_vsx(z);
    fft32_vsx(z+64);
    fft32_vsx(z+96);
    pass_vsx(z, ff_cos_128, 16);
}

static void fft256_vsx(FFTComplex *z)
{
    fft128_vsx(z);
    fft64_vsx(z+128);
    fft64_vsx(z+192);
    pass_vsx(z, ff_cos_256, 32);
}

static void fft512_vsx(FFTComplex *z)
{
    fft256_vsx(z);
    fft128_vsx(z+256);
    fft128_vsx(z+384);
    pass_vsx(z, ff_cos_512, 64);
}

static void fft1024_vsx(FFTComplex *z)
{
    fft512_vsx(z);
    fft256_vsx(z+512);
    fft256_vsx(z+768);
    pass_vsx(z, ff_cos_1024, 128);
}

static void fft2048_vsx(FFTComplex *z)
{
    fft1024_vsx(z);
    fft512_vsx(z+1024);
    fft512_vsx(z+1536);
    pass_vsx(z, ff_cos_2048, 256);
}

static void fft4096_vsx(FFTComplex *z)
{
    fft2048_vsx(z);
    fft1024_vsx(z+2048);
    fft1024_vsx(z+3072);
    pass_vsx(z, ff_cos_4096, 512);
}

static void fft8192_vsx(FFTComplex *z)
{
    fft4096_vsx(z);
    fft2048_vsx(z+4096);
    fft2048_vsx(z+6144);
    pass_vsx(z, ff_cos_8192, 1024);
}

static void fft16384_vsx(FFTComplex *z)
{
    fft8192_vsx(z);
    fft4096_vsx(z+8192);
    fft4096_vsx(z+12288);
    pass_vsx(z, ff_cos_16384, 2048);
}

static void fft32768_vsx(FFTComplex *z)
{
    fft16384_vsx(z);
    fft8192_vsx(z+16384);
    fft8192_vsx(z+24576);
    pass_vsx(z, ff_cos_32768, 4096);
}

static void fft65536_vsx(FFTComplex *z)
{
    fft32768_vsx(z);
    fft16384_vsx(z+32768);
    fft16384_vsx(z+49152);
    pass_vsx(z, ff_cos_65536, 8192);
}

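/*
 * Dispatch tables indexed by log2(n) - 2: entry 0 is the 4-point
 * transform, entry 1 the 8-point transform, and so on up to 2^16 points.
 */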
static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
    fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
    fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
};

static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
    fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
    fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
    fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave,
    fft32768_vsx_interleave, fft65536_vsx_interleave,
};
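/*
 * Public entry points: s->nbits is log2 of the transform size, so
 * nbits - 2 selects the matching function from the tables above.
 */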
void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
{
    fft_dispatch_vsx_interleave[s->nbits - 2](z);
}

void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
{
    fft_dispatch_vsx[s->nbits - 2](z);
}
#endif /* HAVE_VSX */