xref: /third_party/ffmpeg/libavcodec/ppc/fft_vsx.c (revision cabdff1a)
/*
 * FFT transform, optimized with VSX built-in functions
 * Copyright (c) 2014 Rong Yan
 *
 * This algorithm (though not any of the implementation details) is
 * based on libdjbfft by D. J. Bernstein.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24cabdff1aSopenharmony_ci
25cabdff1aSopenharmony_ci
26cabdff1aSopenharmony_ci#include "config.h"
27cabdff1aSopenharmony_ci#include "libavutil/cpu.h"
28cabdff1aSopenharmony_ci#include "libavutil/ppc/util_altivec.h"
29cabdff1aSopenharmony_ci#include "libavcodec/fft.h"
30cabdff1aSopenharmony_ci#include "libavcodec/fft-internal.h"
31cabdff1aSopenharmony_ci#include "fft_vsx.h"
32cabdff1aSopenharmony_ci
33cabdff1aSopenharmony_ci#if HAVE_VSX
34cabdff1aSopenharmony_ci
35cabdff1aSopenharmony_cistatic void fft32_vsx_interleave(FFTComplex *z)
36cabdff1aSopenharmony_ci{
37cabdff1aSopenharmony_ci    fft16_vsx_interleave(z);
38cabdff1aSopenharmony_ci    fft8_vsx_interleave(z+16);
39cabdff1aSopenharmony_ci    fft8_vsx_interleave(z+24);
40cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_32,4);
41cabdff1aSopenharmony_ci}
42cabdff1aSopenharmony_ci
43cabdff1aSopenharmony_cistatic void fft64_vsx_interleave(FFTComplex *z)
44cabdff1aSopenharmony_ci{
45cabdff1aSopenharmony_ci    fft32_vsx_interleave(z);
46cabdff1aSopenharmony_ci    fft16_vsx_interleave(z+32);
47cabdff1aSopenharmony_ci    fft16_vsx_interleave(z+48);
48cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_64, 8);
49cabdff1aSopenharmony_ci}
50cabdff1aSopenharmony_cistatic void fft128_vsx_interleave(FFTComplex *z)
51cabdff1aSopenharmony_ci{
52cabdff1aSopenharmony_ci    fft64_vsx_interleave(z);
53cabdff1aSopenharmony_ci    fft32_vsx_interleave(z+64);
54cabdff1aSopenharmony_ci    fft32_vsx_interleave(z+96);
55cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_128,16);
56cabdff1aSopenharmony_ci}
57cabdff1aSopenharmony_cistatic void fft256_vsx_interleave(FFTComplex *z)
58cabdff1aSopenharmony_ci{
59cabdff1aSopenharmony_ci    fft128_vsx_interleave(z);
60cabdff1aSopenharmony_ci    fft64_vsx_interleave(z+128);
61cabdff1aSopenharmony_ci    fft64_vsx_interleave(z+192);
62cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_256,32);
63cabdff1aSopenharmony_ci}
64cabdff1aSopenharmony_cistatic void fft512_vsx_interleave(FFTComplex *z)
65cabdff1aSopenharmony_ci{
66cabdff1aSopenharmony_ci    fft256_vsx_interleave(z);
67cabdff1aSopenharmony_ci    fft128_vsx_interleave(z+256);
68cabdff1aSopenharmony_ci    fft128_vsx_interleave(z+384);
69cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_512,64);
70cabdff1aSopenharmony_ci}
71cabdff1aSopenharmony_cistatic void fft1024_vsx_interleave(FFTComplex *z)
72cabdff1aSopenharmony_ci{
73cabdff1aSopenharmony_ci    fft512_vsx_interleave(z);
74cabdff1aSopenharmony_ci    fft256_vsx_interleave(z+512);
75cabdff1aSopenharmony_ci    fft256_vsx_interleave(z+768);
76cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_1024,128);
77cabdff1aSopenharmony_ci
78cabdff1aSopenharmony_ci}
79cabdff1aSopenharmony_cistatic void fft2048_vsx_interleave(FFTComplex *z)
80cabdff1aSopenharmony_ci{
81cabdff1aSopenharmony_ci    fft1024_vsx_interleave(z);
82cabdff1aSopenharmony_ci    fft512_vsx_interleave(z+1024);
83cabdff1aSopenharmony_ci    fft512_vsx_interleave(z+1536);
84cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_2048,256);
85cabdff1aSopenharmony_ci}
86cabdff1aSopenharmony_cistatic void fft4096_vsx_interleave(FFTComplex *z)
87cabdff1aSopenharmony_ci{
88cabdff1aSopenharmony_ci    fft2048_vsx_interleave(z);
89cabdff1aSopenharmony_ci    fft1024_vsx_interleave(z+2048);
90cabdff1aSopenharmony_ci    fft1024_vsx_interleave(z+3072);
91cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_4096, 512);
92cabdff1aSopenharmony_ci}
93cabdff1aSopenharmony_cistatic void fft8192_vsx_interleave(FFTComplex *z)
94cabdff1aSopenharmony_ci{
95cabdff1aSopenharmony_ci    fft4096_vsx_interleave(z);
96cabdff1aSopenharmony_ci    fft2048_vsx_interleave(z+4096);
97cabdff1aSopenharmony_ci    fft2048_vsx_interleave(z+6144);
98cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_8192,1024);
99cabdff1aSopenharmony_ci}
100cabdff1aSopenharmony_cistatic void fft16384_vsx_interleave(FFTComplex *z)
101cabdff1aSopenharmony_ci{
102cabdff1aSopenharmony_ci    fft8192_vsx_interleave(z);
103cabdff1aSopenharmony_ci    fft4096_vsx_interleave(z+8192);
104cabdff1aSopenharmony_ci    fft4096_vsx_interleave(z+12288);
105cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_16384,2048);
106cabdff1aSopenharmony_ci}
107cabdff1aSopenharmony_cistatic void fft32768_vsx_interleave(FFTComplex *z)
108cabdff1aSopenharmony_ci{
109cabdff1aSopenharmony_ci    fft16384_vsx_interleave(z);
110cabdff1aSopenharmony_ci    fft8192_vsx_interleave(z+16384);
111cabdff1aSopenharmony_ci    fft8192_vsx_interleave(z+24576);
112cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_32768,4096);
113cabdff1aSopenharmony_ci}
114cabdff1aSopenharmony_cistatic void fft65536_vsx_interleave(FFTComplex *z)
115cabdff1aSopenharmony_ci{
116cabdff1aSopenharmony_ci    fft32768_vsx_interleave(z);
117cabdff1aSopenharmony_ci    fft16384_vsx_interleave(z+32768);
118cabdff1aSopenharmony_ci    fft16384_vsx_interleave(z+49152);
119cabdff1aSopenharmony_ci    pass_vsx_interleave(z,ff_cos_65536,8192);
120cabdff1aSopenharmony_ci}
121cabdff1aSopenharmony_ci
122cabdff1aSopenharmony_cistatic void fft32_vsx(FFTComplex *z)
123cabdff1aSopenharmony_ci{
124cabdff1aSopenharmony_ci    fft16_vsx(z);
125cabdff1aSopenharmony_ci    fft8_vsx(z+16);
126cabdff1aSopenharmony_ci    fft8_vsx(z+24);
127cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_32,4);
128cabdff1aSopenharmony_ci}
129cabdff1aSopenharmony_ci
130cabdff1aSopenharmony_cistatic void fft64_vsx(FFTComplex *z)
131cabdff1aSopenharmony_ci{
132cabdff1aSopenharmony_ci    fft32_vsx(z);
133cabdff1aSopenharmony_ci    fft16_vsx(z+32);
134cabdff1aSopenharmony_ci    fft16_vsx(z+48);
135cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_64, 8);
136cabdff1aSopenharmony_ci}
137cabdff1aSopenharmony_cistatic void fft128_vsx(FFTComplex *z)
138cabdff1aSopenharmony_ci{
139cabdff1aSopenharmony_ci    fft64_vsx(z);
140cabdff1aSopenharmony_ci    fft32_vsx(z+64);
141cabdff1aSopenharmony_ci    fft32_vsx(z+96);
142cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_128,16);
143cabdff1aSopenharmony_ci}
144cabdff1aSopenharmony_cistatic void fft256_vsx(FFTComplex *z)
145cabdff1aSopenharmony_ci{
146cabdff1aSopenharmony_ci    fft128_vsx(z);
147cabdff1aSopenharmony_ci    fft64_vsx(z+128);
148cabdff1aSopenharmony_ci    fft64_vsx(z+192);
149cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_256,32);
150cabdff1aSopenharmony_ci}
151cabdff1aSopenharmony_cistatic void fft512_vsx(FFTComplex *z)
152cabdff1aSopenharmony_ci{
153cabdff1aSopenharmony_ci    fft256_vsx(z);
154cabdff1aSopenharmony_ci    fft128_vsx(z+256);
155cabdff1aSopenharmony_ci    fft128_vsx(z+384);
156cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_512,64);
157cabdff1aSopenharmony_ci}
158cabdff1aSopenharmony_cistatic void fft1024_vsx(FFTComplex *z)
159cabdff1aSopenharmony_ci{
160cabdff1aSopenharmony_ci    fft512_vsx(z);
161cabdff1aSopenharmony_ci    fft256_vsx(z+512);
162cabdff1aSopenharmony_ci    fft256_vsx(z+768);
163cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_1024,128);
164cabdff1aSopenharmony_ci
165cabdff1aSopenharmony_ci}
166cabdff1aSopenharmony_cistatic void fft2048_vsx(FFTComplex *z)
167cabdff1aSopenharmony_ci{
168cabdff1aSopenharmony_ci    fft1024_vsx(z);
169cabdff1aSopenharmony_ci    fft512_vsx(z+1024);
170cabdff1aSopenharmony_ci    fft512_vsx(z+1536);
171cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_2048,256);
172cabdff1aSopenharmony_ci}
173cabdff1aSopenharmony_cistatic void fft4096_vsx(FFTComplex *z)
174cabdff1aSopenharmony_ci{
175cabdff1aSopenharmony_ci    fft2048_vsx(z);
176cabdff1aSopenharmony_ci    fft1024_vsx(z+2048);
177cabdff1aSopenharmony_ci    fft1024_vsx(z+3072);
178cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_4096, 512);
179cabdff1aSopenharmony_ci}
180cabdff1aSopenharmony_cistatic void fft8192_vsx(FFTComplex *z)
181cabdff1aSopenharmony_ci{
182cabdff1aSopenharmony_ci    fft4096_vsx(z);
183cabdff1aSopenharmony_ci    fft2048_vsx(z+4096);
184cabdff1aSopenharmony_ci    fft2048_vsx(z+6144);
185cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_8192,1024);
186cabdff1aSopenharmony_ci}
187cabdff1aSopenharmony_cistatic void fft16384_vsx(FFTComplex *z)
188cabdff1aSopenharmony_ci{
189cabdff1aSopenharmony_ci    fft8192_vsx(z);
190cabdff1aSopenharmony_ci    fft4096_vsx(z+8192);
191cabdff1aSopenharmony_ci    fft4096_vsx(z+12288);
192cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_16384,2048);
193cabdff1aSopenharmony_ci}
194cabdff1aSopenharmony_cistatic void fft32768_vsx(FFTComplex *z)
195cabdff1aSopenharmony_ci{
196cabdff1aSopenharmony_ci    fft16384_vsx(z);
197cabdff1aSopenharmony_ci    fft8192_vsx(z+16384);
198cabdff1aSopenharmony_ci    fft8192_vsx(z+24576);
199cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_32768,4096);
200cabdff1aSopenharmony_ci}
201cabdff1aSopenharmony_cistatic void fft65536_vsx(FFTComplex *z)
202cabdff1aSopenharmony_ci{
203cabdff1aSopenharmony_ci    fft32768_vsx(z);
204cabdff1aSopenharmony_ci    fft16384_vsx(z+32768);
205cabdff1aSopenharmony_ci    fft16384_vsx(z+49152);
206cabdff1aSopenharmony_ci    pass_vsx(z,ff_cos_65536,8192);
207cabdff1aSopenharmony_ci}
208cabdff1aSopenharmony_ci
209cabdff1aSopenharmony_cistatic void (* const fft_dispatch_vsx[])(FFTComplex*) = {
210cabdff1aSopenharmony_ci    fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
211cabdff1aSopenharmony_ci    fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
212cabdff1aSopenharmony_ci};
213cabdff1aSopenharmony_cistatic void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
214cabdff1aSopenharmony_ci    fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
215cabdff1aSopenharmony_ci    fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
216cabdff1aSopenharmony_ci    fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
217cabdff1aSopenharmony_ci};
218cabdff1aSopenharmony_civoid ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
219cabdff1aSopenharmony_ci{
220cabdff1aSopenharmony_ci     fft_dispatch_vsx_interleave[s->nbits-2](z);
221cabdff1aSopenharmony_ci}
222cabdff1aSopenharmony_civoid ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
223cabdff1aSopenharmony_ci{
224cabdff1aSopenharmony_ci     fft_dispatch_vsx[s->nbits-2](z);
225cabdff1aSopenharmony_ci}
226cabdff1aSopenharmony_ci#endif /* HAVE_VSX */
227