; back port from GOGO-no-coda 2.24b by Takehiro TOMINAGA

; GOGO-no-coda
;	Copyright (C) 1999 shigeo
;	special thanks to URURI
6159b3361Sopenharmony_ci
7159b3361Sopenharmony_ci%include "nasm.h"
8159b3361Sopenharmony_ci
9159b3361Sopenharmony_ci	externdef costab_fft
10159b3361Sopenharmony_ci	externdef sintab_fft
11159b3361Sopenharmony_ci
; Single-precision constants for the transform procedures in this file.
; Only D_1_41421, D_1_0 and D_0_0 are referenced by fht_FPU and
; fht_FPU_FXCH; the remaining entries are kept unchanged.
	segment_data
	align 32
D_1_41421	dd	1.41421356	; ~sqrt(2), scales gi[k2] and gi[k3] in the first pass
D_1_0	dd	1.0		; starting value of c1 for each stage
D_0_5	dd	0.5		; not referenced by the procedures in this file
D_0_25	dd	0.25		; not referenced by the procedures in this file
D_0_0005	dd	0.0005		; not referenced by the procedures in this file
D_0_0	dd	0.0		; starting value of s1 for each stage
20159b3361Sopenharmony_ci
21159b3361Sopenharmony_ci	segment_code
22159b3361Sopenharmony_ci
;-----------------------------------------------------------------------
; void fht_FPU(float *fz, int n);
;
; In-place transform ("fht", presumably a fast Hartley transform) of the
; single-precision values at fz, using x87 instructions only.
;
; Args (declared with the `arg` macro, read through sp(...)):
;	fz	pointer to the float buffer; rewritten in place
;	n	buffer length in floats: the stage loop ends once the
;		group size reaches n, the inner loops end at fz + n
; Returns:	nothing
; Reads:	costab_fft[k] / sintab_fft[k] for k = 2, 4, 6, ... (one
;		pair per stage), D_1_41421, D_1_0, D_0_0
;
; Register roles (r0..r6 are aliases, presumably defined in nasm.h):
;	r0 = fi		r1 = gi		r2 = k3*fsize	r3 = k1*fsize
;	r4 = kx*fsize	r5 = i*fsize	r6 = fn = fz + n (end of buffer)
; fsize is the size of one float (4 bytes).
;
; Every loop is do-while shaped, so the first group of floats is
; processed before any bound is checked.
; Stack-state comments list the x87 stack from st0 on the left.
;-----------------------------------------------------------------------
proc	fht_FPU

%$fz	arg	4
%$n	arg	4

%$k	local	4		; index into costab_fft / sintab_fft

%$f0	local	4		; butterfly temporaries of .do3
%$f1	local	4
%$f2	local	4
%$f3	local	4

%$g0	local	4
%$g1	local	4
%$g2	local	4
%$g3	local	4

%$s1	local	4		; current rotation (sin, cos)
%$c1	local	4
%$s2	local	4		; double-angle rotation (sin, cos)
%$c2	local	4

%$t_s	local	4		; per-stage rotation step (sin, cos)
%$t_c	local	4
	alloc

	pushd	ebp, ebx, esi, edi

fht_FPU_1st_part:

fht_FPU_2nd_part:

fht_FPU_3rd_part:

.do_init:
	mov	r3, 16		;k1*fsize = 4*fsize = k4
	mov	r4, 8		;kx = k1/2
	mov	r2, 48		;k3*fsize
	mov	dword [sp(%$k)], 2	;k = 2
	mov	r0, [sp(%$fz)]	;fi
	; fn bounds the .do2 and .do3 loops. It has to be set up here:
	; no other instruction in this procedure writes r6.
	mov	r6, [sp(%$n)]
	lea	r6, [r0+r6*4]		;fn = fz + n
	lea	r1, [r0+8]		;gi = fi + kx

.do:
	; ---- stage pass 1: butterflies that need no rotation ----
.do2:
	;f
	fld	dword [r0]
	fsub	dword [r0+r3]		;f1 = fi[0] - fi[k1]

	fld	dword [r0]
	fadd	dword [r0+r3]		;f0 = fi[0] + fi[k1]

	fld	dword [r0+r3*2]
	fsub	dword [r0+r2]		;f3 = fi[k2] - fi[k3]

	fld	dword [r0+r3*2]
	fadd	dword [r0+r2]		;f2 f3 f0 f1

	fld	st2			;f0 f2 f3 f0 f1
	fadd	st0, st1		;f0+f2 f2 f3 f0 f1
	fstp	dword [r0]		;fi[0]

	fld	st3			;f1 f2 f3 f0 f1
	fadd	st0, st2		;f1+f3 f2 f3 f0 f1
	fstp	dword [r0+r3]		;fi[k1]

	fsubr	st0, st2		;f0-f2 f3 f0 f1
	fstp	dword [r0+r3*2]		;fi[k2]

	fsubr	st0, st2		;f1-f3 f0 f1
	fstp	dword [r0+r2]		;fi[k3]
	fcompp				;pop the leftover f0 f1 (compare result unused)

	;g
	fld	dword [r1]
	fsub	dword [r1+r3]		;g1 = gi[0] - gi[k1]

	fld	dword [r1]
	fadd	dword [r1+r3]		;g0 = gi[0] + gi[k1]

	fld	dword [D_1_41421]
	fmul	dword [r1+r2]		;g3 = sqrt(2) * gi[k3]

	fld	dword [D_1_41421]
	fmul	dword [r1+r3*2]		;g2 g3 g0 g1

	fld	st2			;g0 g2 g3 g0 g1
	fadd	st0, st1		;g0+g2 g2 g3 g0 g1
	fstp	dword [r1]		;gi[0]

	fld	st3			;g1 g2 g3 g0 g1
	fadd	st0, st2		;g1+g3 g2 g3 g0 g1
	fstp	dword [r1+r3]		;gi[k1]

	fsubr	st0, st2		;g0-g2 g3 g0 g1
	fstp	dword [r1+r3*2]		;gi[k2]

	fsubr	st0, st2		;g1-g3 g0 g1
	fstp	dword [r1+r2]		;gi[k3]
	fcompp				;pop the leftover g0 g1 (compare result unused)

	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6			;while (fi < fn)
	jb	.do2


	; ---- load this stage's rotation step, reset (c1, s1) = (1, 0) ----
	mov	r0, [sp(%$k)]
	fld	dword [costab_fft +r0*4]
	fstp	dword [sp(%$t_c)]
	fld	dword [sintab_fft +r0*4]
	fstp	dword [sp(%$t_s)]
	fld	dword [D_1_0]
	fstp	dword [sp(%$c1)]
	fld	dword [D_0_0]
	fstp	dword [sp(%$s1)]

.for_init:
	mov	r5, 4		;i = 1*fsize

.for:
	; advance (c1, s1) by the step (t_c, t_s), then derive the
	; double-angle pair (c2, s2) from the new values
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_c)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_s)]
	fsubp	st1, st0		;c1 = c1*t_c - s1*t_s

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_s)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_c)]
	faddp	st1, st0		;s1 c1   (s1 = c1*t_s + s1*t_c)

	fld	st1
	fmul	st0, st0		;c1c1 s1 c1
	fld	st1
	fmul	st0, st0		;s1s1 c1c1 s1 c1
	fsubp	st1, st0		;c2 s1 c1   (c2 = c1c1 - s1s1)
	fstp	dword [sp(%$c2)]	;s1 c1

	fld	st1			;c1 s1 c1
	fmul	st0, st1		;c1s1 s1 c1
	fadd	st0, st0		;s2 s1 c1   (s2 = 2*c1*s1)
	fstp	dword [sp(%$s2)]	;s1 c1

	fstp	dword [sp(%$s1)]	;c1
	fstp	dword [sp(%$c1)]	;

	mov	r0, [sp(%$fz)]
	add	r0, r5		;r0 = fi = fz + i
	mov	r1, [sp(%$fz)]
	add	r1, r3
	sub	r1, r5		;r1 = gi = fz + k1 - i

	; ---- stage pass 2: rotated butterflies for offset i ----
.do3:
	fld	dword [sp(%$s2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r3]
	fsubp	st1, st0		;b = s2*fi[k1] - c2*gi[k1]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r3]
	faddp	st1, st0		;a = c2*fi[k1] + s2*gi[k1]  b

	fld	dword [r0]
	fsub	st0, st1		;f1 a b   (f1 = fi[0] - a)
	fstp	dword [sp(%$f1)]	;a b

	fadd	dword [r0]		;f0 b   (f0 = fi[0] + a)
	fstp	dword [sp(%$f0)]	;b

	fld	dword [r1]
	fsub	st0, st1		;g1 b   (g1 = gi[0] - b)
	fstp	dword [sp(%$g1)]	;b

	fadd	dword [r1]		;g0   (g0 = gi[0] + b)
	fstp	dword [sp(%$g0)]	;


	fld	dword [sp(%$s2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r2]
	fsubp	st1, st0		;b = s2*fi[k3] - c2*gi[k3]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r2]
	faddp	st1, st0		;a = c2*fi[k3] + s2*gi[k3]  b

	fld	dword [r0+r3*2]
	fsub	st0, st1		;f3 a b   (f3 = fi[k2] - a)
	fstp	dword [sp(%$f3)]	;a b

	fadd	dword [r0+r3*2]	;f2 b   (f2 = fi[k2] + a)
	fstp	dword [sp(%$f2)]	;b

	fld	dword [r1+r3*2]
	fsub	st0, st1		;g3 b   (g3 = gi[k2] - b)
	fstp	dword [sp(%$g3)]	;b

	fadd	dword [r1+r3*2]	;g2   (g2 = gi[k2] + b)
	fstp	dword [sp(%$g2)]	;


	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g3)]
	fsubp	st1, st0		;b = s1*f2 - c1*g3

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g3)]
	faddp	st1, st0		;a = c1*f2 + s1*g3  b

	fld	dword [sp(%$f0)]
	fsub	st0, st1		;fi[k2] a b   (f0 - a)
	fstp	dword [r0+r3*2]

	fadd	dword [sp(%$f0)]	;fi[0] b   (f0 + a)
	fstp	dword [r0]

	fld	dword [sp(%$g1)]
	fsub	st0, st1		;gi[k3] b   (g1 - b)
	fstp	dword [r1+r2]

	fadd	dword [sp(%$g1)]	;gi[k1]   (g1 + b)
	fstp	dword [r1+r3]


	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f3)]
	fsubp	st1, st0		;b = c1*g2 - s1*f3

	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f3)]
	faddp	st1, st0		;a = s1*g2 + c1*f3  b

	fld	dword [sp(%$g0)]
	fsub	st0, st1		;gi[k2] a b   (g0 - a)
	fstp	dword [r1+r3*2]

	fadd	dword [sp(%$g0)]	;gi[0] b   (g0 + a)
	fstp	dword [r1]

	fld	dword [sp(%$f1)]
	fsub	st0, st1		;fi[k3] b   (f1 - b)
	fstp	dword [r0+r2]

	fadd	dword [sp(%$f1)]	;fi[k1]   (f1 + b)
	fstp	dword [r0+r3]


	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6			;while (fi < fn)
	jb near	.do3

	add	r5, 4			;i++
	cmp	r5, r4			;while (i < kx)
	jb near	.for

	cmp	r3, [sp(%$n)]		;r3 equals k4 counted in floats
	jae	.exit			;done once k4 >= n

	; ---- set up the next stage: every stride grows by 4 ----
	add	dword [sp(%$k)], 2	;k  += 2;
	lea	r3, [r3*4]		;k1 *= 4
	lea	r2, [r2*4]		;k3 *= 4
	lea	r4, [r4*4]		;kx *= 4
	mov	r0, [sp(%$fz)]	;fi
	lea	r1, [r0+r4]		;gi = fi + kx
	jmp	.do

.exit:
	popd	ebp, ebx, esi, edi
endproc
309159b3361Sopenharmony_ci
310159b3361Sopenharmony_ci;*************************************************************
311159b3361Sopenharmony_ci
;-----------------------------------------------------------------------
; void fht_FPU_FXCH(float *fz, int n);
;
; In-place transform ("fht", presumably a fast Hartley transform) of the
; single-precision values at fz, using x87 instructions only.  Several
; independent results are kept on the x87 stack at once and reordered
; with fxch before being stored (presumably to overlap FPU latencies).
;
; Args (declared with the `arg` macro, read through sp(...)):
;	fz	pointer to the float buffer; rewritten in place
;	n	buffer length in floats: the stage loop ends once the
;		group size reaches n, the inner loops end at fz + n
; Returns:	nothing
; Reads:	costab_fft[k] / sintab_fft[k] for k = 2, 4, 6, ... (one
;		pair per stage), D_1_41421, D_1_0, D_0_0
;
; Register roles (r0..r6 are aliases, presumably defined in nasm.h):
;	r0 = fi		r1 = gi		r2 = k3*fsize	r3 = k1*fsize
;	r4 = kx*fsize	r5 = i*fsize	r6 = fn = fz + n (end of buffer)
; fsize is the size of one float (4 bytes).
;
; Every loop is do-while shaped, so the first group of floats is
; processed before any bound is checked.
; Stack-state comments list the x87 stack from st0 on the left.
;-----------------------------------------------------------------------
proc	fht_FPU_FXCH

%$fz	arg	4
%$n	arg	4

%$k	local	4		; index into costab_fft / sintab_fft

%$f0	local	4		; butterfly temporaries of .do3
%$f1	local	4
%$f2	local	4
%$f3	local	4

%$g0	local	4
%$g1	local	4
%$g2	local	4
%$g3	local	4

%$s1	local	4		; current rotation (sin, cos)
%$c1	local	4
%$s2	local	4		; double-angle rotation (sin, cos)
%$c2	local	4

%$t_s	local	4		; per-stage rotation step (sin, cos)
%$t_c	local	4
	alloc

	pushd	ebp, ebx, esi, edi

fht_FPU_FXCH_1st_part:

fht_FPU_FXCH_2nd_part:

fht_FPU_FXCH_3rd_part:

.do_init:
	mov	r3, 16		;k1*fsize = 4*fsize = k4
	mov	r4, 8		;kx = k1/2
	mov	r2, 48		;k3*fsize
	mov	dword [sp(%$k)], 2	;k = 2
	mov	r0, [sp(%$fz)]	;fi
	; fn bounds the .do2 and .do3 loops. It has to be set up here:
	; no other instruction in this procedure writes r6.
	mov	r6, [sp(%$n)]
	lea	r6, [r0+r6*4]		;fn = fz + n
	lea	r1, [r0+8]		;gi = fi + kx

.do:
	; ---- stage pass 1: butterflies that need no rotation ----
.do2:
	;f
	fld	dword [r0]
	fsub	dword [r0+r3]		;f1 = fi[0] - fi[k1]
	fld	dword [r0]
	fadd	dword [r0+r3]		;f0 = fi[0] + fi[k1]

	fld	dword [r0+r3*2]
	fsub	dword [r0+r2]		;f3 = fi[k2] - fi[k3]
	fld	dword [r0+r3*2]
	fadd	dword [r0+r2]		;f2 f3 f0 f1

	fld	st3			;f1 f2 f3 f0 f1
	fld	st3			;f0 f1 f2 f3 f0 f1
	fxch	st5			;f1 f1 f2 f3 f0 f0
	fadd	st0, st3		;f1+f3 f1 f2 f3 f0 f0
	fxch	st4			;f0 f1 f2 f3 f1+f3 f0
	fadd	st0, st2		;f0+f2 f1 f2 f3 f1+f3 f0
	fxch	st3			;f3 f1 f2 f0+f2 f1+f3 f0
	fsubp	st1, st0		;f1-f3 f2 f0+f2 f1+f3 f0
	fxch	st1			;f2 f1-f3 f0+f2 f1+f3 f0
	fsubp	st4, st0		;f1-f3 f0+f2 f1+f3 f0-f2
	fxch	st2			;f1+f3 f0+f2 f1-f3 f0-f2

	fstp	dword [r0+r3]		;fi[k1]
	fstp	dword [r0]		;fi[0]
	fstp	dword [r0+r2]		;fi[k3]
	fstp	dword [r0+r3*2]		;fi[k2]

	;g
	fld	dword [r1]
	fsub	dword [r1+r3]		;g1 = gi[0] - gi[k1]
	fld	dword [r1]
	fadd	dword [r1+r3]		;g0 = gi[0] + gi[k1]

	fld	dword [D_1_41421]
	fmul	dword [r1+r2]		;g3 = sqrt(2) * gi[k3]
	fld	dword [D_1_41421]
	fmul	dword [r1+r3*2]		;g2 g3 g0 g1

	fld	st3			;g1 g2 g3 g0 g1
	fld	st3			;g0 g1 g2 g3 g0 g1
	fxch	st5			;g1 g1 g2 g3 g0 g0
	fadd	st0, st3		;g1+g3 g1 g2 g3 g0 g0
	fxch	st4			;g0 g1 g2 g3 g1+g3 g0
	fadd	st0, st2		;g0+g2 g1 g2 g3 g1+g3 g0
	fxch	st3			;g3 g1 g2 g0+g2 g1+g3 g0
	fsubp	st1, st0		;g1-g3 g2 g0+g2 g1+g3 g0
	fxch	st1			;g2 g1-g3 g0+g2 g1+g3 g0
	fsubp	st4, st0		;g1-g3 g0+g2 g1+g3 g0-g2
	fxch	st2			;g1+g3 g0+g2 g1-g3 g0-g2

	fstp	dword [r1+r3]		;gi[k1]
	fstp	dword [r1]		;gi[0]
	fstp	dword [r1+r2]		;gi[k3]
	fstp	dword [r1+r3*2]		;gi[k2]

	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6			;while (fi < fn)
	jb	.do2


	; ---- load this stage's rotation step, reset (c1, s1) = (1, 0) ----
	mov	r0, [sp(%$k)]
	fld	dword [costab_fft +r0*4]	;cos
	fld	dword [sintab_fft +r0*4]	;sin cos
	fld	dword [D_1_0]		;1 sin cos
	fld	dword [D_0_0]		;0 1 sin cos
	fxch	st3			;cos 1 sin 0
	fstp	dword [sp(%$t_c)]	;1 sin 0
	fxch	st1			;sin 1 0
	fstp	dword [sp(%$t_s)]	;1 0
	fstp	dword [sp(%$c1)]	;0
	fstp	dword [sp(%$s1)]	;

.for_init:
	mov	r5, 4		;i = 1*fsize

.for:
	; advance (c1, s1) by the step (t_c, t_s), then derive the
	; double-angle pair (c2, s2) from the new values
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_c)]	;c1*t_c
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_s)]	;s1*t_s c1*t_c

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_s)]	;c1*t_s s1*t_s c1*t_c
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_c)]	;s1*t_c c1*t_s s1*t_s c1*t_c
	fxch	st2			;s1*t_s c1*t_s s1*t_c c1*t_c
	fsubp	st3, st0		;c1*t_s s1*t_c c1   (c1 = c1*t_c - s1*t_s)
	faddp	st1, st0		;s1 c1   (s1 = c1*t_s + s1*t_c)

	fld	st1			;c1 s1 c1
	fxch	st2			;c1 s1 c1
	fmul	st0, st0		;c1c1 s1 c1
	fld	st1			;s1 c1c1 s1 c1
	fxch	st2			;s1 c1c1 s1 c1
	fmul	st0, st0		;s1s1 c1c1 s1 c1

	fxch	st3			;c1 c1c1 s1 s1s1
	fst	dword [sp(%$c1)]	;store the new c1, keep it on the stack
	fxch	st2			;s1 c1c1 c1 s1s1
	fst	dword [sp(%$s1)]	;store the new s1, keep it on the stack

	fmulp	st2, st0		;c1c1 c1s1 s1s1
	fsubrp	st2, st0		;c1s1 c2   (c2 = c1c1 - s1s1)
	fadd	st0, st0		;s2 c2   (s2 = 2*c1*s1)
	fxch	st1			;c2 s2
	fstp	dword [sp(%$c2)]
	fstp	dword [sp(%$s2)]

	mov	r0, [sp(%$fz)]
	mov	r1, [sp(%$fz)]
	add	r0, r5		;r0 = fi = fz + i
	add	r1, r3
	sub	r1, r5		;r1 = gi = fz + k1 - i

	; ---- stage pass 2: rotated butterflies for offset i ----
.do3:
	fld	dword [sp(%$s2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r3]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r3]
	fxch	st2
	fsubp	st3, st0		;b = s2*fi[k1] - c2*gi[k1]
	faddp	st1, st0		;a = c2*fi[k1] + s2*gi[k1]  b

	fld	dword [r1]
	fsub	st0, st2		;g1 a b   (g1 = gi[0] - b)
	fxch	st2			;b a g1
	fadd	dword [r1]		;g0 a g1   (g0 = gi[0] + b)

	fld	dword [r0]
	fsub	st0, st2		;f1 g0 a g1   (f1 = fi[0] - a)
	fxch	st2			;a g0 f1 g1
	fadd	dword [r0]		;f0 g0 f1 g1   (f0 = fi[0] + a)

	fxch	st3			;g1 g0 f1 f0
	fstp	dword [sp(%$g1)]
	fstp	dword [sp(%$g0)]
	fstp	dword [sp(%$f1)]
	fstp	dword [sp(%$f0)]


	fld	dword [sp(%$s2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r2]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r2]
	fxch	st2
	fsubp	st3, st0		;b = s2*fi[k3] - c2*gi[k3]
	faddp	st1, st0		;a = c2*fi[k3] + s2*gi[k3]  b


	fld	dword [r1+r3*2]
	fsub	st0, st2		;g3 a b   (g3 = gi[k2] - b)
	fxch	st2			;b a g3
	fadd	dword [r1+r3*2]	;g2 a g3   (g2 = gi[k2] + b)

	fld	dword [r0+r3*2]
	fsub	st0, st2		;f3 g2 a g3   (f3 = fi[k2] - a)
	fxch	st2			;a g2 f3 g3
	fadd	dword [r0+r3*2]	;f2 g2 f3 g3   (f2 = fi[k2] + a)

	fxch	st3			;g3 g2 f3 f2
	fstp	dword [sp(%$g3)]
	fstp	dword [sp(%$g2)]
	fstp	dword [sp(%$f3)]
	fstp	dword [sp(%$f2)]


	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g3)]

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g3)]
	fxch	st2
	fsubp	st3, st0		;b = s1*f2 - c1*g3
	faddp	st1, st0		;a = c1*f2 + s1*g3  b

	fld	dword [sp(%$g1)]
	fsub	st0, st2		;gi[k3] a b   (g1 - b)
	fxch	st2			;b a gi[k3]
	fadd	dword [sp(%$g1)]	;gi[k1] a gi[k3]   (g1 + b)

	fld	dword [sp(%$f0)]
	fsub	st0, st2		;fi[k2] gi[k1] a gi[k3]   (f0 - a)
	fxch	st2			;a gi[k1] fi[k2] gi[k3]
	fadd	dword [sp(%$f0)]	;fi[0] gi[k1] fi[k2] gi[k3]   (f0 + a)

	fxch	st3			;gi[k3] gi[k1] fi[k2] fi[0]
	fstp	dword [r1+r2]
	fstp	dword [r1+r3]
	fstp	dword [r0+r3*2]
	fstp	dword [r0]


	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f3)]

	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f3)]
	fxch	st2
	fsubp	st3, st0		;b = c1*g2 - s1*f3
	faddp	st1, st0		;a = s1*g2 + c1*f3  b

	fld	dword [sp(%$f1)]
	fsub	st0, st2		;fi[k3] a b   (f1 - b)
	fxch	st2			;b a fi[k3]
	fadd	dword [sp(%$f1)]	;fi[k1] a fi[k3]   (f1 + b)

	fld	dword [sp(%$g0)]
	fsub	st0, st2		;gi[k2] fi[k1] a fi[k3]   (g0 - a)
	fxch	st2			;a fi[k1] gi[k2] fi[k3]
	fadd	dword [sp(%$g0)]	;gi[0] fi[k1] gi[k2] fi[k3]   (g0 + a)

	fxch	st3			;fi[k3] fi[k1] gi[k2] gi[0]
	fstp	dword [r0+r2]
	fstp	dword [r0+r3]
	fstp	dword [r1+r3*2]
	fstp	dword [r1]


	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6			;while (fi < fn)
	jb near	.do3

	add	r5, 4			;i++
	cmp	r5, r4			;while (i < kx)
	jb near	.for

	cmp	r3, [sp(%$n)]		;r3 equals k4 counted in floats
	jae	.exit			;done once k4 >= n

	; ---- set up the next stage: every stride grows by 4 ----
	add	dword [sp(%$k)], 2	;k  += 2;
	lea	r3, [r3*4]		;k1 *= 4
	lea	r2, [r2*4]		;k3 *= 4
	lea	r4, [r4*4]		;kx *= 4
	mov	r0, [sp(%$fz)]	;fi
	lea	r1, [r0+r4]		;gi = fi + kx
	jmp	.do

.exit:
	popd	ebp, ebx, esi, edi
endproc
618159b3361Sopenharmony_ci
619159b3361Sopenharmony_ci	end
620