1/* Copyright (C) 2007 Hong Zhiqian */
2/**
3   @file kiss_fftr_tm.h
4   @author Hong Zhiqian
   @brief TriMedia-specific overrides of kiss_fftr() and kiss_fftri() for Speex
6*/
7/*
8   Redistribution and use in source and binary forms, with or without
9   modification, are permitted provided that the following conditions
10   are met:
11
12   - Redistributions of source code must retain the above copyright
13   notice, this list of conditions and the following disclaimer.
14
15   - Redistributions in binary form must reproduce the above copyright
16   notice, this list of conditions and the following disclaimer in the
17   documentation and/or other materials provided with the distribution.
18
19   - Neither the name of the Xiph.org Foundation nor the names of its
20   contributors may be used to endorse or promote products derived from
21   this software without specific prior written permission.
22
23   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
27   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34*/
35#include "_kiss_fft_guts_tm.h"
36
37#ifdef TM_ASM
38
39#include "profile_tm.h"
40
41#ifdef FIXED_POINT
42
/*
 * TM_NDIV(res, c, frac)
 *
 * Takes the packed 32-bit word `c`, derives two values from it with
 * asri(16, c) and sex16(c) (presumably the sign-extended upper and lower
 * 16-bit halves -- both operations are defined outside this file), negates
 * the first one, multiplies each by the integer constant (32767/frac),
 * passes each product through sround(), and stores pack16lsb(first, second)
 * into `res`.
 *
 * NOTE(review): kiss_fftr() applies this to a word loaded from the complex
 * work buffer, so the negated half is presumably the imaginary part
 * (i.e. "conjugate and divide") -- confirm against the kiss_fft_cpx layout.
 *
 * Usage notes:
 *  - `c` is evaluated exactly once, so `res` and `c` may be the same variable.
 *  - `frac` is expanded twice and used as a divisor: pass a non-zero,
 *    side-effect-free integer expression.
 *  - The macro expands to a single do { } while (0) statement; terminate it
 *    with a semicolon.  It is therefore safe as the body of an unbraced
 *    if/else.
 *  - The temporaries use tm_ndiv_*_ names so they cannot shadow a caller
 *    variable passed as an argument.
 */
#define TM_NDIV(res,c,frac)															\
	do {																			\
		register int tm_ndiv_in_ = (c);												\
		register int tm_ndiv_hi_ = -asri(16, tm_ndiv_in_);							\
		register int tm_ndiv_lo_ = sex16(tm_ndiv_in_);								\
																					\
		(res) = pack16lsb(sround(tm_ndiv_hi_ * (32767/(frac))),						\
						  sround(tm_ndiv_lo_ * (32767/(frac))));					\
	} while (0)
50
51
52#define OVERRIDE_KISS_FFTR
53void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar * restrict timedata, kiss_fft_cpx * restrict freqdata)
54{
55	register int ncfft, ncfft2, k;
56	register int * restrict tmpbuf;
57	register int * restrict twiddles;
58
59	ncfft = st->substate->nfft;
60	ncfft2 = ncfft >> 1;
61	tmpbuf = (int*)st->tmpbuf;
62	twiddles = (int*)st->super_twiddles;
63
64	TMDEBUG_ALIGNMEM(timedata);
65	TMDEBUG_ALIGNMEM(freqdata);
66	TMDEBUG_ALIGNMEM(tmpbuf);
67	TMDEBUG_ALIGNMEM(twiddles);
68
69	kiss_fft(st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf);
70
71	 {
72		register int tdcr, tdci;
73		tdcr = sround(st->tmpbuf[0].r * (32767/2));
74		tdci = sround(st->tmpbuf[0].i * (32767/2));
75
76		freqdata[0].r = tdcr + tdci;
77		freqdata[ncfft].r = tdcr - tdci;
78		freqdata[ncfft].i = freqdata[0].i = 0;
79	 }
80
81	 for ( k=1 ; k <= ncfft2 ; ++k )
82	 {
83		register int fpk, fpnk, i, tw, f1k, f2k;
84		register int fq1, fq2;
85
86		i = ncfft-k;
87
88		fpk  = ld32x(tmpbuf,k);
89		tw   = ld32x(twiddles,k);
90		fpnk = ld32x(tmpbuf,i);
91
92		TM_DIV(fpk, fpk, 2);
93		TM_NDIV(fpnk,fpnk,2);
94
95        TM_ADD( f1k, fpk , fpnk );
96        TM_SUB( f2k, fpk , fpnk );
97		TM_MUL( tw , f2k, tw );
98		TM_ADD( fq1, f1k, tw );
99		TM_SHR( fq1, fq1, 1  );
100		TM_SUB( fq2, f1k, tw );
101		TM_NEGMSB( fq2, fq2 );
102		TM_SHR( fq2, fq2, 1 );
103
104
105		st32d( k<<2, freqdata, fq1 );
106		st32d( i<<2, freqdata, fq2 );
107    }
108}
109
110#define OVERRIDE_KISS_FFTRI
111void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx * restrict freqdata,kiss_fft_scalar * restrict timedata)
112{
113	register int k, ncfft, ncfft2;
114	register int * restrict tmpbuf;
115	register int * restrict twiddles;
116
117    ncfft = st->substate->nfft;
118	ncfft2 = ncfft >> 1;
119	tmpbuf = (int*)st->tmpbuf;
120	twiddles = (int*)st->super_twiddles;
121
122	TMDEBUG_ALIGNMEM(freqdata);
123	TMDEBUG_ALIGNMEM(timedata);
124	TMDEBUG_ALIGNMEM(tmpbuf);
125	TMDEBUG_ALIGNMEM(twiddles);
126
127	{
128		register int fqr, fqnr;
129
130		fqr  = freqdata[0].r;
131		fqnr = freqdata[ncfft].r;
132
133		st->tmpbuf[0].r = fqr + fqnr;
134		st->tmpbuf[0].i = fqr - fqnr;
135	}
136
137    for ( k=1 ; k <= ncfft2 ; ++k )
138	{
139		register int fk, fnkc, i, tw, fek, fok, tmp;
140		register int tbk, tbn;
141
142		i = ncfft-k;
143
144		fk = ld32x(freqdata,k);
145		tw = ld32x(twiddles,k);
146		fnkc = pack16lsb(-freqdata[i].i, freqdata[i].r);
147
148        TM_ADD (fek, fk, fnkc);
149        TM_SUB (tmp, fk, fnkc);
150        TM_MUL (fok, tmp, tw );
151		TM_ADD (tbk, fek, fok);
152		TM_SUB (tbn, fek, fok);
153		TM_NEGMSB(tbn, tbn);
154
155		st32d(k<<2, tmpbuf, tbk);
156		st32d(i<<2, tmpbuf, tbn);
157    }
158    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
159}
160
161#else
162
163#define OVERRIDE_KISS_FFTR
164void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar * restrict timedata,kiss_fft_cpx * restrict freqdata)
165{
166    register kiss_fft_cpx fpnk, fpk, f1k, f2k, twk;
167    register int k, ncfft;
168	register kiss_fft_cpx * restrict tmpbuf, * restrict tw;
169	register float tdcr, tdci;
170
171    ncfft = st->substate->nfft;
172	tmpbuf= st->tmpbuf;
173	tw	  = st->super_twiddles;
174
175	kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, tmpbuf );
176
177	tdcr = tmpbuf[0].r;
178    tdci = tmpbuf[0].i;
179
180	freqdata[0].r = tdcr + tdci;
181    freqdata[ncfft].r = tdcr - tdci;
182    freqdata[ncfft].i = freqdata[0].i = 0;
183
184    for ( k=1;k <= ncfft/2 ; ++k )
185	{
186        fpk    = tmpbuf[k];
187        fpnk.r = tmpbuf[ncfft-k].r;
188        fpnk.i = -tmpbuf[ncfft-k].i;
189
190        C_ADD( f1k, fpk , fpnk );
191        C_SUB( f2k, fpk , fpnk );
192        C_MUL( twk, f2k , tw[k]);
193
194        freqdata[k].r = HALF_OF(f1k.r + twk.r);
195        freqdata[k].i = HALF_OF(f1k.i + twk.i);
196        freqdata[ncfft-k].r = HALF_OF(f1k.r - twk.r);
197        freqdata[ncfft-k].i = HALF_OF(twk.i - f1k.i);
198    }
199}
200
201#define OVERRIDE_KISS_FFTRI
202void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx * restrict freqdata,kiss_fft_scalar * restrict timedata)
203{
204    register int k, ncfft;
205	register kiss_fft_cpx * restrict tmpbuf, * restrict tw;
206
207
208    ncfft = st->substate->nfft;
209	tmpbuf= st->tmpbuf;
210	tw	  = st->super_twiddles;
211
212    tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
213    tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
214
215    for (k = 1; k <= ncfft / 2; ++k)
216	{
217        register kiss_fft_cpx fk, fnkc, fek, fok, tmp;
218        fk = freqdata[k];
219        fnkc.r = freqdata[ncfft - k].r;
220        fnkc.i = -freqdata[ncfft - k].i;
221
222        C_ADD (fek, fk, fnkc);
223        C_SUB (tmp, fk, fnkc);
224        C_MUL (fok,tmp,tw[k]);
225        C_ADD (tmpbuf[k],fek, fok);
226        C_SUB (tmp, fek, fok);
227		tmpbuf[ncfft - k].r = tmp.r;
228        tmpbuf[ncfft - k].i = -tmp.i;
229	}
230    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
231}
232
233#endif
234#endif
235
236