153a5a1b3Sopenharmony_ci/*
253a5a1b3Sopenharmony_ciCopyright (c) 2003-2004, Mark Borgerding
353a5a1b3Sopenharmony_ciCopyright (c) 2005-2007, Jean-Marc Valin
453a5a1b3Sopenharmony_ci
553a5a1b3Sopenharmony_ciAll rights reserved.
653a5a1b3Sopenharmony_ci
753a5a1b3Sopenharmony_ciRedistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
853a5a1b3Sopenharmony_ci
953a5a1b3Sopenharmony_ci    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
1053a5a1b3Sopenharmony_ci    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
1153a5a1b3Sopenharmony_ci    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
1253a5a1b3Sopenharmony_ci
1353a5a1b3Sopenharmony_ciTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1453a5a1b3Sopenharmony_ci*/
1553a5a1b3Sopenharmony_ci
1653a5a1b3Sopenharmony_ci
1753a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H
1853a5a1b3Sopenharmony_ci#include "config.h"
1953a5a1b3Sopenharmony_ci#endif
2053a5a1b3Sopenharmony_ci
2153a5a1b3Sopenharmony_ci#include "_kiss_fft_guts.h"
2253a5a1b3Sopenharmony_ci#include "arch.h"
2353a5a1b3Sopenharmony_ci#include "os_support.h"
2453a5a1b3Sopenharmony_ci
2553a5a1b3Sopenharmony_ci/* The guts header contains all the multiplication and addition macros that are defined for
2653a5a1b3Sopenharmony_ci fixed or floating point complex numbers.  It also delares the kf_ internal functions.
2753a5a1b3Sopenharmony_ci */
2853a5a1b3Sopenharmony_ci
2953a5a1b3Sopenharmony_cistatic void kf_bfly2(
3053a5a1b3Sopenharmony_ci        kiss_fft_cpx * Fout,
3153a5a1b3Sopenharmony_ci        const size_t fstride,
3253a5a1b3Sopenharmony_ci        const kiss_fft_cfg st,
3353a5a1b3Sopenharmony_ci        int m,
3453a5a1b3Sopenharmony_ci        int N,
3553a5a1b3Sopenharmony_ci        int mm
3653a5a1b3Sopenharmony_ci        )
3753a5a1b3Sopenharmony_ci{
3853a5a1b3Sopenharmony_ci    kiss_fft_cpx * Fout2;
3953a5a1b3Sopenharmony_ci    kiss_fft_cpx * tw1;
4053a5a1b3Sopenharmony_ci    kiss_fft_cpx t;
4153a5a1b3Sopenharmony_ci    if (!st->inverse) {
4253a5a1b3Sopenharmony_ci       int i,j;
4353a5a1b3Sopenharmony_ci       kiss_fft_cpx * Fout_beg = Fout;
4453a5a1b3Sopenharmony_ci       for (i=0;i<N;i++)
4553a5a1b3Sopenharmony_ci       {
4653a5a1b3Sopenharmony_ci          Fout = Fout_beg + i*mm;
4753a5a1b3Sopenharmony_ci          Fout2 = Fout + m;
4853a5a1b3Sopenharmony_ci          tw1 = st->twiddles;
4953a5a1b3Sopenharmony_ci          for(j=0;j<m;j++)
5053a5a1b3Sopenharmony_ci          {
5153a5a1b3Sopenharmony_ci             /* Almost the same as the code path below, except that we divide the input by two
5253a5a1b3Sopenharmony_ci              (while keeping the best accuracy possible) */
5353a5a1b3Sopenharmony_ci             spx_word32_t tr, ti;
5453a5a1b3Sopenharmony_ci             tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
5553a5a1b3Sopenharmony_ci             ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
5653a5a1b3Sopenharmony_ci             tw1 += fstride;
5753a5a1b3Sopenharmony_ci             Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
5853a5a1b3Sopenharmony_ci             Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
5953a5a1b3Sopenharmony_ci             Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
6053a5a1b3Sopenharmony_ci             Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
6153a5a1b3Sopenharmony_ci             ++Fout2;
6253a5a1b3Sopenharmony_ci             ++Fout;
6353a5a1b3Sopenharmony_ci          }
6453a5a1b3Sopenharmony_ci       }
6553a5a1b3Sopenharmony_ci    } else {
6653a5a1b3Sopenharmony_ci       int i,j;
6753a5a1b3Sopenharmony_ci       kiss_fft_cpx * Fout_beg = Fout;
6853a5a1b3Sopenharmony_ci       for (i=0;i<N;i++)
6953a5a1b3Sopenharmony_ci       {
7053a5a1b3Sopenharmony_ci          Fout = Fout_beg + i*mm;
7153a5a1b3Sopenharmony_ci          Fout2 = Fout + m;
7253a5a1b3Sopenharmony_ci          tw1 = st->twiddles;
7353a5a1b3Sopenharmony_ci          for(j=0;j<m;j++)
7453a5a1b3Sopenharmony_ci          {
7553a5a1b3Sopenharmony_ci             C_MUL (t,  *Fout2 , *tw1);
7653a5a1b3Sopenharmony_ci             tw1 += fstride;
7753a5a1b3Sopenharmony_ci             C_SUB( *Fout2 ,  *Fout , t );
7853a5a1b3Sopenharmony_ci             C_ADDTO( *Fout ,  t );
7953a5a1b3Sopenharmony_ci             ++Fout2;
8053a5a1b3Sopenharmony_ci             ++Fout;
8153a5a1b3Sopenharmony_ci          }
8253a5a1b3Sopenharmony_ci       }
8353a5a1b3Sopenharmony_ci    }
8453a5a1b3Sopenharmony_ci}
8553a5a1b3Sopenharmony_ci
8653a5a1b3Sopenharmony_cistatic void kf_bfly4(
8753a5a1b3Sopenharmony_ci        kiss_fft_cpx * Fout,
8853a5a1b3Sopenharmony_ci        const size_t fstride,
8953a5a1b3Sopenharmony_ci        const kiss_fft_cfg st,
9053a5a1b3Sopenharmony_ci        int m,
9153a5a1b3Sopenharmony_ci        int N,
9253a5a1b3Sopenharmony_ci        int mm
9353a5a1b3Sopenharmony_ci        )
9453a5a1b3Sopenharmony_ci{
9553a5a1b3Sopenharmony_ci    kiss_fft_cpx *tw1,*tw2,*tw3;
9653a5a1b3Sopenharmony_ci    kiss_fft_cpx scratch[6];
9753a5a1b3Sopenharmony_ci    const size_t m2=2*m;
9853a5a1b3Sopenharmony_ci    const size_t m3=3*m;
9953a5a1b3Sopenharmony_ci    int i, j;
10053a5a1b3Sopenharmony_ci
10153a5a1b3Sopenharmony_ci    if (st->inverse)
10253a5a1b3Sopenharmony_ci    {
10353a5a1b3Sopenharmony_ci       kiss_fft_cpx * Fout_beg = Fout;
10453a5a1b3Sopenharmony_ci       for (i=0;i<N;i++)
10553a5a1b3Sopenharmony_ci       {
10653a5a1b3Sopenharmony_ci          Fout = Fout_beg + i*mm;
10753a5a1b3Sopenharmony_ci          tw3 = tw2 = tw1 = st->twiddles;
10853a5a1b3Sopenharmony_ci          for (j=0;j<m;j++)
10953a5a1b3Sopenharmony_ci          {
11053a5a1b3Sopenharmony_ci             C_MUL(scratch[0],Fout[m] , *tw1 );
11153a5a1b3Sopenharmony_ci             C_MUL(scratch[1],Fout[m2] , *tw2 );
11253a5a1b3Sopenharmony_ci             C_MUL(scratch[2],Fout[m3] , *tw3 );
11353a5a1b3Sopenharmony_ci
11453a5a1b3Sopenharmony_ci             C_SUB( scratch[5] , *Fout, scratch[1] );
11553a5a1b3Sopenharmony_ci             C_ADDTO(*Fout, scratch[1]);
11653a5a1b3Sopenharmony_ci             C_ADD( scratch[3] , scratch[0] , scratch[2] );
11753a5a1b3Sopenharmony_ci             C_SUB( scratch[4] , scratch[0] , scratch[2] );
11853a5a1b3Sopenharmony_ci             C_SUB( Fout[m2], *Fout, scratch[3] );
11953a5a1b3Sopenharmony_ci             tw1 += fstride;
12053a5a1b3Sopenharmony_ci             tw2 += fstride*2;
12153a5a1b3Sopenharmony_ci             tw3 += fstride*3;
12253a5a1b3Sopenharmony_ci             C_ADDTO( *Fout , scratch[3] );
12353a5a1b3Sopenharmony_ci
12453a5a1b3Sopenharmony_ci             Fout[m].r = scratch[5].r - scratch[4].i;
12553a5a1b3Sopenharmony_ci             Fout[m].i = scratch[5].i + scratch[4].r;
12653a5a1b3Sopenharmony_ci             Fout[m3].r = scratch[5].r + scratch[4].i;
12753a5a1b3Sopenharmony_ci             Fout[m3].i = scratch[5].i - scratch[4].r;
12853a5a1b3Sopenharmony_ci             ++Fout;
12953a5a1b3Sopenharmony_ci          }
13053a5a1b3Sopenharmony_ci       }
13153a5a1b3Sopenharmony_ci    } else
13253a5a1b3Sopenharmony_ci    {
13353a5a1b3Sopenharmony_ci       kiss_fft_cpx * Fout_beg = Fout;
13453a5a1b3Sopenharmony_ci       for (i=0;i<N;i++)
13553a5a1b3Sopenharmony_ci       {
13653a5a1b3Sopenharmony_ci          Fout = Fout_beg + i*mm;
13753a5a1b3Sopenharmony_ci          tw3 = tw2 = tw1 = st->twiddles;
13853a5a1b3Sopenharmony_ci          for (j=0;j<m;j++)
13953a5a1b3Sopenharmony_ci          {
14053a5a1b3Sopenharmony_ci             C_MUL4(scratch[0],Fout[m] , *tw1 );
14153a5a1b3Sopenharmony_ci             C_MUL4(scratch[1],Fout[m2] , *tw2 );
14253a5a1b3Sopenharmony_ci             C_MUL4(scratch[2],Fout[m3] , *tw3 );
14353a5a1b3Sopenharmony_ci
14453a5a1b3Sopenharmony_ci             Fout->r = PSHR16(Fout->r, 2);
14553a5a1b3Sopenharmony_ci             Fout->i = PSHR16(Fout->i, 2);
14653a5a1b3Sopenharmony_ci             C_SUB( scratch[5] , *Fout, scratch[1] );
14753a5a1b3Sopenharmony_ci             C_ADDTO(*Fout, scratch[1]);
14853a5a1b3Sopenharmony_ci             C_ADD( scratch[3] , scratch[0] , scratch[2] );
14953a5a1b3Sopenharmony_ci             C_SUB( scratch[4] , scratch[0] , scratch[2] );
15053a5a1b3Sopenharmony_ci             Fout[m2].r = PSHR16(Fout[m2].r, 2);
15153a5a1b3Sopenharmony_ci             Fout[m2].i = PSHR16(Fout[m2].i, 2);
15253a5a1b3Sopenharmony_ci             C_SUB( Fout[m2], *Fout, scratch[3] );
15353a5a1b3Sopenharmony_ci             tw1 += fstride;
15453a5a1b3Sopenharmony_ci             tw2 += fstride*2;
15553a5a1b3Sopenharmony_ci             tw3 += fstride*3;
15653a5a1b3Sopenharmony_ci             C_ADDTO( *Fout , scratch[3] );
15753a5a1b3Sopenharmony_ci
15853a5a1b3Sopenharmony_ci             Fout[m].r = scratch[5].r + scratch[4].i;
15953a5a1b3Sopenharmony_ci             Fout[m].i = scratch[5].i - scratch[4].r;
16053a5a1b3Sopenharmony_ci             Fout[m3].r = scratch[5].r - scratch[4].i;
16153a5a1b3Sopenharmony_ci             Fout[m3].i = scratch[5].i + scratch[4].r;
16253a5a1b3Sopenharmony_ci             ++Fout;
16353a5a1b3Sopenharmony_ci          }
16453a5a1b3Sopenharmony_ci       }
16553a5a1b3Sopenharmony_ci    }
16653a5a1b3Sopenharmony_ci}
16753a5a1b3Sopenharmony_ci
16853a5a1b3Sopenharmony_cistatic void kf_bfly3(
16953a5a1b3Sopenharmony_ci         kiss_fft_cpx * Fout,
17053a5a1b3Sopenharmony_ci         const size_t fstride,
17153a5a1b3Sopenharmony_ci         const kiss_fft_cfg st,
17253a5a1b3Sopenharmony_ci         size_t m
17353a5a1b3Sopenharmony_ci         )
17453a5a1b3Sopenharmony_ci{
17553a5a1b3Sopenharmony_ci     size_t k=m;
17653a5a1b3Sopenharmony_ci     const size_t m2 = 2*m;
17753a5a1b3Sopenharmony_ci     kiss_fft_cpx *tw1,*tw2;
17853a5a1b3Sopenharmony_ci     kiss_fft_cpx scratch[5];
17953a5a1b3Sopenharmony_ci     kiss_fft_cpx epi3;
18053a5a1b3Sopenharmony_ci     epi3 = st->twiddles[fstride*m];
18153a5a1b3Sopenharmony_ci
18253a5a1b3Sopenharmony_ci     tw1=tw2=st->twiddles;
18353a5a1b3Sopenharmony_ci
18453a5a1b3Sopenharmony_ci     do{
18553a5a1b3Sopenharmony_ci        if (!st->inverse) {
18653a5a1b3Sopenharmony_ci         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
18753a5a1b3Sopenharmony_ci	}
18853a5a1b3Sopenharmony_ci
18953a5a1b3Sopenharmony_ci         C_MUL(scratch[1],Fout[m] , *tw1);
19053a5a1b3Sopenharmony_ci         C_MUL(scratch[2],Fout[m2] , *tw2);
19153a5a1b3Sopenharmony_ci
19253a5a1b3Sopenharmony_ci         C_ADD(scratch[3],scratch[1],scratch[2]);
19353a5a1b3Sopenharmony_ci         C_SUB(scratch[0],scratch[1],scratch[2]);
19453a5a1b3Sopenharmony_ci         tw1 += fstride;
19553a5a1b3Sopenharmony_ci         tw2 += fstride*2;
19653a5a1b3Sopenharmony_ci
19753a5a1b3Sopenharmony_ci         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
19853a5a1b3Sopenharmony_ci         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
19953a5a1b3Sopenharmony_ci
20053a5a1b3Sopenharmony_ci         C_MULBYSCALAR( scratch[0] , epi3.i );
20153a5a1b3Sopenharmony_ci
20253a5a1b3Sopenharmony_ci         C_ADDTO(*Fout,scratch[3]);
20353a5a1b3Sopenharmony_ci
20453a5a1b3Sopenharmony_ci         Fout[m2].r = Fout[m].r + scratch[0].i;
20553a5a1b3Sopenharmony_ci         Fout[m2].i = Fout[m].i - scratch[0].r;
20653a5a1b3Sopenharmony_ci
20753a5a1b3Sopenharmony_ci         Fout[m].r -= scratch[0].i;
20853a5a1b3Sopenharmony_ci         Fout[m].i += scratch[0].r;
20953a5a1b3Sopenharmony_ci
21053a5a1b3Sopenharmony_ci         ++Fout;
21153a5a1b3Sopenharmony_ci     }while(--k);
21253a5a1b3Sopenharmony_ci}
21353a5a1b3Sopenharmony_ci
21453a5a1b3Sopenharmony_cistatic void kf_bfly5(
21553a5a1b3Sopenharmony_ci        kiss_fft_cpx * Fout,
21653a5a1b3Sopenharmony_ci        const size_t fstride,
21753a5a1b3Sopenharmony_ci        const kiss_fft_cfg st,
21853a5a1b3Sopenharmony_ci        int m
21953a5a1b3Sopenharmony_ci        )
22053a5a1b3Sopenharmony_ci{
22153a5a1b3Sopenharmony_ci    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
22253a5a1b3Sopenharmony_ci    int u;
22353a5a1b3Sopenharmony_ci    kiss_fft_cpx scratch[13];
22453a5a1b3Sopenharmony_ci    kiss_fft_cpx * twiddles = st->twiddles;
22553a5a1b3Sopenharmony_ci    kiss_fft_cpx *tw;
22653a5a1b3Sopenharmony_ci    kiss_fft_cpx ya,yb;
22753a5a1b3Sopenharmony_ci    ya = twiddles[fstride*m];
22853a5a1b3Sopenharmony_ci    yb = twiddles[fstride*2*m];
22953a5a1b3Sopenharmony_ci
23053a5a1b3Sopenharmony_ci    Fout0=Fout;
23153a5a1b3Sopenharmony_ci    Fout1=Fout0+m;
23253a5a1b3Sopenharmony_ci    Fout2=Fout0+2*m;
23353a5a1b3Sopenharmony_ci    Fout3=Fout0+3*m;
23453a5a1b3Sopenharmony_ci    Fout4=Fout0+4*m;
23553a5a1b3Sopenharmony_ci
23653a5a1b3Sopenharmony_ci    tw=st->twiddles;
23753a5a1b3Sopenharmony_ci    for ( u=0; u<m; ++u ) {
23853a5a1b3Sopenharmony_ci        if (!st->inverse) {
23953a5a1b3Sopenharmony_ci        C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
24053a5a1b3Sopenharmony_ci	}
24153a5a1b3Sopenharmony_ci        scratch[0] = *Fout0;
24253a5a1b3Sopenharmony_ci
24353a5a1b3Sopenharmony_ci        C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
24453a5a1b3Sopenharmony_ci        C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
24553a5a1b3Sopenharmony_ci        C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
24653a5a1b3Sopenharmony_ci        C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
24753a5a1b3Sopenharmony_ci
24853a5a1b3Sopenharmony_ci        C_ADD( scratch[7],scratch[1],scratch[4]);
24953a5a1b3Sopenharmony_ci        C_SUB( scratch[10],scratch[1],scratch[4]);
25053a5a1b3Sopenharmony_ci        C_ADD( scratch[8],scratch[2],scratch[3]);
25153a5a1b3Sopenharmony_ci        C_SUB( scratch[9],scratch[2],scratch[3]);
25253a5a1b3Sopenharmony_ci
25353a5a1b3Sopenharmony_ci        Fout0->r += scratch[7].r + scratch[8].r;
25453a5a1b3Sopenharmony_ci        Fout0->i += scratch[7].i + scratch[8].i;
25553a5a1b3Sopenharmony_ci
25653a5a1b3Sopenharmony_ci        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
25753a5a1b3Sopenharmony_ci        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
25853a5a1b3Sopenharmony_ci
25953a5a1b3Sopenharmony_ci        scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
26053a5a1b3Sopenharmony_ci        scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
26153a5a1b3Sopenharmony_ci
26253a5a1b3Sopenharmony_ci        C_SUB(*Fout1,scratch[5],scratch[6]);
26353a5a1b3Sopenharmony_ci        C_ADD(*Fout4,scratch[5],scratch[6]);
26453a5a1b3Sopenharmony_ci
26553a5a1b3Sopenharmony_ci        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
26653a5a1b3Sopenharmony_ci        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
26753a5a1b3Sopenharmony_ci        scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
26853a5a1b3Sopenharmony_ci        scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
26953a5a1b3Sopenharmony_ci
27053a5a1b3Sopenharmony_ci        C_ADD(*Fout2,scratch[11],scratch[12]);
27153a5a1b3Sopenharmony_ci        C_SUB(*Fout3,scratch[11],scratch[12]);
27253a5a1b3Sopenharmony_ci
27353a5a1b3Sopenharmony_ci        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
27453a5a1b3Sopenharmony_ci    }
27553a5a1b3Sopenharmony_ci}
27653a5a1b3Sopenharmony_ci
27753a5a1b3Sopenharmony_ci/* perform the butterfly for one stage of a mixed radix FFT */
27853a5a1b3Sopenharmony_cistatic void kf_bfly_generic(
27953a5a1b3Sopenharmony_ci        kiss_fft_cpx * Fout,
28053a5a1b3Sopenharmony_ci        const size_t fstride,
28153a5a1b3Sopenharmony_ci        const kiss_fft_cfg st,
28253a5a1b3Sopenharmony_ci        int m,
28353a5a1b3Sopenharmony_ci        int p
28453a5a1b3Sopenharmony_ci        )
28553a5a1b3Sopenharmony_ci{
28653a5a1b3Sopenharmony_ci    int u,k,q1,q;
28753a5a1b3Sopenharmony_ci    kiss_fft_cpx * twiddles = st->twiddles;
28853a5a1b3Sopenharmony_ci    kiss_fft_cpx t;
28953a5a1b3Sopenharmony_ci    kiss_fft_cpx scratchbuf[17];
29053a5a1b3Sopenharmony_ci    int Norig = st->nfft;
29153a5a1b3Sopenharmony_ci
29253a5a1b3Sopenharmony_ci    /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
29353a5a1b3Sopenharmony_ci    if (p>17)
29453a5a1b3Sopenharmony_ci       speex_fatal("KissFFT: max radix supported is 17");
29553a5a1b3Sopenharmony_ci
29653a5a1b3Sopenharmony_ci    for ( u=0; u<m; ++u ) {
29753a5a1b3Sopenharmony_ci        k=u;
29853a5a1b3Sopenharmony_ci        for ( q1=0 ; q1<p ; ++q1 ) {
29953a5a1b3Sopenharmony_ci            scratchbuf[q1] = Fout[ k  ];
30053a5a1b3Sopenharmony_ci        if (!st->inverse) {
30153a5a1b3Sopenharmony_ci            C_FIXDIV(scratchbuf[q1],p);
30253a5a1b3Sopenharmony_ci	}
30353a5a1b3Sopenharmony_ci            k += m;
30453a5a1b3Sopenharmony_ci        }
30553a5a1b3Sopenharmony_ci
30653a5a1b3Sopenharmony_ci        k=u;
30753a5a1b3Sopenharmony_ci        for ( q1=0 ; q1<p ; ++q1 ) {
30853a5a1b3Sopenharmony_ci            int twidx=0;
30953a5a1b3Sopenharmony_ci            Fout[ k ] = scratchbuf[0];
31053a5a1b3Sopenharmony_ci            for (q=1;q<p;++q ) {
31153a5a1b3Sopenharmony_ci                twidx += fstride * k;
31253a5a1b3Sopenharmony_ci                if (twidx>=Norig) twidx-=Norig;
31353a5a1b3Sopenharmony_ci                C_MUL(t,scratchbuf[q] , twiddles[twidx] );
31453a5a1b3Sopenharmony_ci                C_ADDTO( Fout[ k ] ,t);
31553a5a1b3Sopenharmony_ci            }
31653a5a1b3Sopenharmony_ci            k += m;
31753a5a1b3Sopenharmony_ci        }
31853a5a1b3Sopenharmony_ci    }
31953a5a1b3Sopenharmony_ci}
32053a5a1b3Sopenharmony_ci
32153a5a1b3Sopenharmony_cistatic
32253a5a1b3Sopenharmony_civoid kf_shuffle(
32353a5a1b3Sopenharmony_ci         kiss_fft_cpx * Fout,
32453a5a1b3Sopenharmony_ci         const kiss_fft_cpx * f,
32553a5a1b3Sopenharmony_ci         const size_t fstride,
32653a5a1b3Sopenharmony_ci         int in_stride,
32753a5a1b3Sopenharmony_ci         int * factors,
32853a5a1b3Sopenharmony_ci         const kiss_fft_cfg st
32953a5a1b3Sopenharmony_ci            )
33053a5a1b3Sopenharmony_ci{
33153a5a1b3Sopenharmony_ci   const int p=*factors++; /* the radix  */
33253a5a1b3Sopenharmony_ci   const int m=*factors++; /* stage's fft length/p */
33353a5a1b3Sopenharmony_ci
33453a5a1b3Sopenharmony_ci    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
33553a5a1b3Sopenharmony_ci   if (m==1)
33653a5a1b3Sopenharmony_ci   {
33753a5a1b3Sopenharmony_ci      int j;
33853a5a1b3Sopenharmony_ci      for (j=0;j<p;j++)
33953a5a1b3Sopenharmony_ci      {
34053a5a1b3Sopenharmony_ci         Fout[j] = *f;
34153a5a1b3Sopenharmony_ci         f += fstride*in_stride;
34253a5a1b3Sopenharmony_ci      }
34353a5a1b3Sopenharmony_ci   } else {
34453a5a1b3Sopenharmony_ci      int j;
34553a5a1b3Sopenharmony_ci      for (j=0;j<p;j++)
34653a5a1b3Sopenharmony_ci      {
34753a5a1b3Sopenharmony_ci         kf_shuffle( Fout , f, fstride*p, in_stride, factors,st);
34853a5a1b3Sopenharmony_ci         f += fstride*in_stride;
34953a5a1b3Sopenharmony_ci         Fout += m;
35053a5a1b3Sopenharmony_ci      }
35153a5a1b3Sopenharmony_ci   }
35253a5a1b3Sopenharmony_ci}
35353a5a1b3Sopenharmony_ci
35453a5a1b3Sopenharmony_cistatic
35553a5a1b3Sopenharmony_civoid kf_work(
35653a5a1b3Sopenharmony_ci        kiss_fft_cpx * Fout,
35753a5a1b3Sopenharmony_ci        const kiss_fft_cpx * f,
35853a5a1b3Sopenharmony_ci        const size_t fstride,
35953a5a1b3Sopenharmony_ci        int in_stride,
36053a5a1b3Sopenharmony_ci        int * factors,
36153a5a1b3Sopenharmony_ci        const kiss_fft_cfg st,
36253a5a1b3Sopenharmony_ci        int N,
36353a5a1b3Sopenharmony_ci        int s2,
36453a5a1b3Sopenharmony_ci        int m2
36553a5a1b3Sopenharmony_ci        )
36653a5a1b3Sopenharmony_ci{
36753a5a1b3Sopenharmony_ci   int i;
36853a5a1b3Sopenharmony_ci    kiss_fft_cpx * Fout_beg=Fout;
36953a5a1b3Sopenharmony_ci    const int p=*factors++; /* the radix  */
37053a5a1b3Sopenharmony_ci    const int m=*factors++; /* stage's fft length/p */
37153a5a1b3Sopenharmony_ci#if 0
37253a5a1b3Sopenharmony_ci    /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
37353a5a1b3Sopenharmony_ci    if (m==1)
37453a5a1b3Sopenharmony_ci    {
37553a5a1b3Sopenharmony_ci    /*   int j;
37653a5a1b3Sopenharmony_ci       for (j=0;j<p;j++)
37753a5a1b3Sopenharmony_ci       {
37853a5a1b3Sopenharmony_ci          Fout[j] = *f;
37953a5a1b3Sopenharmony_ci          f += fstride*in_stride;
38053a5a1b3Sopenharmony_ci       }*/
38153a5a1b3Sopenharmony_ci    } else {
38253a5a1b3Sopenharmony_ci       int j;
38353a5a1b3Sopenharmony_ci       for (j=0;j<p;j++)
38453a5a1b3Sopenharmony_ci       {
38553a5a1b3Sopenharmony_ci          kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
38653a5a1b3Sopenharmony_ci          f += fstride*in_stride;
38753a5a1b3Sopenharmony_ci          Fout += m;
38853a5a1b3Sopenharmony_ci       }
38953a5a1b3Sopenharmony_ci    }
39053a5a1b3Sopenharmony_ci
39153a5a1b3Sopenharmony_ci    Fout=Fout_beg;
39253a5a1b3Sopenharmony_ci
39353a5a1b3Sopenharmony_ci    switch (p) {
39453a5a1b3Sopenharmony_ci        case 2: kf_bfly2(Fout,fstride,st,m); break;
39553a5a1b3Sopenharmony_ci        case 3: kf_bfly3(Fout,fstride,st,m); break;
39653a5a1b3Sopenharmony_ci        case 4: kf_bfly4(Fout,fstride,st,m); break;
39753a5a1b3Sopenharmony_ci        case 5: kf_bfly5(Fout,fstride,st,m); break;
39853a5a1b3Sopenharmony_ci        default: kf_bfly_generic(Fout,fstride,st,m,p); break;
39953a5a1b3Sopenharmony_ci    }
40053a5a1b3Sopenharmony_ci#else
40153a5a1b3Sopenharmony_ci    /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
40253a5a1b3Sopenharmony_ci    if (m==1)
40353a5a1b3Sopenharmony_ci    {
40453a5a1b3Sopenharmony_ci       /*for (i=0;i<N;i++)
40553a5a1b3Sopenharmony_ci       {
40653a5a1b3Sopenharmony_ci          int j;
40753a5a1b3Sopenharmony_ci          Fout = Fout_beg+i*m2;
40853a5a1b3Sopenharmony_ci          const kiss_fft_cpx * f2 = f+i*s2;
40953a5a1b3Sopenharmony_ci          for (j=0;j<p;j++)
41053a5a1b3Sopenharmony_ci          {
41153a5a1b3Sopenharmony_ci             *Fout++ = *f2;
41253a5a1b3Sopenharmony_ci             f2 += fstride*in_stride;
41353a5a1b3Sopenharmony_ci          }
41453a5a1b3Sopenharmony_ci       }*/
41553a5a1b3Sopenharmony_ci    }else{
41653a5a1b3Sopenharmony_ci       kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
41753a5a1b3Sopenharmony_ci    }
41853a5a1b3Sopenharmony_ci
41953a5a1b3Sopenharmony_ci
42053a5a1b3Sopenharmony_ci
42153a5a1b3Sopenharmony_ci
42253a5a1b3Sopenharmony_ci       switch (p) {
42353a5a1b3Sopenharmony_ci          case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
42453a5a1b3Sopenharmony_ci          case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break;
42553a5a1b3Sopenharmony_ci          case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
42653a5a1b3Sopenharmony_ci          case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break;
42753a5a1b3Sopenharmony_ci          default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break;
42853a5a1b3Sopenharmony_ci    }
42953a5a1b3Sopenharmony_ci#endif
43053a5a1b3Sopenharmony_ci}
43153a5a1b3Sopenharmony_ci
43253a5a1b3Sopenharmony_ci/*  facbuf is populated by p1,m1,p2,m2, ...
43353a5a1b3Sopenharmony_ci    where
43453a5a1b3Sopenharmony_ci    p[i] * m[i] = m[i-1]
43553a5a1b3Sopenharmony_ci    m0 = n                  */
43653a5a1b3Sopenharmony_cistatic
43753a5a1b3Sopenharmony_civoid kf_factor(int n,int * facbuf)
43853a5a1b3Sopenharmony_ci{
43953a5a1b3Sopenharmony_ci    int p=4;
44053a5a1b3Sopenharmony_ci
44153a5a1b3Sopenharmony_ci    /*factor out powers of 4, powers of 2, then any remaining primes */
44253a5a1b3Sopenharmony_ci    do {
44353a5a1b3Sopenharmony_ci        while (n % p) {
44453a5a1b3Sopenharmony_ci            switch (p) {
44553a5a1b3Sopenharmony_ci                case 4: p = 2; break;
44653a5a1b3Sopenharmony_ci                case 2: p = 3; break;
44753a5a1b3Sopenharmony_ci                default: p += 2; break;
44853a5a1b3Sopenharmony_ci            }
44953a5a1b3Sopenharmony_ci            if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n)
45053a5a1b3Sopenharmony_ci                p = n;          /* no more factors, skip to end */
45153a5a1b3Sopenharmony_ci        }
45253a5a1b3Sopenharmony_ci        n /= p;
45353a5a1b3Sopenharmony_ci        *facbuf++ = p;
45453a5a1b3Sopenharmony_ci        *facbuf++ = n;
45553a5a1b3Sopenharmony_ci    } while (n > 1);
45653a5a1b3Sopenharmony_ci}
45753a5a1b3Sopenharmony_ci/*
45853a5a1b3Sopenharmony_ci *
45953a5a1b3Sopenharmony_ci * User-callable function to allocate all necessary storage space for the fft.
46053a5a1b3Sopenharmony_ci *
46153a5a1b3Sopenharmony_ci * The return value is a contiguous block of memory, allocated with malloc.  As such,
46253a5a1b3Sopenharmony_ci * It can be freed with free(), rather than a kiss_fft-specific function.
46353a5a1b3Sopenharmony_ci * */
46453a5a1b3Sopenharmony_cikiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
46553a5a1b3Sopenharmony_ci{
46653a5a1b3Sopenharmony_ci    kiss_fft_cfg st=NULL;
46753a5a1b3Sopenharmony_ci    size_t memneeded = sizeof(struct kiss_fft_state)
46853a5a1b3Sopenharmony_ci        + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
46953a5a1b3Sopenharmony_ci
47053a5a1b3Sopenharmony_ci    if ( lenmem==NULL ) {
47153a5a1b3Sopenharmony_ci        st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
47253a5a1b3Sopenharmony_ci    }else{
47353a5a1b3Sopenharmony_ci        if (mem != NULL && *lenmem >= memneeded)
47453a5a1b3Sopenharmony_ci            st = (kiss_fft_cfg)mem;
47553a5a1b3Sopenharmony_ci        *lenmem = memneeded;
47653a5a1b3Sopenharmony_ci    }
47753a5a1b3Sopenharmony_ci    if (st) {
47853a5a1b3Sopenharmony_ci        int i;
47953a5a1b3Sopenharmony_ci        st->nfft=nfft;
48053a5a1b3Sopenharmony_ci        st->inverse = inverse_fft;
48153a5a1b3Sopenharmony_ci#ifdef FIXED_POINT
48253a5a1b3Sopenharmony_ci        for (i=0;i<nfft;++i) {
48353a5a1b3Sopenharmony_ci            spx_word32_t phase = i;
48453a5a1b3Sopenharmony_ci            if (!st->inverse)
48553a5a1b3Sopenharmony_ci                phase = -phase;
48653a5a1b3Sopenharmony_ci            kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
48753a5a1b3Sopenharmony_ci        }
48853a5a1b3Sopenharmony_ci#else
48953a5a1b3Sopenharmony_ci        for (i=0;i<nfft;++i) {
49053a5a1b3Sopenharmony_ci           const double pi=3.14159265358979323846264338327;
49153a5a1b3Sopenharmony_ci           double phase = ( -2*pi /nfft ) * i;
49253a5a1b3Sopenharmony_ci           if (st->inverse)
49353a5a1b3Sopenharmony_ci              phase *= -1;
49453a5a1b3Sopenharmony_ci           kf_cexp(st->twiddles+i, phase );
49553a5a1b3Sopenharmony_ci        }
49653a5a1b3Sopenharmony_ci#endif
49753a5a1b3Sopenharmony_ci        kf_factor(nfft,st->factors);
49853a5a1b3Sopenharmony_ci    }
49953a5a1b3Sopenharmony_ci    return st;
50053a5a1b3Sopenharmony_ci}
50153a5a1b3Sopenharmony_ci
50253a5a1b3Sopenharmony_ci
50353a5a1b3Sopenharmony_ci
50453a5a1b3Sopenharmony_ci
50553a5a1b3Sopenharmony_civoid kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
50653a5a1b3Sopenharmony_ci{
50753a5a1b3Sopenharmony_ci    if (fin == fout)
50853a5a1b3Sopenharmony_ci    {
50953a5a1b3Sopenharmony_ci       speex_fatal("In-place FFT not supported");
51053a5a1b3Sopenharmony_ci       /*CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
51153a5a1b3Sopenharmony_ci       kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
51253a5a1b3Sopenharmony_ci       SPEEX_MOVE(fout,tmpbuf,st->nfft);*/
51353a5a1b3Sopenharmony_ci    } else {
51453a5a1b3Sopenharmony_ci       kf_shuffle( fout, fin, 1,in_stride, st->factors,st);
51553a5a1b3Sopenharmony_ci       kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
51653a5a1b3Sopenharmony_ci    }
51753a5a1b3Sopenharmony_ci}
51853a5a1b3Sopenharmony_ci
51953a5a1b3Sopenharmony_civoid kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
52053a5a1b3Sopenharmony_ci{
52153a5a1b3Sopenharmony_ci    kiss_fft_stride(cfg,fin,fout,1);
52253a5a1b3Sopenharmony_ci}
52353a5a1b3Sopenharmony_ci
524