153a5a1b3Sopenharmony_ci/* 253a5a1b3Sopenharmony_ciCopyright (c) 2003-2004, Mark Borgerding 353a5a1b3Sopenharmony_ciCopyright (c) 2005-2007, Jean-Marc Valin 453a5a1b3Sopenharmony_ci 553a5a1b3Sopenharmony_ciAll rights reserved. 653a5a1b3Sopenharmony_ci 753a5a1b3Sopenharmony_ciRedistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 853a5a1b3Sopenharmony_ci 953a5a1b3Sopenharmony_ci * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 1053a5a1b3Sopenharmony_ci * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 1153a5a1b3Sopenharmony_ci * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. 1253a5a1b3Sopenharmony_ci 1353a5a1b3Sopenharmony_ciTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1453a5a1b3Sopenharmony_ci*/ 1553a5a1b3Sopenharmony_ci 1653a5a1b3Sopenharmony_ci 1753a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H 1853a5a1b3Sopenharmony_ci#include "config.h" 1953a5a1b3Sopenharmony_ci#endif 2053a5a1b3Sopenharmony_ci 2153a5a1b3Sopenharmony_ci#include "_kiss_fft_guts.h" 2253a5a1b3Sopenharmony_ci#include "arch.h" 2353a5a1b3Sopenharmony_ci#include "os_support.h" 2453a5a1b3Sopenharmony_ci 2553a5a1b3Sopenharmony_ci/* The guts header contains all the multiplication and addition macros that are defined for 2653a5a1b3Sopenharmony_ci fixed or floating point complex numbers. It also delares the kf_ internal functions. 2753a5a1b3Sopenharmony_ci */ 2853a5a1b3Sopenharmony_ci 2953a5a1b3Sopenharmony_cistatic void kf_bfly2( 3053a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout, 3153a5a1b3Sopenharmony_ci const size_t fstride, 3253a5a1b3Sopenharmony_ci const kiss_fft_cfg st, 3353a5a1b3Sopenharmony_ci int m, 3453a5a1b3Sopenharmony_ci int N, 3553a5a1b3Sopenharmony_ci int mm 3653a5a1b3Sopenharmony_ci ) 3753a5a1b3Sopenharmony_ci{ 3853a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout2; 3953a5a1b3Sopenharmony_ci kiss_fft_cpx * tw1; 4053a5a1b3Sopenharmony_ci kiss_fft_cpx t; 4153a5a1b3Sopenharmony_ci if (!st->inverse) { 4253a5a1b3Sopenharmony_ci int i,j; 4353a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout_beg = Fout; 4453a5a1b3Sopenharmony_ci for (i=0;i<N;i++) 4553a5a1b3Sopenharmony_ci { 4653a5a1b3Sopenharmony_ci Fout = Fout_beg + i*mm; 4753a5a1b3Sopenharmony_ci Fout2 = Fout + m; 4853a5a1b3Sopenharmony_ci tw1 = st->twiddles; 4953a5a1b3Sopenharmony_ci for(j=0;j<m;j++) 5053a5a1b3Sopenharmony_ci { 5153a5a1b3Sopenharmony_ci /* Almost the same as the code path below, except that we divide the input by two 5253a5a1b3Sopenharmony_ci (while keeping the best accuracy possible) */ 5353a5a1b3Sopenharmony_ci spx_word32_t tr, ti; 5453a5a1b3Sopenharmony_ci tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1); 5553a5a1b3Sopenharmony_ci ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1); 5653a5a1b3Sopenharmony_ci tw1 += fstride; 5753a5a1b3Sopenharmony_ci Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15); 5853a5a1b3Sopenharmony_ci Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15); 5953a5a1b3Sopenharmony_ci Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15); 6053a5a1b3Sopenharmony_ci Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15); 6153a5a1b3Sopenharmony_ci ++Fout2; 6253a5a1b3Sopenharmony_ci ++Fout; 6353a5a1b3Sopenharmony_ci } 6453a5a1b3Sopenharmony_ci } 6553a5a1b3Sopenharmony_ci } else { 6653a5a1b3Sopenharmony_ci int i,j; 6753a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout_beg = Fout; 6853a5a1b3Sopenharmony_ci for (i=0;i<N;i++) 6953a5a1b3Sopenharmony_ci { 7053a5a1b3Sopenharmony_ci Fout = Fout_beg + i*mm; 7153a5a1b3Sopenharmony_ci Fout2 = Fout + m; 7253a5a1b3Sopenharmony_ci tw1 = st->twiddles; 7353a5a1b3Sopenharmony_ci for(j=0;j<m;j++) 7453a5a1b3Sopenharmony_ci { 7553a5a1b3Sopenharmony_ci C_MUL (t, *Fout2 , *tw1); 7653a5a1b3Sopenharmony_ci tw1 += fstride; 7753a5a1b3Sopenharmony_ci C_SUB( *Fout2 , *Fout , t ); 7853a5a1b3Sopenharmony_ci C_ADDTO( *Fout , t ); 7953a5a1b3Sopenharmony_ci ++Fout2; 8053a5a1b3Sopenharmony_ci ++Fout; 8153a5a1b3Sopenharmony_ci } 8253a5a1b3Sopenharmony_ci } 8353a5a1b3Sopenharmony_ci } 8453a5a1b3Sopenharmony_ci} 8553a5a1b3Sopenharmony_ci 8653a5a1b3Sopenharmony_cistatic void kf_bfly4( 8753a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout, 8853a5a1b3Sopenharmony_ci const size_t fstride, 8953a5a1b3Sopenharmony_ci const kiss_fft_cfg st, 9053a5a1b3Sopenharmony_ci int m, 9153a5a1b3Sopenharmony_ci int N, 9253a5a1b3Sopenharmony_ci int mm 9353a5a1b3Sopenharmony_ci ) 9453a5a1b3Sopenharmony_ci{ 9553a5a1b3Sopenharmony_ci kiss_fft_cpx *tw1,*tw2,*tw3; 9653a5a1b3Sopenharmony_ci kiss_fft_cpx scratch[6]; 9753a5a1b3Sopenharmony_ci const size_t m2=2*m; 9853a5a1b3Sopenharmony_ci const size_t m3=3*m; 9953a5a1b3Sopenharmony_ci int i, j; 10053a5a1b3Sopenharmony_ci 10153a5a1b3Sopenharmony_ci if (st->inverse) 10253a5a1b3Sopenharmony_ci { 10353a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout_beg = Fout; 10453a5a1b3Sopenharmony_ci for (i=0;i<N;i++) 10553a5a1b3Sopenharmony_ci { 10653a5a1b3Sopenharmony_ci Fout = Fout_beg + i*mm; 10753a5a1b3Sopenharmony_ci tw3 = tw2 = tw1 = st->twiddles; 10853a5a1b3Sopenharmony_ci for (j=0;j<m;j++) 10953a5a1b3Sopenharmony_ci { 11053a5a1b3Sopenharmony_ci C_MUL(scratch[0],Fout[m] , *tw1 ); 11153a5a1b3Sopenharmony_ci C_MUL(scratch[1],Fout[m2] , *tw2 ); 11253a5a1b3Sopenharmony_ci C_MUL(scratch[2],Fout[m3] , *tw3 ); 11353a5a1b3Sopenharmony_ci 11453a5a1b3Sopenharmony_ci C_SUB( scratch[5] , *Fout, scratch[1] ); 11553a5a1b3Sopenharmony_ci C_ADDTO(*Fout, scratch[1]); 11653a5a1b3Sopenharmony_ci C_ADD( scratch[3] , scratch[0] , scratch[2] ); 11753a5a1b3Sopenharmony_ci C_SUB( scratch[4] , scratch[0] , scratch[2] ); 11853a5a1b3Sopenharmony_ci C_SUB( Fout[m2], *Fout, scratch[3] ); 11953a5a1b3Sopenharmony_ci tw1 += fstride; 12053a5a1b3Sopenharmony_ci tw2 += fstride*2; 12153a5a1b3Sopenharmony_ci tw3 += fstride*3; 12253a5a1b3Sopenharmony_ci C_ADDTO( *Fout , scratch[3] ); 12353a5a1b3Sopenharmony_ci 12453a5a1b3Sopenharmony_ci Fout[m].r = scratch[5].r - scratch[4].i; 12553a5a1b3Sopenharmony_ci Fout[m].i = scratch[5].i + scratch[4].r; 12653a5a1b3Sopenharmony_ci Fout[m3].r = scratch[5].r + scratch[4].i; 12753a5a1b3Sopenharmony_ci Fout[m3].i = scratch[5].i - scratch[4].r; 12853a5a1b3Sopenharmony_ci ++Fout; 12953a5a1b3Sopenharmony_ci } 13053a5a1b3Sopenharmony_ci } 13153a5a1b3Sopenharmony_ci } else 13253a5a1b3Sopenharmony_ci { 13353a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout_beg = Fout; 13453a5a1b3Sopenharmony_ci for (i=0;i<N;i++) 13553a5a1b3Sopenharmony_ci { 13653a5a1b3Sopenharmony_ci Fout = Fout_beg + i*mm; 13753a5a1b3Sopenharmony_ci tw3 = tw2 = tw1 = st->twiddles; 13853a5a1b3Sopenharmony_ci for (j=0;j<m;j++) 13953a5a1b3Sopenharmony_ci { 14053a5a1b3Sopenharmony_ci C_MUL4(scratch[0],Fout[m] , *tw1 ); 14153a5a1b3Sopenharmony_ci C_MUL4(scratch[1],Fout[m2] , *tw2 ); 14253a5a1b3Sopenharmony_ci C_MUL4(scratch[2],Fout[m3] , *tw3 ); 14353a5a1b3Sopenharmony_ci 14453a5a1b3Sopenharmony_ci Fout->r = PSHR16(Fout->r, 2); 14553a5a1b3Sopenharmony_ci Fout->i = PSHR16(Fout->i, 2); 14653a5a1b3Sopenharmony_ci C_SUB( scratch[5] , *Fout, scratch[1] ); 14753a5a1b3Sopenharmony_ci C_ADDTO(*Fout, scratch[1]); 14853a5a1b3Sopenharmony_ci C_ADD( scratch[3] , scratch[0] , scratch[2] ); 14953a5a1b3Sopenharmony_ci C_SUB( scratch[4] , scratch[0] , scratch[2] ); 15053a5a1b3Sopenharmony_ci Fout[m2].r = PSHR16(Fout[m2].r, 2); 15153a5a1b3Sopenharmony_ci Fout[m2].i = PSHR16(Fout[m2].i, 2); 15253a5a1b3Sopenharmony_ci C_SUB( Fout[m2], *Fout, scratch[3] ); 15353a5a1b3Sopenharmony_ci tw1 += fstride; 15453a5a1b3Sopenharmony_ci tw2 += fstride*2; 15553a5a1b3Sopenharmony_ci tw3 += fstride*3; 15653a5a1b3Sopenharmony_ci C_ADDTO( *Fout , scratch[3] ); 15753a5a1b3Sopenharmony_ci 15853a5a1b3Sopenharmony_ci Fout[m].r = scratch[5].r + scratch[4].i; 15953a5a1b3Sopenharmony_ci Fout[m].i = scratch[5].i - scratch[4].r; 16053a5a1b3Sopenharmony_ci Fout[m3].r = scratch[5].r - scratch[4].i; 16153a5a1b3Sopenharmony_ci Fout[m3].i = scratch[5].i + scratch[4].r; 16253a5a1b3Sopenharmony_ci ++Fout; 16353a5a1b3Sopenharmony_ci } 16453a5a1b3Sopenharmony_ci } 16553a5a1b3Sopenharmony_ci } 16653a5a1b3Sopenharmony_ci} 16753a5a1b3Sopenharmony_ci 16853a5a1b3Sopenharmony_cistatic void kf_bfly3( 16953a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout, 17053a5a1b3Sopenharmony_ci const size_t fstride, 17153a5a1b3Sopenharmony_ci const kiss_fft_cfg st, 17253a5a1b3Sopenharmony_ci size_t m 17353a5a1b3Sopenharmony_ci ) 17453a5a1b3Sopenharmony_ci{ 17553a5a1b3Sopenharmony_ci size_t k=m; 17653a5a1b3Sopenharmony_ci const size_t m2 = 2*m; 17753a5a1b3Sopenharmony_ci kiss_fft_cpx *tw1,*tw2; 17853a5a1b3Sopenharmony_ci kiss_fft_cpx scratch[5]; 17953a5a1b3Sopenharmony_ci kiss_fft_cpx epi3; 18053a5a1b3Sopenharmony_ci epi3 = st->twiddles[fstride*m]; 18153a5a1b3Sopenharmony_ci 18253a5a1b3Sopenharmony_ci tw1=tw2=st->twiddles; 18353a5a1b3Sopenharmony_ci 18453a5a1b3Sopenharmony_ci do{ 18553a5a1b3Sopenharmony_ci if (!st->inverse) { 18653a5a1b3Sopenharmony_ci C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 18753a5a1b3Sopenharmony_ci } 18853a5a1b3Sopenharmony_ci 18953a5a1b3Sopenharmony_ci C_MUL(scratch[1],Fout[m] , *tw1); 19053a5a1b3Sopenharmony_ci C_MUL(scratch[2],Fout[m2] , *tw2); 19153a5a1b3Sopenharmony_ci 19253a5a1b3Sopenharmony_ci C_ADD(scratch[3],scratch[1],scratch[2]); 19353a5a1b3Sopenharmony_ci C_SUB(scratch[0],scratch[1],scratch[2]); 19453a5a1b3Sopenharmony_ci tw1 += fstride; 19553a5a1b3Sopenharmony_ci tw2 += fstride*2; 19653a5a1b3Sopenharmony_ci 19753a5a1b3Sopenharmony_ci Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 19853a5a1b3Sopenharmony_ci Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 19953a5a1b3Sopenharmony_ci 20053a5a1b3Sopenharmony_ci C_MULBYSCALAR( scratch[0] , epi3.i ); 20153a5a1b3Sopenharmony_ci 20253a5a1b3Sopenharmony_ci C_ADDTO(*Fout,scratch[3]); 20353a5a1b3Sopenharmony_ci 20453a5a1b3Sopenharmony_ci Fout[m2].r = Fout[m].r + scratch[0].i; 20553a5a1b3Sopenharmony_ci Fout[m2].i = Fout[m].i - scratch[0].r; 20653a5a1b3Sopenharmony_ci 20753a5a1b3Sopenharmony_ci Fout[m].r -= scratch[0].i; 20853a5a1b3Sopenharmony_ci Fout[m].i += scratch[0].r; 20953a5a1b3Sopenharmony_ci 21053a5a1b3Sopenharmony_ci ++Fout; 21153a5a1b3Sopenharmony_ci }while(--k); 21253a5a1b3Sopenharmony_ci} 21353a5a1b3Sopenharmony_ci 21453a5a1b3Sopenharmony_cistatic void kf_bfly5( 21553a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout, 21653a5a1b3Sopenharmony_ci const size_t fstride, 21753a5a1b3Sopenharmony_ci const kiss_fft_cfg st, 21853a5a1b3Sopenharmony_ci int m 21953a5a1b3Sopenharmony_ci ) 22053a5a1b3Sopenharmony_ci{ 22153a5a1b3Sopenharmony_ci kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 22253a5a1b3Sopenharmony_ci int u; 22353a5a1b3Sopenharmony_ci kiss_fft_cpx scratch[13]; 22453a5a1b3Sopenharmony_ci kiss_fft_cpx * twiddles = st->twiddles; 22553a5a1b3Sopenharmony_ci kiss_fft_cpx *tw; 22653a5a1b3Sopenharmony_ci kiss_fft_cpx ya,yb; 22753a5a1b3Sopenharmony_ci ya = twiddles[fstride*m]; 22853a5a1b3Sopenharmony_ci yb = twiddles[fstride*2*m]; 22953a5a1b3Sopenharmony_ci 23053a5a1b3Sopenharmony_ci Fout0=Fout; 23153a5a1b3Sopenharmony_ci Fout1=Fout0+m; 23253a5a1b3Sopenharmony_ci Fout2=Fout0+2*m; 23353a5a1b3Sopenharmony_ci Fout3=Fout0+3*m; 23453a5a1b3Sopenharmony_ci Fout4=Fout0+4*m; 23553a5a1b3Sopenharmony_ci 23653a5a1b3Sopenharmony_ci tw=st->twiddles; 23753a5a1b3Sopenharmony_ci for ( u=0; u<m; ++u ) { 23853a5a1b3Sopenharmony_ci if (!st->inverse) { 23953a5a1b3Sopenharmony_ci C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 24053a5a1b3Sopenharmony_ci } 24153a5a1b3Sopenharmony_ci scratch[0] = *Fout0; 24253a5a1b3Sopenharmony_ci 24353a5a1b3Sopenharmony_ci C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 24453a5a1b3Sopenharmony_ci C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 24553a5a1b3Sopenharmony_ci C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 24653a5a1b3Sopenharmony_ci C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 24753a5a1b3Sopenharmony_ci 24853a5a1b3Sopenharmony_ci C_ADD( scratch[7],scratch[1],scratch[4]); 24953a5a1b3Sopenharmony_ci C_SUB( scratch[10],scratch[1],scratch[4]); 25053a5a1b3Sopenharmony_ci C_ADD( scratch[8],scratch[2],scratch[3]); 25153a5a1b3Sopenharmony_ci C_SUB( scratch[9],scratch[2],scratch[3]); 25253a5a1b3Sopenharmony_ci 25353a5a1b3Sopenharmony_ci Fout0->r += scratch[7].r + scratch[8].r; 25453a5a1b3Sopenharmony_ci Fout0->i += scratch[7].i + scratch[8].i; 25553a5a1b3Sopenharmony_ci 25653a5a1b3Sopenharmony_ci scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); 25753a5a1b3Sopenharmony_ci scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 25853a5a1b3Sopenharmony_ci 25953a5a1b3Sopenharmony_ci scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); 26053a5a1b3Sopenharmony_ci scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); 26153a5a1b3Sopenharmony_ci 26253a5a1b3Sopenharmony_ci C_SUB(*Fout1,scratch[5],scratch[6]); 26353a5a1b3Sopenharmony_ci C_ADD(*Fout4,scratch[5],scratch[6]); 26453a5a1b3Sopenharmony_ci 26553a5a1b3Sopenharmony_ci scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 26653a5a1b3Sopenharmony_ci scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 26753a5a1b3Sopenharmony_ci scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); 26853a5a1b3Sopenharmony_ci scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); 26953a5a1b3Sopenharmony_ci 27053a5a1b3Sopenharmony_ci C_ADD(*Fout2,scratch[11],scratch[12]); 27153a5a1b3Sopenharmony_ci C_SUB(*Fout3,scratch[11],scratch[12]); 27253a5a1b3Sopenharmony_ci 27353a5a1b3Sopenharmony_ci ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 27453a5a1b3Sopenharmony_ci } 27553a5a1b3Sopenharmony_ci} 27653a5a1b3Sopenharmony_ci 27753a5a1b3Sopenharmony_ci/* perform the butterfly for one stage of a mixed radix FFT */ 27853a5a1b3Sopenharmony_cistatic void kf_bfly_generic( 27953a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout, 28053a5a1b3Sopenharmony_ci const size_t fstride, 28153a5a1b3Sopenharmony_ci const kiss_fft_cfg st, 28253a5a1b3Sopenharmony_ci int m, 28353a5a1b3Sopenharmony_ci int p 28453a5a1b3Sopenharmony_ci ) 28553a5a1b3Sopenharmony_ci{ 28653a5a1b3Sopenharmony_ci int u,k,q1,q; 28753a5a1b3Sopenharmony_ci kiss_fft_cpx * twiddles = st->twiddles; 28853a5a1b3Sopenharmony_ci kiss_fft_cpx t; 28953a5a1b3Sopenharmony_ci kiss_fft_cpx scratchbuf[17]; 29053a5a1b3Sopenharmony_ci int Norig = st->nfft; 29153a5a1b3Sopenharmony_ci 29253a5a1b3Sopenharmony_ci /*CHECKBUF(scratchbuf,nscratchbuf,p);*/ 29353a5a1b3Sopenharmony_ci if (p>17) 29453a5a1b3Sopenharmony_ci speex_fatal("KissFFT: max radix supported is 17"); 29553a5a1b3Sopenharmony_ci 29653a5a1b3Sopenharmony_ci for ( u=0; u<m; ++u ) { 29753a5a1b3Sopenharmony_ci k=u; 29853a5a1b3Sopenharmony_ci for ( q1=0 ; q1<p ; ++q1 ) { 29953a5a1b3Sopenharmony_ci scratchbuf[q1] = Fout[ k ]; 30053a5a1b3Sopenharmony_ci if (!st->inverse) { 30153a5a1b3Sopenharmony_ci C_FIXDIV(scratchbuf[q1],p); 30253a5a1b3Sopenharmony_ci } 30353a5a1b3Sopenharmony_ci k += m; 30453a5a1b3Sopenharmony_ci } 30553a5a1b3Sopenharmony_ci 30653a5a1b3Sopenharmony_ci k=u; 30753a5a1b3Sopenharmony_ci for ( q1=0 ; q1<p ; ++q1 ) { 30853a5a1b3Sopenharmony_ci int twidx=0; 30953a5a1b3Sopenharmony_ci Fout[ k ] = scratchbuf[0]; 31053a5a1b3Sopenharmony_ci for (q=1;q<p;++q ) { 31153a5a1b3Sopenharmony_ci twidx += fstride * k; 31253a5a1b3Sopenharmony_ci if (twidx>=Norig) twidx-=Norig; 31353a5a1b3Sopenharmony_ci C_MUL(t,scratchbuf[q] , twiddles[twidx] ); 31453a5a1b3Sopenharmony_ci C_ADDTO( Fout[ k ] ,t); 31553a5a1b3Sopenharmony_ci } 31653a5a1b3Sopenharmony_ci k += m; 31753a5a1b3Sopenharmony_ci } 31853a5a1b3Sopenharmony_ci } 31953a5a1b3Sopenharmony_ci} 32053a5a1b3Sopenharmony_ci 32153a5a1b3Sopenharmony_cistatic 32253a5a1b3Sopenharmony_civoid kf_shuffle( 32353a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout, 32453a5a1b3Sopenharmony_ci const kiss_fft_cpx * f, 32553a5a1b3Sopenharmony_ci const size_t fstride, 32653a5a1b3Sopenharmony_ci int in_stride, 32753a5a1b3Sopenharmony_ci int * factors, 32853a5a1b3Sopenharmony_ci const kiss_fft_cfg st 32953a5a1b3Sopenharmony_ci ) 33053a5a1b3Sopenharmony_ci{ 33153a5a1b3Sopenharmony_ci const int p=*factors++; /* the radix */ 33253a5a1b3Sopenharmony_ci const int m=*factors++; /* stage's fft length/p */ 33353a5a1b3Sopenharmony_ci 33453a5a1b3Sopenharmony_ci /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ 33553a5a1b3Sopenharmony_ci if (m==1) 33653a5a1b3Sopenharmony_ci { 33753a5a1b3Sopenharmony_ci int j; 33853a5a1b3Sopenharmony_ci for (j=0;j<p;j++) 33953a5a1b3Sopenharmony_ci { 34053a5a1b3Sopenharmony_ci Fout[j] = *f; 34153a5a1b3Sopenharmony_ci f += fstride*in_stride; 34253a5a1b3Sopenharmony_ci } 34353a5a1b3Sopenharmony_ci } else { 34453a5a1b3Sopenharmony_ci int j; 34553a5a1b3Sopenharmony_ci for (j=0;j<p;j++) 34653a5a1b3Sopenharmony_ci { 34753a5a1b3Sopenharmony_ci kf_shuffle( Fout , f, fstride*p, in_stride, factors,st); 34853a5a1b3Sopenharmony_ci f += fstride*in_stride; 34953a5a1b3Sopenharmony_ci Fout += m; 35053a5a1b3Sopenharmony_ci } 35153a5a1b3Sopenharmony_ci } 35253a5a1b3Sopenharmony_ci} 35353a5a1b3Sopenharmony_ci 35453a5a1b3Sopenharmony_cistatic 35553a5a1b3Sopenharmony_civoid kf_work( 35653a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout, 35753a5a1b3Sopenharmony_ci const kiss_fft_cpx * f, 35853a5a1b3Sopenharmony_ci const size_t fstride, 35953a5a1b3Sopenharmony_ci int in_stride, 36053a5a1b3Sopenharmony_ci int * factors, 36153a5a1b3Sopenharmony_ci const kiss_fft_cfg st, 36253a5a1b3Sopenharmony_ci int N, 36353a5a1b3Sopenharmony_ci int s2, 36453a5a1b3Sopenharmony_ci int m2 36553a5a1b3Sopenharmony_ci ) 36653a5a1b3Sopenharmony_ci{ 36753a5a1b3Sopenharmony_ci int i; 36853a5a1b3Sopenharmony_ci kiss_fft_cpx * Fout_beg=Fout; 36953a5a1b3Sopenharmony_ci const int p=*factors++; /* the radix */ 37053a5a1b3Sopenharmony_ci const int m=*factors++; /* stage's fft length/p */ 37153a5a1b3Sopenharmony_ci#if 0 37253a5a1b3Sopenharmony_ci /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ 37353a5a1b3Sopenharmony_ci if (m==1) 37453a5a1b3Sopenharmony_ci { 37553a5a1b3Sopenharmony_ci /* int j; 37653a5a1b3Sopenharmony_ci for (j=0;j<p;j++) 37753a5a1b3Sopenharmony_ci { 37853a5a1b3Sopenharmony_ci Fout[j] = *f; 37953a5a1b3Sopenharmony_ci f += fstride*in_stride; 38053a5a1b3Sopenharmony_ci }*/ 38153a5a1b3Sopenharmony_ci } else { 38253a5a1b3Sopenharmony_ci int j; 38353a5a1b3Sopenharmony_ci for (j=0;j<p;j++) 38453a5a1b3Sopenharmony_ci { 38553a5a1b3Sopenharmony_ci kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m); 38653a5a1b3Sopenharmony_ci f += fstride*in_stride; 38753a5a1b3Sopenharmony_ci Fout += m; 38853a5a1b3Sopenharmony_ci } 38953a5a1b3Sopenharmony_ci } 39053a5a1b3Sopenharmony_ci 39153a5a1b3Sopenharmony_ci Fout=Fout_beg; 39253a5a1b3Sopenharmony_ci 39353a5a1b3Sopenharmony_ci switch (p) { 39453a5a1b3Sopenharmony_ci case 2: kf_bfly2(Fout,fstride,st,m); break; 39553a5a1b3Sopenharmony_ci case 3: kf_bfly3(Fout,fstride,st,m); break; 39653a5a1b3Sopenharmony_ci case 4: kf_bfly4(Fout,fstride,st,m); break; 39753a5a1b3Sopenharmony_ci case 5: kf_bfly5(Fout,fstride,st,m); break; 39853a5a1b3Sopenharmony_ci default: kf_bfly_generic(Fout,fstride,st,m,p); break; 39953a5a1b3Sopenharmony_ci } 40053a5a1b3Sopenharmony_ci#else 40153a5a1b3Sopenharmony_ci /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/ 40253a5a1b3Sopenharmony_ci if (m==1) 40353a5a1b3Sopenharmony_ci { 40453a5a1b3Sopenharmony_ci /*for (i=0;i<N;i++) 40553a5a1b3Sopenharmony_ci { 40653a5a1b3Sopenharmony_ci int j; 40753a5a1b3Sopenharmony_ci Fout = Fout_beg+i*m2; 40853a5a1b3Sopenharmony_ci const kiss_fft_cpx * f2 = f+i*s2; 40953a5a1b3Sopenharmony_ci for (j=0;j<p;j++) 41053a5a1b3Sopenharmony_ci { 41153a5a1b3Sopenharmony_ci *Fout++ = *f2; 41253a5a1b3Sopenharmony_ci f2 += fstride*in_stride; 41353a5a1b3Sopenharmony_ci } 41453a5a1b3Sopenharmony_ci }*/ 41553a5a1b3Sopenharmony_ci }else{ 41653a5a1b3Sopenharmony_ci kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m); 41753a5a1b3Sopenharmony_ci } 41853a5a1b3Sopenharmony_ci 41953a5a1b3Sopenharmony_ci 42053a5a1b3Sopenharmony_ci 42153a5a1b3Sopenharmony_ci 42253a5a1b3Sopenharmony_ci switch (p) { 42353a5a1b3Sopenharmony_ci case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break; 42453a5a1b3Sopenharmony_ci case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break; 42553a5a1b3Sopenharmony_ci case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break; 42653a5a1b3Sopenharmony_ci case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break; 42753a5a1b3Sopenharmony_ci default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break; 42853a5a1b3Sopenharmony_ci } 42953a5a1b3Sopenharmony_ci#endif 43053a5a1b3Sopenharmony_ci} 43153a5a1b3Sopenharmony_ci 43253a5a1b3Sopenharmony_ci/* facbuf is populated by p1,m1,p2,m2, ... 43353a5a1b3Sopenharmony_ci where 43453a5a1b3Sopenharmony_ci p[i] * m[i] = m[i-1] 43553a5a1b3Sopenharmony_ci m0 = n */ 43653a5a1b3Sopenharmony_cistatic 43753a5a1b3Sopenharmony_civoid kf_factor(int n,int * facbuf) 43853a5a1b3Sopenharmony_ci{ 43953a5a1b3Sopenharmony_ci int p=4; 44053a5a1b3Sopenharmony_ci 44153a5a1b3Sopenharmony_ci /*factor out powers of 4, powers of 2, then any remaining primes */ 44253a5a1b3Sopenharmony_ci do { 44353a5a1b3Sopenharmony_ci while (n % p) { 44453a5a1b3Sopenharmony_ci switch (p) { 44553a5a1b3Sopenharmony_ci case 4: p = 2; break; 44653a5a1b3Sopenharmony_ci case 2: p = 3; break; 44753a5a1b3Sopenharmony_ci default: p += 2; break; 44853a5a1b3Sopenharmony_ci } 44953a5a1b3Sopenharmony_ci if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n) 45053a5a1b3Sopenharmony_ci p = n; /* no more factors, skip to end */ 45153a5a1b3Sopenharmony_ci } 45253a5a1b3Sopenharmony_ci n /= p; 45353a5a1b3Sopenharmony_ci *facbuf++ = p; 45453a5a1b3Sopenharmony_ci *facbuf++ = n; 45553a5a1b3Sopenharmony_ci } while (n > 1); 45653a5a1b3Sopenharmony_ci} 45753a5a1b3Sopenharmony_ci/* 45853a5a1b3Sopenharmony_ci * 45953a5a1b3Sopenharmony_ci * User-callable function to allocate all necessary storage space for the fft. 46053a5a1b3Sopenharmony_ci * 46153a5a1b3Sopenharmony_ci * The return value is a contiguous block of memory, allocated with malloc. As such, 46253a5a1b3Sopenharmony_ci * It can be freed with free(), rather than a kiss_fft-specific function. 46353a5a1b3Sopenharmony_ci * */ 46453a5a1b3Sopenharmony_cikiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) 46553a5a1b3Sopenharmony_ci{ 46653a5a1b3Sopenharmony_ci kiss_fft_cfg st=NULL; 46753a5a1b3Sopenharmony_ci size_t memneeded = sizeof(struct kiss_fft_state) 46853a5a1b3Sopenharmony_ci + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/ 46953a5a1b3Sopenharmony_ci 47053a5a1b3Sopenharmony_ci if ( lenmem==NULL ) { 47153a5a1b3Sopenharmony_ci st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); 47253a5a1b3Sopenharmony_ci }else{ 47353a5a1b3Sopenharmony_ci if (mem != NULL && *lenmem >= memneeded) 47453a5a1b3Sopenharmony_ci st = (kiss_fft_cfg)mem; 47553a5a1b3Sopenharmony_ci *lenmem = memneeded; 47653a5a1b3Sopenharmony_ci } 47753a5a1b3Sopenharmony_ci if (st) { 47853a5a1b3Sopenharmony_ci int i; 47953a5a1b3Sopenharmony_ci st->nfft=nfft; 48053a5a1b3Sopenharmony_ci st->inverse = inverse_fft; 48153a5a1b3Sopenharmony_ci#ifdef FIXED_POINT 48253a5a1b3Sopenharmony_ci for (i=0;i<nfft;++i) { 48353a5a1b3Sopenharmony_ci spx_word32_t phase = i; 48453a5a1b3Sopenharmony_ci if (!st->inverse) 48553a5a1b3Sopenharmony_ci phase = -phase; 48653a5a1b3Sopenharmony_ci kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft)); 48753a5a1b3Sopenharmony_ci } 48853a5a1b3Sopenharmony_ci#else 48953a5a1b3Sopenharmony_ci for (i=0;i<nfft;++i) { 49053a5a1b3Sopenharmony_ci const double pi=3.14159265358979323846264338327; 49153a5a1b3Sopenharmony_ci double phase = ( -2*pi /nfft ) * i; 49253a5a1b3Sopenharmony_ci if (st->inverse) 49353a5a1b3Sopenharmony_ci phase *= -1; 49453a5a1b3Sopenharmony_ci kf_cexp(st->twiddles+i, phase ); 49553a5a1b3Sopenharmony_ci } 49653a5a1b3Sopenharmony_ci#endif 49753a5a1b3Sopenharmony_ci kf_factor(nfft,st->factors); 49853a5a1b3Sopenharmony_ci } 49953a5a1b3Sopenharmony_ci return st; 50053a5a1b3Sopenharmony_ci} 50153a5a1b3Sopenharmony_ci 50253a5a1b3Sopenharmony_ci 50353a5a1b3Sopenharmony_ci 50453a5a1b3Sopenharmony_ci 50553a5a1b3Sopenharmony_civoid kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) 50653a5a1b3Sopenharmony_ci{ 50753a5a1b3Sopenharmony_ci if (fin == fout) 50853a5a1b3Sopenharmony_ci { 50953a5a1b3Sopenharmony_ci speex_fatal("In-place FFT not supported"); 51053a5a1b3Sopenharmony_ci /*CHECKBUF(tmpbuf,ntmpbuf,st->nfft); 51153a5a1b3Sopenharmony_ci kf_work(tmpbuf,fin,1,in_stride, st->factors,st); 51253a5a1b3Sopenharmony_ci SPEEX_MOVE(fout,tmpbuf,st->nfft);*/ 51353a5a1b3Sopenharmony_ci } else { 51453a5a1b3Sopenharmony_ci kf_shuffle( fout, fin, 1,in_stride, st->factors,st); 51553a5a1b3Sopenharmony_ci kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1); 51653a5a1b3Sopenharmony_ci } 51753a5a1b3Sopenharmony_ci} 51853a5a1b3Sopenharmony_ci 51953a5a1b3Sopenharmony_civoid kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 52053a5a1b3Sopenharmony_ci{ 52153a5a1b3Sopenharmony_ci kiss_fft_stride(cfg,fin,fout,1); 52253a5a1b3Sopenharmony_ci} 52353a5a1b3Sopenharmony_ci 524