1/* 2 * FFT/IFFT transforms 3 * Copyright (c) 2008 Loren Merritt 4 * Copyright (c) 2002 Fabrice Bellard 5 * Partly based on libdjbfft by D. J. Bernstein 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24/** 25 * @file 26 * FFT/IFFT transforms. 27 */ 28 29#include <stdlib.h> 30#include <string.h> 31#include "libavutil/mathematics.h" 32#include "libavutil/thread.h" 33#include "fft.h" 34#include "fft-internal.h" 35 36#if !FFT_FLOAT 37#include "fft_table.h" 38#else /* !FFT_FLOAT */ 39 40/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ 41#if !CONFIG_HARDCODED_TABLES 42COSTABLE(16); 43COSTABLE(32); 44COSTABLE(64); 45COSTABLE(128); 46COSTABLE(256); 47COSTABLE(512); 48COSTABLE(1024); 49COSTABLE(2048); 50COSTABLE(4096); 51COSTABLE(8192); 52COSTABLE(16384); 53COSTABLE(32768); 54COSTABLE(65536); 55COSTABLE(131072); 56 57static av_cold void init_ff_cos_tabs(int index) 58{ 59 int i; 60 int m = 1<<index; 61 double freq = 2*M_PI/m; 62 FFTSample *tab = FFT_NAME(ff_cos_tabs)[index]; 63 for(i=0; i<=m/4; i++) 64 tab[i] = FIX15(cos(i*freq)); 65 for(i=1; i<m/4; i++) 66 tab[m/2-i] = tab[i]; 67} 68 69typedef struct CosTabsInitOnce { 70 void (*func)(void); 71 AVOnce control; 72} CosTabsInitOnce; 73 74#define INIT_FF_COS_TABS_FUNC(index, size) \ 75static av_cold void init_ff_cos_tabs_ ## size (void)\ 76{ \ 77 init_ff_cos_tabs(index); \ 78} 79 80INIT_FF_COS_TABS_FUNC(4, 16) 81INIT_FF_COS_TABS_FUNC(5, 32) 82INIT_FF_COS_TABS_FUNC(6, 64) 83INIT_FF_COS_TABS_FUNC(7, 128) 84INIT_FF_COS_TABS_FUNC(8, 256) 85INIT_FF_COS_TABS_FUNC(9, 512) 86INIT_FF_COS_TABS_FUNC(10, 1024) 87INIT_FF_COS_TABS_FUNC(11, 2048) 88INIT_FF_COS_TABS_FUNC(12, 4096) 89INIT_FF_COS_TABS_FUNC(13, 8192) 90INIT_FF_COS_TABS_FUNC(14, 16384) 91INIT_FF_COS_TABS_FUNC(15, 32768) 92INIT_FF_COS_TABS_FUNC(16, 65536) 93INIT_FF_COS_TABS_FUNC(17, 131072) 94 95static CosTabsInitOnce cos_tabs_init_once[] = { 96 { NULL }, 97 { NULL }, 98 { NULL }, 99 { NULL }, 100 { init_ff_cos_tabs_16, AV_ONCE_INIT }, 101 { init_ff_cos_tabs_32, AV_ONCE_INIT }, 102 { init_ff_cos_tabs_64, AV_ONCE_INIT }, 103 { init_ff_cos_tabs_128, AV_ONCE_INIT }, 104 { init_ff_cos_tabs_256, AV_ONCE_INIT }, 105 { init_ff_cos_tabs_512, AV_ONCE_INIT }, 106 { init_ff_cos_tabs_1024, AV_ONCE_INIT }, 107 { init_ff_cos_tabs_2048, AV_ONCE_INIT }, 108 { init_ff_cos_tabs_4096, AV_ONCE_INIT }, 109 { init_ff_cos_tabs_8192, AV_ONCE_INIT }, 110 { init_ff_cos_tabs_16384, AV_ONCE_INIT }, 111 { init_ff_cos_tabs_32768, AV_ONCE_INIT }, 112 { init_ff_cos_tabs_65536, AV_ONCE_INIT }, 113 { init_ff_cos_tabs_131072, AV_ONCE_INIT }, 114}; 115 116av_cold void ff_init_ff_cos_tabs(int index) 117{ 118 ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func); 119} 120#endif 121COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { 122 NULL, NULL, NULL, NULL, 123 FFT_NAME(ff_cos_16), 124 FFT_NAME(ff_cos_32), 125 FFT_NAME(ff_cos_64), 126 FFT_NAME(ff_cos_128), 127 FFT_NAME(ff_cos_256), 128 FFT_NAME(ff_cos_512), 129 FFT_NAME(ff_cos_1024), 130 FFT_NAME(ff_cos_2048), 131 FFT_NAME(ff_cos_4096), 132 FFT_NAME(ff_cos_8192), 133 FFT_NAME(ff_cos_16384), 134 FFT_NAME(ff_cos_32768), 135 FFT_NAME(ff_cos_65536), 136 FFT_NAME(ff_cos_131072), 137}; 138 139#endif /* FFT_FLOAT */ 140 141static void fft_permute_c(FFTContext *s, FFTComplex *z); 142static void fft_calc_c(FFTContext *s, FFTComplex *z); 143 144static int split_radix_permutation(int i, int n, int inverse) 145{ 146 int m; 147 if(n <= 2) return i&1; 148 m = n >> 1; 149 if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; 150 m >>= 1; 151 if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; 152 else return split_radix_permutation(i, m, inverse)*4 - 1; 153} 154 155 156static const int avx_tab[] = { 157 0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15 158}; 159 160static int is_second_half_of_fft32(int i, int n) 161{ 162 if (n <= 32) 163 return i >= 16; 164 else if (i < n/2) 165 return is_second_half_of_fft32(i, n/2); 166 else if (i < 3*n/4) 167 return is_second_half_of_fft32(i - n/2, n/4); 168 else 169 return is_second_half_of_fft32(i - 3*n/4, n/4); 170} 171 172static av_cold void fft_perm_avx(FFTContext *s) 173{ 174 int i; 175 int n = 1 << s->nbits; 176 177 for (i = 0; i < n; i += 16) { 178 int k; 179 if (is_second_half_of_fft32(i, n)) { 180 for (k = 0; k < 16; k++) 181 s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = 182 i + avx_tab[k]; 183 184 } else { 185 for (k = 0; k < 16; k++) { 186 int j = i + k; 187 j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4); 188 s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j; 189 } 190 } 191 } 192} 193 194av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) 195{ 196 int i, j, n; 197 198 s->revtab = NULL; 199 s->revtab32 = NULL; 200 201 if (nbits < 2 || nbits > 17) 202 goto fail; 203 s->nbits = nbits; 204 n = 1 << nbits; 205 206 if (nbits <= 16) { 207 s->revtab = av_malloc(n * sizeof(uint16_t)); 208 if (!s->revtab) 209 goto fail; 210 } else { 211 s->revtab32 = av_malloc(n * sizeof(uint32_t)); 212 if (!s->revtab32) 213 goto fail; 214 } 215 s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); 216 if (!s->tmp_buf) 217 goto fail; 218 s->inverse = inverse; 219 s->fft_permutation = FF_FFT_PERM_DEFAULT; 220 221 s->fft_permute = fft_permute_c; 222 s->fft_calc = fft_calc_c; 223#if CONFIG_MDCT 224 s->imdct_calc = ff_imdct_calc_c; 225 s->imdct_half = ff_imdct_half_c; 226 s->mdct_calc = ff_mdct_calc_c; 227#endif 228 229#if FFT_FLOAT 230#if ARCH_AARCH64 231 ff_fft_init_aarch64(s); 232#elif ARCH_ARM 233 ff_fft_init_arm(s); 234#elif ARCH_PPC 235 ff_fft_init_ppc(s); 236#elif ARCH_X86 237 ff_fft_init_x86(s); 238#endif 239#if HAVE_MIPSFPU 240 ff_fft_init_mips(s); 241#endif 242 for(j=4; j<=nbits; j++) { 243 ff_init_ff_cos_tabs(j); 244 } 245#else /* FFT_FLOAT */ 246 ff_fft_lut_init(); 247#endif 248 249 250 if (ARCH_X86 && FFT_FLOAT && s->fft_permutation == FF_FFT_PERM_AVX) { 251 fft_perm_avx(s); 252 } else { 253#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\ 254 for(i = 0; i < n; i++) {\ 255 int k;\ 256 j = i;\ 257 j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\ 258 k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ 259 s->revtab##num[k] = j;\ 260 } \ 261} while(0); 262 263#define PROCESS_FFT_PERM_DEFAULT(num) do {\ 264 for(i = 0; i < n; i++) {\ 265 int k;\ 266 j = i;\ 267 k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ 268 s->revtab##num[k] = j;\ 269 } \ 270} while(0); 271 272#define SPLIT_RADIX_PERMUTATION(num) do { \ 273 if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\ 274 PROCESS_FFT_PERM_SWAP_LSBS(num) \ 275 } else {\ 276 PROCESS_FFT_PERM_DEFAULT(num) \ 277 }\ 278} while(0); 279 280 if (s->revtab) 281 SPLIT_RADIX_PERMUTATION() 282 if (s->revtab32) 283 SPLIT_RADIX_PERMUTATION(32) 284 285#undef PROCESS_FFT_PERM_DEFAULT 286#undef PROCESS_FFT_PERM_SWAP_LSBS 287#undef SPLIT_RADIX_PERMUTATION 288 } 289 290 return 0; 291 fail: 292 av_freep(&s->revtab); 293 av_freep(&s->revtab32); 294 av_freep(&s->tmp_buf); 295 return -1; 296} 297 298static void fft_permute_c(FFTContext *s, FFTComplex *z) 299{ 300 int j, np; 301 const uint16_t *revtab = s->revtab; 302 const uint32_t *revtab32 = s->revtab32; 303 np = 1 << s->nbits; 304 /* TODO: handle split-radix permute in a more optimal way, probably in-place */ 305 if (revtab) { 306 for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j]; 307 } else 308 for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j]; 309 310 memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); 311} 312 313av_cold void ff_fft_end(FFTContext *s) 314{ 315 av_freep(&s->revtab); 316 av_freep(&s->revtab32); 317 av_freep(&s->tmp_buf); 318} 319 320#if !FFT_FLOAT 321 322static void fft_calc_c(FFTContext *s, FFTComplex *z) { 323 324 int nbits, i, n, num_transforms, offset, step; 325 int n4, n2, n34; 326 unsigned tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; 327 FFTComplex *tmpz; 328 const int fft_size = (1 << s->nbits); 329 int64_t accu; 330 331 num_transforms = (0x2aab >> (16 - s->nbits)) | 1; 332 333 for (n=0; n<num_transforms; n++){ 334 offset = ff_fft_offsets_lut[n] << 2; 335 tmpz = z + offset; 336 337 tmp1 = tmpz[0].re + (unsigned)tmpz[1].re; 338 tmp5 = tmpz[2].re + (unsigned)tmpz[3].re; 339 tmp2 = tmpz[0].im + (unsigned)tmpz[1].im; 340 tmp6 = tmpz[2].im + (unsigned)tmpz[3].im; 341 tmp3 = tmpz[0].re - (unsigned)tmpz[1].re; 342 tmp8 = tmpz[2].im - (unsigned)tmpz[3].im; 343 tmp4 = tmpz[0].im - (unsigned)tmpz[1].im; 344 tmp7 = tmpz[2].re - (unsigned)tmpz[3].re; 345 346 tmpz[0].re = tmp1 + tmp5; 347 tmpz[2].re = tmp1 - tmp5; 348 tmpz[0].im = tmp2 + tmp6; 349 tmpz[2].im = tmp2 - tmp6; 350 tmpz[1].re = tmp3 + tmp8; 351 tmpz[3].re = tmp3 - tmp8; 352 tmpz[1].im = tmp4 - tmp7; 353 tmpz[3].im = tmp4 + tmp7; 354 } 355 356 if (fft_size < 8) 357 return; 358 359 num_transforms = (num_transforms >> 1) | 1; 360 361 for (n=0; n<num_transforms; n++){ 362 offset = ff_fft_offsets_lut[n] << 3; 363 tmpz = z + offset; 364 365 tmp1 = tmpz[4].re + (unsigned)tmpz[5].re; 366 tmp3 = tmpz[6].re + (unsigned)tmpz[7].re; 367 tmp2 = tmpz[4].im + (unsigned)tmpz[5].im; 368 tmp4 = tmpz[6].im + (unsigned)tmpz[7].im; 369 tmp5 = tmp1 + tmp3; 370 tmp7 = tmp1 - tmp3; 371 tmp6 = tmp2 + tmp4; 372 tmp8 = tmp2 - tmp4; 373 374 tmp1 = tmpz[4].re - (unsigned)tmpz[5].re; 375 tmp2 = tmpz[4].im - (unsigned)tmpz[5].im; 376 tmp3 = tmpz[6].re - (unsigned)tmpz[7].re; 377 tmp4 = tmpz[6].im - (unsigned)tmpz[7].im; 378 379 tmpz[4].re = tmpz[0].re - tmp5; 380 tmpz[0].re = tmpz[0].re + tmp5; 381 tmpz[4].im = tmpz[0].im - tmp6; 382 tmpz[0].im = tmpz[0].im + tmp6; 383 tmpz[6].re = tmpz[2].re - tmp8; 384 tmpz[2].re = tmpz[2].re + tmp8; 385 tmpz[6].im = tmpz[2].im + tmp7; 386 tmpz[2].im = tmpz[2].im - tmp7; 387 388 accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp1 + tmp2); 389 tmp5 = (int32_t)((accu + 0x40000000) >> 31); 390 accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 - tmp4); 391 tmp7 = (int32_t)((accu + 0x40000000) >> 31); 392 accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp2 - tmp1); 393 tmp6 = (int32_t)((accu + 0x40000000) >> 31); 394 accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 + tmp4); 395 tmp8 = (int32_t)((accu + 0x40000000) >> 31); 396 tmp1 = tmp5 + tmp7; 397 tmp3 = tmp5 - tmp7; 398 tmp2 = tmp6 + tmp8; 399 tmp4 = tmp6 - tmp8; 400 401 tmpz[5].re = tmpz[1].re - tmp1; 402 tmpz[1].re = tmpz[1].re + tmp1; 403 tmpz[5].im = tmpz[1].im - tmp2; 404 tmpz[1].im = tmpz[1].im + tmp2; 405 tmpz[7].re = tmpz[3].re - tmp4; 406 tmpz[3].re = tmpz[3].re + tmp4; 407 tmpz[7].im = tmpz[3].im + tmp3; 408 tmpz[3].im = tmpz[3].im - tmp3; 409 } 410 411 step = 1 << ((MAX_LOG2_NFFT-4) - 4); 412 n4 = 4; 413 414 for (nbits=4; nbits<=s->nbits; nbits++){ 415 n2 = 2*n4; 416 n34 = 3*n4; 417 num_transforms = (num_transforms >> 1) | 1; 418 419 for (n=0; n<num_transforms; n++){ 420 const FFTSample *w_re_ptr = ff_w_tab_sr + step; 421 const FFTSample *w_im_ptr = ff_w_tab_sr + MAX_FFT_SIZE/(4*16) - step; 422 offset = ff_fft_offsets_lut[n] << nbits; 423 tmpz = z + offset; 424 425 tmp5 = tmpz[ n2].re + (unsigned)tmpz[n34].re; 426 tmp1 = tmpz[ n2].re - (unsigned)tmpz[n34].re; 427 tmp6 = tmpz[ n2].im + (unsigned)tmpz[n34].im; 428 tmp2 = tmpz[ n2].im - (unsigned)tmpz[n34].im; 429 430 tmpz[ n2].re = tmpz[ 0].re - tmp5; 431 tmpz[ 0].re = tmpz[ 0].re + tmp5; 432 tmpz[ n2].im = tmpz[ 0].im - tmp6; 433 tmpz[ 0].im = tmpz[ 0].im + tmp6; 434 tmpz[n34].re = tmpz[n4].re - tmp2; 435 tmpz[ n4].re = tmpz[n4].re + tmp2; 436 tmpz[n34].im = tmpz[n4].im + tmp1; 437 tmpz[ n4].im = tmpz[n4].im - tmp1; 438 439 for (i=1; i<n4; i++){ 440 FFTSample w_re = w_re_ptr[0]; 441 FFTSample w_im = w_im_ptr[0]; 442 accu = (int64_t)w_re*tmpz[ n2+i].re; 443 accu += (int64_t)w_im*tmpz[ n2+i].im; 444 tmp1 = (int32_t)((accu + 0x40000000) >> 31); 445 accu = (int64_t)w_re*tmpz[ n2+i].im; 446 accu -= (int64_t)w_im*tmpz[ n2+i].re; 447 tmp2 = (int32_t)((accu + 0x40000000) >> 31); 448 accu = (int64_t)w_re*tmpz[n34+i].re; 449 accu -= (int64_t)w_im*tmpz[n34+i].im; 450 tmp3 = (int32_t)((accu + 0x40000000) >> 31); 451 accu = (int64_t)w_re*tmpz[n34+i].im; 452 accu += (int64_t)w_im*tmpz[n34+i].re; 453 tmp4 = (int32_t)((accu + 0x40000000) >> 31); 454 455 tmp5 = tmp1 + tmp3; 456 tmp1 = tmp1 - tmp3; 457 tmp6 = tmp2 + tmp4; 458 tmp2 = tmp2 - tmp4; 459 460 tmpz[ n2+i].re = tmpz[ i].re - tmp5; 461 tmpz[ i].re = tmpz[ i].re + tmp5; 462 tmpz[ n2+i].im = tmpz[ i].im - tmp6; 463 tmpz[ i].im = tmpz[ i].im + tmp6; 464 tmpz[n34+i].re = tmpz[n4+i].re - tmp2; 465 tmpz[ n4+i].re = tmpz[n4+i].re + tmp2; 466 tmpz[n34+i].im = tmpz[n4+i].im + tmp1; 467 tmpz[ n4+i].im = tmpz[n4+i].im - tmp1; 468 469 w_re_ptr += step; 470 w_im_ptr -= step; 471 } 472 } 473 step >>= 1; 474 n4 <<= 1; 475 } 476} 477 478#else /* !FFT_FLOAT */ 479 480#define BUTTERFLIES(a0,a1,a2,a3) {\ 481 BF(t3, t5, t5, t1);\ 482 BF(a2.re, a0.re, a0.re, t5);\ 483 BF(a3.im, a1.im, a1.im, t3);\ 484 BF(t4, t6, t2, t6);\ 485 BF(a3.re, a1.re, a1.re, t4);\ 486 BF(a2.im, a0.im, a0.im, t6);\ 487} 488 489// force loading all the inputs before storing any. 490// this is slightly slower for small data, but avoids store->load aliasing 491// for addresses separated by large powers of 2. 492#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ 493 FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ 494 BF(t3, t5, t5, t1);\ 495 BF(a2.re, a0.re, r0, t5);\ 496 BF(a3.im, a1.im, i1, t3);\ 497 BF(t4, t6, t2, t6);\ 498 BF(a3.re, a1.re, r1, t4);\ 499 BF(a2.im, a0.im, i0, t6);\ 500} 501 502#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ 503 CMUL(t1, t2, a2.re, a2.im, wre, -wim);\ 504 CMUL(t5, t6, a3.re, a3.im, wre, wim);\ 505 BUTTERFLIES(a0,a1,a2,a3)\ 506} 507 508#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ 509 t1 = a2.re;\ 510 t2 = a2.im;\ 511 t5 = a3.re;\ 512 t6 = a3.im;\ 513 BUTTERFLIES(a0,a1,a2,a3)\ 514} 515 516/* z[0...8n-1], w[1...2n-1] */ 517#define PASS(name)\ 518static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ 519{\ 520 FFTDouble t1, t2, t3, t4, t5, t6;\ 521 int o1 = 2*n;\ 522 int o2 = 4*n;\ 523 int o3 = 6*n;\ 524 const FFTSample *wim = wre+o1;\ 525 n--;\ 526\ 527 TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ 528 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ 529 do {\ 530 z += 2;\ 531 wre += 2;\ 532 wim -= 2;\ 533 TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ 534 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ 535 } while(--n);\ 536} 537 538PASS(pass) 539#if !CONFIG_SMALL 540#undef BUTTERFLIES 541#define BUTTERFLIES BUTTERFLIES_BIG 542PASS(pass_big) 543#endif 544 545#define DECL_FFT(n,n2,n4)\ 546static void fft##n(FFTComplex *z)\ 547{\ 548 fft##n2(z);\ 549 fft##n4(z+n4*2);\ 550 fft##n4(z+n4*3);\ 551 pass(z,FFT_NAME(ff_cos_##n),n4/2);\ 552} 553 554static void fft4(FFTComplex *z) 555{ 556 FFTDouble t1, t2, t3, t4, t5, t6, t7, t8; 557 558 BF(t3, t1, z[0].re, z[1].re); 559 BF(t8, t6, z[3].re, z[2].re); 560 BF(z[2].re, z[0].re, t1, t6); 561 BF(t4, t2, z[0].im, z[1].im); 562 BF(t7, t5, z[2].im, z[3].im); 563 BF(z[3].im, z[1].im, t4, t8); 564 BF(z[3].re, z[1].re, t3, t7); 565 BF(z[2].im, z[0].im, t2, t5); 566} 567 568static void fft8(FFTComplex *z) 569{ 570 FFTDouble t1, t2, t3, t4, t5, t6; 571 572 fft4(z); 573 574 BF(t1, z[5].re, z[4].re, -z[5].re); 575 BF(t2, z[5].im, z[4].im, -z[5].im); 576 BF(t5, z[7].re, z[6].re, -z[7].re); 577 BF(t6, z[7].im, z[6].im, -z[7].im); 578 579 BUTTERFLIES(z[0],z[2],z[4],z[6]); 580 TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); 581} 582 583#if !CONFIG_SMALL 584static void fft16(FFTComplex *z) 585{ 586 FFTDouble t1, t2, t3, t4, t5, t6; 587 FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1]; 588 FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3]; 589 590 fft8(z); 591 fft4(z+8); 592 fft4(z+12); 593 594 TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); 595 TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); 596 TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3); 597 TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1); 598} 599#else 600DECL_FFT(16,8,4) 601#endif 602DECL_FFT(32,16,8) 603DECL_FFT(64,32,16) 604DECL_FFT(128,64,32) 605DECL_FFT(256,128,64) 606DECL_FFT(512,256,128) 607#if !CONFIG_SMALL 608#define pass pass_big 609#endif 610DECL_FFT(1024,512,256) 611DECL_FFT(2048,1024,512) 612DECL_FFT(4096,2048,1024) 613DECL_FFT(8192,4096,2048) 614DECL_FFT(16384,8192,4096) 615DECL_FFT(32768,16384,8192) 616DECL_FFT(65536,32768,16384) 617DECL_FFT(131072,65536,32768) 618 619static void (* const fft_dispatch[])(FFTComplex*) = { 620 fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, 621 fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072 622}; 623 624static void fft_calc_c(FFTContext *s, FFTComplex *z) 625{ 626 fft_dispatch[s->nbits-2](z); 627} 628#endif /* !FFT_FLOAT */ 629