1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#define TX_FLOAT 20#include "libavutil/tx_priv.h" 21#include "libavutil/attributes.h" 22#include "libavutil/x86/cpu.h" 23 24#include "config.h" 25 26TX_DECL_FN(fft2, sse3) 27TX_DECL_FN(fft4_fwd, sse2) 28TX_DECL_FN(fft4_inv, sse2) 29TX_DECL_FN(fft8, sse3) 30TX_DECL_FN(fft8_ns, sse3) 31TX_DECL_FN(fft8, avx) 32TX_DECL_FN(fft8_ns, avx) 33TX_DECL_FN(fft16, avx) 34TX_DECL_FN(fft16_ns, avx) 35TX_DECL_FN(fft16, fma3) 36TX_DECL_FN(fft16_ns, fma3) 37TX_DECL_FN(fft32, avx) 38TX_DECL_FN(fft32_ns, avx) 39TX_DECL_FN(fft32, fma3) 40TX_DECL_FN(fft32_ns, fma3) 41TX_DECL_FN(fft_sr, fma3) 42TX_DECL_FN(fft_sr_ns, fma3) 43TX_DECL_FN(fft_sr, avx2) 44TX_DECL_FN(fft_sr_ns, avx2) 45 46#define DECL_INIT_FN(basis, interleave) \ 47static av_cold int b ##basis## _i ##interleave(AVTXContext *s, \ 48 const FFTXCodelet *cd, \ 49 uint64_t flags, \ 50 FFTXCodeletOptions *opts, \ 51 int len, int inv, \ 52 const void *scale) \ 53{ \ 54 const int inv_lookup = opts ? opts->invert_lookup : 1; \ 55 ff_tx_init_tabs_float(len); \ 56 if (cd->max_len == 2) \ 57 return ff_tx_gen_ptwo_revtab(s, inv_lookup); \ 58 else \ 59 return ff_tx_gen_split_radix_parity_revtab(s, inv_lookup, \ 60 basis, interleave); \ 61} 62 63DECL_INIT_FN(8, 0) 64DECL_INIT_FN(8, 2) 65 66const FFTXCodelet * const ff_tx_codelet_list_float_x86[] = { 67 TX_DEF(fft2, FFT, 2, 2, 2, 0, 128, NULL, sse3, SSE3, AV_TX_INPLACE, 0), 68 TX_DEF(fft2, FFT, 2, 2, 2, 0, 192, b8_i0, sse3, SSE3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0), 69 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 128, NULL, sse2, SSE2, AV_TX_INPLACE | FF_TX_FORWARD_ONLY, 0), 70 TX_DEF(fft4_fwd, FFT, 4, 4, 2, 0, 192, b8_i0, sse2, SSE2, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0), 71 TX_DEF(fft4_inv, FFT, 4, 4, 2, 0, 128, NULL, sse2, SSE2, AV_TX_INPLACE | FF_TX_INVERSE_ONLY, 0), 72 TX_DEF(fft8, FFT, 8, 8, 2, 0, 128, b8_i0, sse3, SSE3, AV_TX_INPLACE, 0), 73 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 192, b8_i0, sse3, SSE3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 0), 74 TX_DEF(fft8, FFT, 8, 8, 2, 0, 256, b8_i0, avx, AVX, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW), 75 TX_DEF(fft8_ns, FFT, 8, 8, 2, 0, 320, b8_i0, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 76 AV_CPU_FLAG_AVXSLOW), 77 TX_DEF(fft16, FFT, 16, 16, 2, 0, 256, b8_i2, avx, AVX, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW), 78 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 79 AV_CPU_FLAG_AVXSLOW), 80 TX_DEF(fft16, FFT, 16, 16, 2, 0, 288, b8_i2, fma3, FMA3, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW), 81 TX_DEF(fft16_ns, FFT, 16, 16, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 82 AV_CPU_FLAG_AVXSLOW), 83 84#if ARCH_X86_64 85 TX_DEF(fft32, FFT, 32, 32, 2, 0, 256, b8_i2, avx, AVX, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW), 86 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 320, b8_i2, avx, AVX, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 87 AV_CPU_FLAG_AVXSLOW), 88 TX_DEF(fft32, FFT, 32, 32, 2, 0, 288, b8_i2, fma3, FMA3, AV_TX_INPLACE, AV_CPU_FLAG_AVXSLOW), 89 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 90 AV_CPU_FLAG_AVXSLOW), 91 TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 288, b8_i2, fma3, FMA3, 0, AV_CPU_FLAG_AVXSLOW), 92 TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 93 AV_CPU_FLAG_AVXSLOW), 94#if HAVE_AVX2_EXTERNAL 95 TX_DEF(fft_sr, FFT, 64, 131072, 2, 0, 320, b8_i2, avx2, AVX2, 0, 96 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER), 97 TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2, AV_TX_INPLACE | FF_TX_PRESHUFFLE, 98 AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER), 99#endif 100#endif 101 102 NULL, 103}; 104