1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "config.h" 20 21#include "libavutil/attributes.h" 22#include "libavutil/cpu.h" 23#include "libavutil/float_dsp.h" 24#include "cpu.h" 25#include "asm.h" 26 27void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, 28 int len); 29void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1, 30 int len); 31 32void ff_vector_dmul_sse2(double *dst, const double *src0, const double *src1, 33 int len); 34void ff_vector_dmul_avx(double *dst, const double *src0, const double *src1, 35 int len); 36 37void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul, 38 int len); 39void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul, 40 int len); 41void ff_vector_fmac_scalar_fma3(float *dst, const float *src, float mul, 42 int len); 43 44void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul, 45 int len); 46 47void ff_vector_dmac_scalar_sse2(double *dst, const double *src, double mul, 48 int len); 49void ff_vector_dmac_scalar_avx(double *dst, const double *src, double mul, 50 int len); 51void ff_vector_dmac_scalar_fma3(double *dst, const double *src, double mul, 52 int len); 53 54void ff_vector_dmul_scalar_sse2(double *dst, const double *src, 55 double mul, int len); 56void ff_vector_dmul_scalar_avx(double *dst, const double *src, 57 double mul, int len); 58 59void ff_vector_fmul_window_sse(float *dst, const float *src0, 60 const float *src1, const float *win, int len); 61 62void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1, 63 const float *src2, int len); 64void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1, 65 const float *src2, int len); 66void ff_vector_fmul_add_fma3(float *dst, const float *src0, const float *src1, 67 const float *src2, int len); 68 69void ff_vector_fmul_reverse_sse(float *dst, const float *src0, 70 const float *src1, int len); 71void ff_vector_fmul_reverse_avx(float *dst, const float *src0, 72 const float *src1, int len); 73void ff_vector_fmul_reverse_avx2(float *dst, const float *src0, 74 const float *src1, int len); 75 76float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); 77 78void ff_butterflies_float_sse(float *av_restrict src0, float *av_restrict src1, int len); 79 80av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) 81{ 82 int cpu_flags = av_get_cpu_flags(); 83 84 if (EXTERNAL_SSE(cpu_flags)) { 85 fdsp->vector_fmul = ff_vector_fmul_sse; 86 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse; 87 fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse; 88 fdsp->vector_fmul_window = ff_vector_fmul_window_sse; 89 fdsp->vector_fmul_add = ff_vector_fmul_add_sse; 90 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; 91 fdsp->scalarproduct_float = ff_scalarproduct_float_sse; 92 fdsp->butterflies_float = ff_butterflies_float_sse; 93 } 94 if (EXTERNAL_SSE2(cpu_flags)) { 95 fdsp->vector_dmul = ff_vector_dmul_sse2; 96 fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2; 97 fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; 98 } 99 if (EXTERNAL_AVX_FAST(cpu_flags)) { 100 fdsp->vector_fmul = ff_vector_fmul_avx; 101 fdsp->vector_dmul = ff_vector_dmul_avx; 102 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; 103 fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx; 104 fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx; 105 fdsp->vector_fmul_add = ff_vector_fmul_add_avx; 106 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx; 107 } 108 if (EXTERNAL_AVX2_FAST(cpu_flags)) { 109 fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2; 110 } 111 if (EXTERNAL_FMA3_FAST(cpu_flags)) { 112 fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3; 113 fdsp->vector_fmul_add = ff_vector_fmul_add_fma3; 114 fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_fma3; 115 } 116} 117