1/* 2 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "config.h" 22#include "libavutil/cpu.h" 23#include "libavutil/x86/cpu.h" 24#include "libavresample/audio_mix.h" 25 26void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len, 27 int out_ch, int in_ch); 28void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix, int len, 29 int out_ch, int in_ch); 30 31void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix, int len, 32 int out_ch, int in_ch); 33void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix, int len, 34 int out_ch, int in_ch); 35 36void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix, 37 int len, int out_ch, int in_ch); 38 39void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len, 40 int out_ch, int in_ch); 41void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len, 42 int out_ch, int in_ch); 43 44void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len, 45 int out_ch, int in_ch); 46void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len, 47 int out_ch, int in_ch); 48void ff_mix_1_to_2_s16p_flt_avx (int16_t **src, float **matrix, int len, 49 int out_ch, int in_ch); 50 51#define DEFINE_MIX_3_8_TO_1_2(chan) \ 52void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src, \ 53 float **matrix, int len, \ 54 int out_ch, int in_ch); \ 55void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src, \ 56 float **matrix, int len, \ 57 int out_ch, int in_ch); \ 58 \ 59void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src, \ 60 float **matrix, int len, \ 61 int out_ch, int in_ch); \ 62void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src, \ 63 float **matrix, int len, \ 64 int out_ch, int in_ch); \ 65 \ 66void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src, \ 67 float **matrix, int len, \ 68 int out_ch, int in_ch); \ 69void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src, \ 70 float **matrix, int len, \ 71 int out_ch, int in_ch); \ 72 \ 73void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src, \ 74 float **matrix, int len, \ 75 int out_ch, int in_ch); \ 76void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src, \ 77 float **matrix, int len, \ 78 int out_ch, int in_ch); \ 79 \ 80void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src, \ 81 float **matrix, int len, \ 82 int out_ch, int in_ch); \ 83void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src, \ 84 float **matrix, int len, \ 85 int out_ch, int in_ch); \ 86 \ 87void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src, \ 88 float **matrix, int len, \ 89 int out_ch, int in_ch); \ 90void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src, \ 91 float **matrix, int len, \ 92 int out_ch, int in_ch); \ 93 \ 94void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src, \ 95 float **matrix, int len, \ 96 int out_ch, int in_ch); \ 97void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src, \ 98 float **matrix, int len, \ 99 int out_ch, int in_ch); 100 101DEFINE_MIX_3_8_TO_1_2(3) 102DEFINE_MIX_3_8_TO_1_2(4) 103DEFINE_MIX_3_8_TO_1_2(5) 104DEFINE_MIX_3_8_TO_1_2(6) 105DEFINE_MIX_3_8_TO_1_2(7) 106DEFINE_MIX_3_8_TO_1_2(8) 107 108#define SET_MIX_3_8_TO_1_2(chan) \ 109 if (EXTERNAL_SSE(cpu_flags)) { \ 110 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ 111 chan, 1, 16, 4, "SSE", \ 112 ff_mix_ ## chan ## _to_1_fltp_flt_sse); \ 113 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ 114 chan, 2, 16, 4, "SSE", \ 115 ff_mix_## chan ##_to_2_fltp_flt_sse); \ 116 } \ 117 if (EXTERNAL_SSE2(cpu_flags)) { \ 118 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 119 chan, 1, 16, 8, "SSE2", \ 120 ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \ 121 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 122 chan, 2, 16, 8, "SSE2", \ 123 ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \ 124 } \ 125 if (EXTERNAL_SSE4(cpu_flags)) { \ 126 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 127 chan, 1, 16, 8, "SSE4", \ 128 ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \ 129 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 130 chan, 2, 16, 8, "SSE4", \ 131 ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \ 132 } \ 133 if (EXTERNAL_AVX(cpu_flags)) { \ 134 int ptr_align = 32; \ 135 int smp_align = 8; \ 136 if (ARCH_X86_32 || chan >= 6) { \ 137 ptr_align = 16; \ 138 smp_align = 4; \ 139 } \ 140 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ 141 chan, 1, ptr_align, smp_align, "AVX", \ 142 ff_mix_ ## chan ## _to_1_fltp_flt_avx); \ 143 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ 144 chan, 2, ptr_align, smp_align, "AVX", \ 145 ff_mix_ ## chan ## _to_2_fltp_flt_avx); \ 146 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 147 chan, 1, 16, 8, "AVX", \ 148 ff_mix_ ## chan ## _to_1_s16p_flt_avx); \ 149 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 150 chan, 2, 16, 8, "AVX", \ 151 ff_mix_ ## chan ## _to_2_s16p_flt_avx); \ 152 } \ 153 if (EXTERNAL_FMA4(cpu_flags)) { \ 154 int ptr_align = 32; \ 155 int smp_align = 8; \ 156 if (ARCH_X86_32 || chan >= 6) { \ 157 ptr_align = 16; \ 158 smp_align = 4; \ 159 } \ 160 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ 161 chan, 1, ptr_align, smp_align, "FMA4", \ 162 ff_mix_ ## chan ## _to_1_fltp_flt_fma4); \ 163 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ 164 chan, 2, ptr_align, smp_align, "FMA4", \ 165 ff_mix_ ## chan ## _to_2_fltp_flt_fma4); \ 166 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 167 chan, 1, 16, 8, "FMA4", \ 168 ff_mix_ ## chan ## _to_1_s16p_flt_fma4); \ 169 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ 170 chan, 2, 16, 8, "FMA4", \ 171 ff_mix_ ## chan ## _to_2_s16p_flt_fma4); \ 172 } 173 174av_cold void ff_audio_mix_init_x86(AudioMix *am) 175{ 176 int cpu_flags = av_get_cpu_flags(); 177 178 if (EXTERNAL_SSE(cpu_flags)) { 179 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, 180 2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse); 181 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, 182 1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse); 183 } 184 if (EXTERNAL_SSE2(cpu_flags)) { 185 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, 186 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2); 187 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8, 188 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2); 189 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, 190 1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2); 191 } 192 if (EXTERNAL_SSE4(cpu_flags)) { 193 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, 194 2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4); 195 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, 196 1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4); 197 } 198 if (EXTERNAL_AVX_FAST(cpu_flags)) { 199 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, 200 2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx); 201 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, 202 1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx); 203 } 204 if (EXTERNAL_AVX(cpu_flags)) { 205 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, 206 1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx); 207 } 208 209 SET_MIX_3_8_TO_1_2(3) 210 SET_MIX_3_8_TO_1_2(4) 211 SET_MIX_3_8_TO_1_2(5) 212 SET_MIX_3_8_TO_1_2(6) 213 SET_MIX_3_8_TO_1_2(7) 214 SET_MIX_3_8_TO_1_2(8) 215} 216