1/*
2 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "config.h"
22#include "libavutil/cpu.h"
23#include "libavutil/x86/cpu.h"
24#include "libavresample/audio_mix.h"
25
/*
 * Prototypes of the 2 -> 1 and 1 -> 2 mixing kernels registered by
 * ff_audio_mix_init_x86(). They are only declared in this file; the
 * definitions live elsewhere (presumably in the x86 assembly sources --
 * confirm against the build).
 *
 * Names read ff_mix_<a>_to_<b>_<sample fmt>_<coeff type>_<instruction set>,
 * mirroring the arguments each kernel is registered with. All kernels share
 * the same parameter list; only the types behind src and matrix differ.
 */

/* float src, float matrix */
void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix,
                                int len, int out_ch, int in_ch);

/* int16_t src, float matrix */
void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_avx(int16_t **src, float **matrix,
                                int len, int out_ch, int in_ch);

/* int16_t src, int16_t matrix */
void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                int len, int out_ch, int in_ch);
50
/*
 * Declare the two kernels of one sample format and instruction set for the
 * channel count `chan`:
 *   ff_mix_<chan>_to_1_<fmt>_flt_<isa>
 *   ff_mix_<chan>_to_2_<fmt>_flt_<isa>
 * `sample_type` is the type src points to; matrix is float in every variant
 * declared through this macro.
 */
#define DECLARE_MIX_TO_1_2(chan, fmt, isa, sample_type)                     \
void ff_mix_ ## chan ## _to_1_ ## fmt ## _flt_ ## isa(sample_type **src,    \
                                                      float **matrix,       \
                                                      int len, int out_ch,  \
                                                      int in_ch);           \
void ff_mix_ ## chan ## _to_2_ ## fmt ## _flt_ ## isa(sample_type **src,    \
                                                      float **matrix,       \
                                                      int len, int out_ch,  \
                                                      int in_ch);

/*
 * Declare every <chan> -> 1 and <chan> -> 2 kernel: the fltp variants for
 * SSE, AVX and FMA4, and the s16p variants for SSE2, SSE4, AVX and FMA4.
 * Each expansion already ends in ';', so it is used without a trailing
 * semicolon.
 */
#define DEFINE_MIX_3_8_TO_1_2(chan)                                         \
    DECLARE_MIX_TO_1_2(chan, fltp, sse,  float)                             \
    DECLARE_MIX_TO_1_2(chan, s16p, sse2, int16_t)                           \
    DECLARE_MIX_TO_1_2(chan, s16p, sse4, int16_t)                           \
    DECLARE_MIX_TO_1_2(chan, fltp, avx,  float)                             \
    DECLARE_MIX_TO_1_2(chan, s16p, avx,  int16_t)                           \
    DECLARE_MIX_TO_1_2(chan, fltp, fma4, float)                             \
    DECLARE_MIX_TO_1_2(chan, s16p, fma4, int16_t)

/* Channel counts 3 through 8. */
DEFINE_MIX_3_8_TO_1_2(3)
DEFINE_MIX_3_8_TO_1_2(4)
DEFINE_MIX_3_8_TO_1_2(5)
DEFINE_MIX_3_8_TO_1_2(6)
DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8)
107
/*
 * Register the <chan> -> 1 kernel and then the <chan> -> 2 kernel of one
 * sample format and instruction set, both with float coefficients and the
 * same two alignment values. Expands to two complete statements and relies
 * on `am` being in scope at the point of use.
 */
#define SET_MIX_TO_1_2(chan, sample_fmt, fmt, isa, descr, p_align, s_align) \
    ff_audio_mix_set_func(am, sample_fmt, AV_MIX_COEFF_TYPE_FLT,            \
                          chan, 1, p_align, s_align, descr,                 \
                          ff_mix_ ## chan ## _to_1_ ## fmt ## _flt_ ## isa);\
    ff_audio_mix_set_func(am, sample_fmt, AV_MIX_COEFF_TYPE_FLT,            \
                          chan, 2, p_align, s_align, descr,                 \
                          ff_mix_ ## chan ## _to_2_ ## fmt ## _flt_ ## isa);

/*
 * Register all <chan> -> 1 and <chan> -> 2 kernels, one guarded group per
 * instruction set, in the order SSE, SSE2, SSE4, AVX, FMA4. Must be expanded
 * where both `am` and `cpu_flags` are in scope.
 *
 * In the AVX and FMA4 groups the fltp kernels are registered with 32 / 8 as
 * pointer / sample alignment, except on ARCH_X86_32 or when chan >= 6, where
 * 16 / 4 is used instead. The s16p kernels always use 16 / 8.
 */
#define SET_MIX_3_8_TO_1_2(chan)                                            \
    if (EXTERNAL_SSE(cpu_flags)) {                                          \
        SET_MIX_TO_1_2(chan, AV_SAMPLE_FMT_FLTP, fltp, sse, "SSE", 16, 4)   \
    }                                                                       \
    if (EXTERNAL_SSE2(cpu_flags)) {                                         \
        SET_MIX_TO_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, sse2, "SSE2", 16, 8) \
    }                                                                       \
    if (EXTERNAL_SSE4(cpu_flags)) {                                         \
        SET_MIX_TO_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, sse4, "SSE4", 16, 8) \
    }                                                                       \
    if (EXTERNAL_AVX(cpu_flags)) {                                          \
        const int narrow        = ARCH_X86_32 || chan >= 6;                 \
        const int flt_ptr_align = narrow ? 16 : 32;                         \
        const int flt_smp_align = narrow ?  4 :  8;                         \
        SET_MIX_TO_1_2(chan, AV_SAMPLE_FMT_FLTP, fltp, avx, "AVX",          \
                       flt_ptr_align, flt_smp_align)                        \
        SET_MIX_TO_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, avx, "AVX", 16, 8)   \
    }                                                                       \
    if (EXTERNAL_FMA4(cpu_flags)) {                                         \
        const int narrow        = ARCH_X86_32 || chan >= 6;                 \
        const int flt_ptr_align = narrow ? 16 : 32;                         \
        const int flt_smp_align = narrow ?  4 :  8;                         \
        SET_MIX_TO_1_2(chan, AV_SAMPLE_FMT_FLTP, fltp, fma4, "FMA4",        \
                       flt_ptr_align, flt_smp_align)                        \
        SET_MIX_TO_1_2(chan, AV_SAMPLE_FMT_S16P, s16p, fma4, "FMA4", 16, 8) \
    }
173
/**
 * Register the x86 mixing kernels whose instruction-set check passes for the
 * CPU flags reported by av_get_cpu_flags().
 *
 * Every ff_audio_mix_set_func() call passes, in order: am, a sample format,
 * a coefficient type, two channel counts matching the <a>_to_<b> part of the
 * kernel name, two alignment values (named ptr_align / smp_align where
 * SET_MIX_3_8_TO_1_2 computes them), an instruction-set label and the kernel.
 *
 * NOTE(review): the groups run from SSE up to AVX/FMA4, so presumably a later
 * registration for the same configuration replaces an earlier one -- confirm
 * in ff_audio_mix_set_func().
 *
 * @param am  mixing context handed to every ff_audio_mix_set_func() call
 */
av_cold void ff_audio_mix_init_x86(AudioMix *am)
{
    /* The SET_MIX_3_8_TO_1_2() expansions below refer to `cpu_flags` and
     * `am` by name, so neither may be renamed. */
    int cpu_flags = av_get_cpu_flags();

    /* 2 -> 1 and 1 -> 2 kernels, written out by hand. */
    if (EXTERNAL_SSE(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
        /* The only kernel in this file registered with Q8 coefficients. */
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
                              2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
    }
    /* The fltp AVX kernels (pointer alignment 32) are gated on
     * EXTERNAL_AVX_FAST, the s16p one (pointer alignment 16) on EXTERNAL_AVX.
     * NOTE(review): presumably the former excludes CPUs where AVX is slow --
     * confirm against the EXTERNAL_AVX_FAST definition. */
    if (EXTERNAL_AVX_FAST(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
    }
    if (EXTERNAL_AVX(cpu_flags)) {
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                              1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
    }

    /* 3..8 -> 1 and 3..8 -> 2 kernels. Each expansion is a sequence of
     * complete if-blocks, hence no trailing semicolons. */
    SET_MIX_3_8_TO_1_2(3)
    SET_MIX_3_8_TO_1_2(4)
    SET_MIX_3_8_TO_1_2(5)
    SET_MIX_3_8_TO_1_2(6)
    SET_MIX_3_8_TO_1_2(7)
    SET_MIX_3_8_TO_1_2(8)
}
216