/*
 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavresample/audio_mix.h"

/*
 * Prototypes for the 2-to-1 and 1-to-2 channel mixing routines that
 * ff_audio_mix_init_x86() registers. They are only declared here; no
 * definition appears in this file (presumably they are implemented in the
 * x86 assembly sources -- confirm).
 *
 * Names follow ff_mix_<in>_to_<out>_<sample fmt>_<coeff type>_<cpu ext>.
 * All of them share the parameter list
 * (src, matrix, len, out_ch, in_ch); only the element types of src and
 * matrix differ between the fltp/s16p and flt/q8 variants.
 */

/* 2 -> 1, planar float samples, float coefficients */
void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 2 -> 1, planar 16-bit samples, float coefficients */
void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);

/* 2 -> 1, planar 16-bit samples, 16-bit (q8) coefficients */
void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                int len, int out_ch, int in_ch);

/* 1 -> 2, planar float samples, float coefficients */
void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 1 -> 2, planar 16-bit samples, float coefficients */
void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_avx(int16_t **src, float **matrix, int len,
                                int out_ch, int in_ch);

/*
 * Declare the full family of <chan>-to-1 and <chan>-to-2 mixing routines for
 * one input channel count.
 *
 * For a given chan the expansion yields prototypes for:
 *   - fltp/flt : sse, avx, fma4
 *   - s16p/flt : sse2, sse4, avx, fma4
 * each in a _to_1 and a _to_2 flavour (14 declarations in total).
 *
 * The expansion already ends with ';', so the macro is invoked without a
 * trailing semicolon.
 */
#define DEFINE_MIX_3_8_TO_1_2(chan)                                     \
void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);

/* Instantiate the prototypes for every input channel count from 3 to 8. */
DEFINE_MIX_3_8_TO_1_2(3)
DEFINE_MIX_3_8_TO_1_2(4)
DEFINE_MIX_3_8_TO_1_2(5)
DEFINE_MIX_3_8_TO_1_2(6)
DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8)

/*
 * Register every <chan>-to-1 and <chan>-to-2 routine for one input channel
 * count via ff_audio_mix_set_func().
 *
 * The macro is not hygienic: it expands to a sequence of plain `if`
 * statements and reads two identifiers that must exist at the point of use:
 *   am        - forwarded as the first argument of ff_audio_mix_set_func()
 *   cpu_flags - tested with the EXTERNAL_*() macros
 * It is invoked without a trailing semicolon.
 *
 * Extensions are handled in the order SSE, SSE2, SSE4, AVX, FMA4.
 *
 * For the float AVX/FMA4 routines the alignment arguments are 32/8 unless
 * ARCH_X86_32 is set or chan >= 6, in which case 16/4 is used instead.
 * NOTE(review): presumably those cases cannot use the wider vectors --
 * confirm against the routine implementations.
 */
#define SET_MIX_3_8_TO_1_2(chan)                                            \
    if (EXTERNAL_SSE(cpu_flags)) {                                          \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_sse);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_sse);       \
    }                                                                       \
    if (EXTERNAL_SSE2(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse2);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse2);      \
    }                                                                       \
    if (EXTERNAL_SSE4(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse4);      \
    }                                                                       \
    if (EXTERNAL_AVX(cpu_flags)) {                                          \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || chan >= 6) {                                     \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_1_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_2_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_1_s16p_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_2_s16p_flt_avx);       \
    }                                                                       \
    if (EXTERNAL_FMA4(cpu_flags)) {                                         \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || chan >= 6) {                                     \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_fma4);      \
    }

174cabdff1aSopenharmony_ciav_cold void ff_audio_mix_init_x86(AudioMix *am)
175cabdff1aSopenharmony_ci{
176cabdff1aSopenharmony_ci    int cpu_flags = av_get_cpu_flags();
177cabdff1aSopenharmony_ci
178cabdff1aSopenharmony_ci    if (EXTERNAL_SSE(cpu_flags)) {
179cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
180cabdff1aSopenharmony_ci                              2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
181cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
182cabdff1aSopenharmony_ci                              1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
183cabdff1aSopenharmony_ci    }
184cabdff1aSopenharmony_ci    if (EXTERNAL_SSE2(cpu_flags)) {
185cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
186cabdff1aSopenharmony_ci                              2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
187cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
188cabdff1aSopenharmony_ci                              2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
189cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
190cabdff1aSopenharmony_ci                              1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
191cabdff1aSopenharmony_ci    }
192cabdff1aSopenharmony_ci    if (EXTERNAL_SSE4(cpu_flags)) {
193cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
194cabdff1aSopenharmony_ci                              2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
195cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
196cabdff1aSopenharmony_ci                              1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
197cabdff1aSopenharmony_ci    }
198cabdff1aSopenharmony_ci    if (EXTERNAL_AVX_FAST(cpu_flags)) {
199cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
200cabdff1aSopenharmony_ci                              2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
201cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
202cabdff1aSopenharmony_ci                              1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
203cabdff1aSopenharmony_ci    }
204cabdff1aSopenharmony_ci    if (EXTERNAL_AVX(cpu_flags)) {
205cabdff1aSopenharmony_ci        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
206cabdff1aSopenharmony_ci                              1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
207cabdff1aSopenharmony_ci    }
208cabdff1aSopenharmony_ci
209cabdff1aSopenharmony_ci    SET_MIX_3_8_TO_1_2(3)
210cabdff1aSopenharmony_ci    SET_MIX_3_8_TO_1_2(4)
211cabdff1aSopenharmony_ci    SET_MIX_3_8_TO_1_2(5)
212cabdff1aSopenharmony_ci    SET_MIX_3_8_TO_1_2(6)
213cabdff1aSopenharmony_ci    SET_MIX_3_8_TO_1_2(7)
214cabdff1aSopenharmony_ci    SET_MIX_3_8_TO_1_2(8)
215cabdff1aSopenharmony_ci}
216