153a5a1b3Sopenharmony_ci/***
253a5a1b3Sopenharmony_ci  This file is part of PulseAudio.
353a5a1b3Sopenharmony_ci
453a5a1b3Sopenharmony_ci  Copyright 2004-2006 Lennart Poettering
553a5a1b3Sopenharmony_ci  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com>
653a5a1b3Sopenharmony_ci
753a5a1b3Sopenharmony_ci  PulseAudio is free software; you can redistribute it and/or modify
853a5a1b3Sopenharmony_ci  it under the terms of the GNU Lesser General Public License as published
953a5a1b3Sopenharmony_ci  by the Free Software Foundation; either version 2.1 of the License,
1053a5a1b3Sopenharmony_ci  or (at your option) any later version.
1153a5a1b3Sopenharmony_ci
1253a5a1b3Sopenharmony_ci  PulseAudio is distributed in the hope that it will be useful, but
1353a5a1b3Sopenharmony_ci  WITHOUT ANY WARRANTY; without even the implied warranty of
1453a5a1b3Sopenharmony_ci  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1553a5a1b3Sopenharmony_ci  General Public License for more details.
1653a5a1b3Sopenharmony_ci
1753a5a1b3Sopenharmony_ci  You should have received a copy of the GNU Lesser General Public License
1853a5a1b3Sopenharmony_ci  along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
1953a5a1b3Sopenharmony_ci***/
2053a5a1b3Sopenharmony_ci
2153a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H
2253a5a1b3Sopenharmony_ci#include <config.h>
2353a5a1b3Sopenharmony_ci#endif
2453a5a1b3Sopenharmony_ci
2553a5a1b3Sopenharmony_ci#include <pulse/sample.h>
2653a5a1b3Sopenharmony_ci#include <pulse/volume.h>
2753a5a1b3Sopenharmony_ci#include <pulsecore/log.h>
2853a5a1b3Sopenharmony_ci#include <pulsecore/macro.h>
2953a5a1b3Sopenharmony_ci
3053a5a1b3Sopenharmony_ci#include "cpu-x86.h"
3153a5a1b3Sopenharmony_ci#include "remap.h"
3253a5a1b3Sopenharmony_ci
/*
 * Building blocks for the inline-assembly bodies used by the mono to stereo
 * remappers in this file. Operand numbers refer to the operand lists of the
 * __asm__ statements that expand MONO_TO_STEREO:
 *   %0 = destination pointer, %1 = source pointer, %2 = scratch loop counter,
 *   %3 = scratch register, %4 = number of input samples.
 */

/* Load 32 bytes from (%1) into mm0, mm2, mm4 and mm6, then copy each of them
 * into the following odd register (mm1, mm3, mm5, mm7). */
#define LOAD_SAMPLES                                   \
                " movq (%1), %%mm0              \n\t"  \
                " movq 8(%1), %%mm2             \n\t"  \
                " movq 16(%1), %%mm4            \n\t"  \
                " movq 24(%1), %%mm6            \n\t"  \
                " movq %%mm0, %%mm1             \n\t"  \
                " movq %%mm2, %%mm3             \n\t"  \
                " movq %%mm4, %%mm5             \n\t"  \
                " movq %%mm6, %%mm7             \n\t"

/* Interleave every register with itself so that each element appears twice in
 * a row: the even registers keep the duplicated low half (punpckl), the odd
 * registers the duplicated high half (punpckh). 's' is the instruction suffix
 * selecting the element width (wd or dq). */
#define UNPACK_SAMPLES(s)                              \
                " punpckl"#s" %%mm0, %%mm0      \n\t"  \
                " punpckh"#s" %%mm1, %%mm1      \n\t"  \
                " punpckl"#s" %%mm2, %%mm2      \n\t"  \
                " punpckh"#s" %%mm3, %%mm3      \n\t"  \
                " punpckl"#s" %%mm4, %%mm4      \n\t"  \
                " punpckh"#s" %%mm5, %%mm5      \n\t"  \
                " punpckl"#s" %%mm6, %%mm6      \n\t"  \
                " punpckh"#s" %%mm7, %%mm7      \n\t"

/* Store mm0..mm7 (64 bytes) to (%0), then advance the source pointer by the
 * 32 bytes consumed and the destination pointer by the 64 bytes produced. */
#define STORE_SAMPLES                                  \
                " movq %%mm0, (%0)              \n\t"  \
                " movq %%mm1, 8(%0)             \n\t"  \
                " movq %%mm2, 16(%0)            \n\t"  \
                " movq %%mm3, 24(%0)            \n\t"  \
                " movq %%mm4, 32(%0)            \n\t"  \
                " movq %%mm5, 40(%0)            \n\t"  \
                " movq %%mm6, 48(%0)            \n\t"  \
                " movq %%mm7, 56(%0)            \n\t"  \
                " add $32, %1                   \n\t"  \
                " add $64, %0                   \n\t"

/* Handle one 32-bit sample: load 4 bytes from (%1), duplicate them and store
 * the resulting 8 bytes to (%0), advancing both pointers accordingly. */
#define HANDLE_SINGLE_dq()                            \
                " movd (%1), %%mm0              \n\t"  \
                " punpckldq %%mm0, %%mm0        \n\t"  \
                " movq %%mm0, (%0)              \n\t"  \
                " add $4, %1                    \n\t"  \
                " add $8, %0                    \n\t"

/* Handle one 16-bit sample: load 2 bytes from (%1) into the low word of the
 * scratch register %3, move it to mm0, duplicate the low word and store the
 * low 4 bytes to (%0). Only the low word of %3 is written by the load; the
 * remaining bits of %3 do not reach the stored 4 bytes. */
#define HANDLE_SINGLE_wd()                             \
                " movw (%1), %w3                \n\t"  \
                " movd %3,  %%mm0               \n\t"  \
                " punpcklwd %%mm0, %%mm0        \n\t"  \
                " movd %%mm0, (%0)              \n\t"  \
                " add $2, %1                    \n\t"  \
                " add $4, %0                    \n\t"

/* Complete mono to stereo loop.
 *   s     - unpack suffix, also selects HANDLE_SINGLE_<s>() for the tail
 *   shift - log2 of the number of samples processed per 32-byte bulk iteration
 *   mask  - (samples per bulk iteration) - 1, used to get the tail count
 * Labels 1/2 form the bulk loop, which runs (%4 >> shift) times; labels 3/4
 * form the tail loop, which runs (%4 & mask) times, one sample per pass.
 * The trailing emms leaves MMX state so that x87 code can run afterwards. */
#define MONO_TO_STEREO(s,shift,mask)                   \
                " mov %4, %2                    \n\t"  \
                " sar $"#shift", %2             \n\t"  \
                " cmp $0, %2                    \n\t"  \
                " je 2f                         \n\t"  \
                "1:                             \n\t"  \
                LOAD_SAMPLES                           \
                UNPACK_SAMPLES(s)                      \
                STORE_SAMPLES                          \
                " dec %2                        \n\t"  \
                " jne 1b                        \n\t"  \
                "2:                             \n\t"  \
                " mov %4, %2                    \n\t"  \
                " and $"#mask", %2              \n\t"  \
                " je 4f                         \n\t"  \
                "3:                             \n\t"  \
                HANDLE_SINGLE_##s()                    \
                " dec %2                        \n\t"  \
                " jne 3b                        \n\t"  \
                "4:                             \n\t"  \
                " emms                          \n\t"
10153a5a1b3Sopenharmony_ci
10253a5a1b3Sopenharmony_ci#if defined (__i386__) || defined (__amd64__)
10353a5a1b3Sopenharmony_cistatic void remap_mono_to_stereo_s16ne_mmx(pa_remap_t *m, int16_t *dst, const int16_t *src, unsigned n) {
10453a5a1b3Sopenharmony_ci    pa_reg_x86 temp, temp2;
10553a5a1b3Sopenharmony_ci
10653a5a1b3Sopenharmony_ci    __asm__ __volatile__ (
10753a5a1b3Sopenharmony_ci        MONO_TO_STEREO(wd,4,15) /* do words to doubles */
10853a5a1b3Sopenharmony_ci        : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
10953a5a1b3Sopenharmony_ci        : "r" ((pa_reg_x86)n)
11053a5a1b3Sopenharmony_ci        : "cc"
11153a5a1b3Sopenharmony_ci    );
11253a5a1b3Sopenharmony_ci}
11353a5a1b3Sopenharmony_ci
11453a5a1b3Sopenharmony_ci/* Works for both S32NE and FLOAT32NE */
11553a5a1b3Sopenharmony_cistatic void remap_mono_to_stereo_any32ne_mmx(pa_remap_t *m, float *dst, const float *src, unsigned n) {
11653a5a1b3Sopenharmony_ci    pa_reg_x86 temp, temp2;
11753a5a1b3Sopenharmony_ci
11853a5a1b3Sopenharmony_ci    __asm__ __volatile__ (
11953a5a1b3Sopenharmony_ci        MONO_TO_STEREO(dq,3,7) /* do doubles to quads */
12053a5a1b3Sopenharmony_ci        : "+r" (dst), "+r" (src), "=&r" (temp), "=&r" (temp2)
12153a5a1b3Sopenharmony_ci        : "r" ((pa_reg_x86)n)
12253a5a1b3Sopenharmony_ci        : "cc"
12353a5a1b3Sopenharmony_ci    );
12453a5a1b3Sopenharmony_ci}
12553a5a1b3Sopenharmony_ci
12653a5a1b3Sopenharmony_ci/* set the function that will execute the remapping based on the matrices */
12753a5a1b3Sopenharmony_cistatic void init_remap_mmx(pa_remap_t *m) {
12853a5a1b3Sopenharmony_ci    unsigned n_oc, n_ic;
12953a5a1b3Sopenharmony_ci
13053a5a1b3Sopenharmony_ci    n_oc = m->o_ss.channels;
13153a5a1b3Sopenharmony_ci    n_ic = m->i_ss.channels;
13253a5a1b3Sopenharmony_ci
13353a5a1b3Sopenharmony_ci    /* find some common channel remappings, fall back to full matrix operation. */
13453a5a1b3Sopenharmony_ci    if (n_ic == 1 && n_oc == 2 &&
13553a5a1b3Sopenharmony_ci            m->map_table_i[0][0] == 0x10000 && m->map_table_i[1][0] == 0x10000) {
13653a5a1b3Sopenharmony_ci
13753a5a1b3Sopenharmony_ci        pa_log_info("Using MMX mono to stereo remapping");
13853a5a1b3Sopenharmony_ci        pa_set_remap_func(m, (pa_do_remap_func_t) remap_mono_to_stereo_s16ne_mmx,
13953a5a1b3Sopenharmony_ci            (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx,
14053a5a1b3Sopenharmony_ci            (pa_do_remap_func_t) remap_mono_to_stereo_any32ne_mmx);
14153a5a1b3Sopenharmony_ci    }
14253a5a1b3Sopenharmony_ci}
14353a5a1b3Sopenharmony_ci#endif /* defined (__i386__) || defined (__amd64__) */
14453a5a1b3Sopenharmony_ci
14553a5a1b3Sopenharmony_civoid pa_remap_func_init_mmx(pa_cpu_x86_flag_t flags) {
14653a5a1b3Sopenharmony_ci#if defined (__i386__) || defined (__amd64__)
14753a5a1b3Sopenharmony_ci
14853a5a1b3Sopenharmony_ci    if (flags & PA_CPU_X86_MMX) {
14953a5a1b3Sopenharmony_ci        pa_log_info("Initialising MMX optimized remappers.");
15053a5a1b3Sopenharmony_ci
15153a5a1b3Sopenharmony_ci        pa_set_init_remap_func((pa_init_remap_func_t) init_remap_mmx);
15253a5a1b3Sopenharmony_ci    }
15353a5a1b3Sopenharmony_ci
15453a5a1b3Sopenharmony_ci#endif /* defined (__i386__) || defined (__amd64__) */
15553a5a1b3Sopenharmony_ci}
156