153a5a1b3Sopenharmony_ci/*** 253a5a1b3Sopenharmony_ci This file is part of PulseAudio. 353a5a1b3Sopenharmony_ci 453a5a1b3Sopenharmony_ci Copyright 2004-2006 Lennart Poettering 553a5a1b3Sopenharmony_ci Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB 653a5a1b3Sopenharmony_ci 753a5a1b3Sopenharmony_ci PulseAudio is free software; you can redistribute it and/or modify 853a5a1b3Sopenharmony_ci it under the terms of the GNU Lesser General Public License as published 953a5a1b3Sopenharmony_ci by the Free Software Foundation; either version 2.1 of the License, 1053a5a1b3Sopenharmony_ci or (at your option) any later version. 1153a5a1b3Sopenharmony_ci 1253a5a1b3Sopenharmony_ci PulseAudio is distributed in the hope that it will be useful, but 1353a5a1b3Sopenharmony_ci WITHOUT ANY WARRANTY; without even the implied warranty of 1453a5a1b3Sopenharmony_ci MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1553a5a1b3Sopenharmony_ci General Public License for more details. 1653a5a1b3Sopenharmony_ci 1753a5a1b3Sopenharmony_ci You should have received a copy of the GNU Lesser General Public License 1853a5a1b3Sopenharmony_ci along with PulseAudio; if not, see <http://www.gnu.org/licenses/>. 1953a5a1b3Sopenharmony_ci***/ 2053a5a1b3Sopenharmony_ci 2153a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H 2253a5a1b3Sopenharmony_ci#include <config.h> 2353a5a1b3Sopenharmony_ci#endif 2453a5a1b3Sopenharmony_ci 2553a5a1b3Sopenharmony_ci#include <stdio.h> 2653a5a1b3Sopenharmony_ci#include <stdlib.h> 2753a5a1b3Sopenharmony_ci 2853a5a1b3Sopenharmony_ci#include <pulsecore/macro.h> 2953a5a1b3Sopenharmony_ci#include <pulsecore/endianmacros.h> 3053a5a1b3Sopenharmony_ci 3153a5a1b3Sopenharmony_ci#include "cpu-x86.h" 3253a5a1b3Sopenharmony_ci#include "sconv.h" 3353a5a1b3Sopenharmony_ci 3453a5a1b3Sopenharmony_ci#if (!defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__FreeBSD_kernel__) && defined (__i386__)) || defined (__amd64__) 3553a5a1b3Sopenharmony_ci 3653a5a1b3Sopenharmony_cistatic const PA_DECLARE_ALIGNED (16, float, scale[4]) = { 0x8000, 0x8000, 0x8000, 0x8000 }; 3753a5a1b3Sopenharmony_ci 3853a5a1b3Sopenharmony_cistatic void pa_sconv_s16le_from_f32ne_sse(unsigned n, const float *a, int16_t *b) { 3953a5a1b3Sopenharmony_ci pa_reg_x86 temp, i; 4053a5a1b3Sopenharmony_ci 4153a5a1b3Sopenharmony_ci __asm__ __volatile__ ( 4253a5a1b3Sopenharmony_ci " movaps %5, %%xmm5 \n\t" 4353a5a1b3Sopenharmony_ci " xor %0, %0 \n\t" 4453a5a1b3Sopenharmony_ci 4553a5a1b3Sopenharmony_ci " mov %4, %1 \n\t" 4653a5a1b3Sopenharmony_ci " sar $3, %1 \n\t" /* 8 floats at a time */ 4753a5a1b3Sopenharmony_ci " cmp $0, %1 \n\t" 4853a5a1b3Sopenharmony_ci " je 2f \n\t" 4953a5a1b3Sopenharmony_ci 5053a5a1b3Sopenharmony_ci "1: \n\t" 5153a5a1b3Sopenharmony_ci " movups (%q2, %0, 2), %%xmm0 \n\t" /* read 8 floats */ 5253a5a1b3Sopenharmony_ci " movups 16(%q2, %0, 2), %%xmm2 \n\t" 5353a5a1b3Sopenharmony_ci " mulps %%xmm5, %%xmm0 \n\t" /* *= 0x8000 */ 5453a5a1b3Sopenharmony_ci " mulps %%xmm5, %%xmm2 \n\t" 5553a5a1b3Sopenharmony_ci 5653a5a1b3Sopenharmony_ci " cvtps2pi %%xmm0, %%mm0 \n\t" /* low part to int */ 5753a5a1b3Sopenharmony_ci " cvtps2pi %%xmm2, %%mm2 \n\t" 5853a5a1b3Sopenharmony_ci " movhlps %%xmm0, %%xmm0 \n\t" /* bring high part in position */ 5953a5a1b3Sopenharmony_ci " movhlps %%xmm2, %%xmm2 \n\t" 6053a5a1b3Sopenharmony_ci " cvtps2pi %%xmm0, %%mm1 \n\t" /* high part to int */ 6153a5a1b3Sopenharmony_ci " cvtps2pi %%xmm2, %%mm3 \n\t" 6253a5a1b3Sopenharmony_ci 6353a5a1b3Sopenharmony_ci " packssdw %%mm1, %%mm0 \n\t" /* pack parts */ 6453a5a1b3Sopenharmony_ci " packssdw %%mm3, %%mm2 \n\t" 6553a5a1b3Sopenharmony_ci " movq %%mm0, (%q3, %0) \n\t" 6653a5a1b3Sopenharmony_ci " movq %%mm2, 8(%q3, %0) \n\t" 6753a5a1b3Sopenharmony_ci 6853a5a1b3Sopenharmony_ci " add $16, %0 \n\t" 6953a5a1b3Sopenharmony_ci " dec %1 \n\t" 7053a5a1b3Sopenharmony_ci " jne 1b \n\t" 7153a5a1b3Sopenharmony_ci 7253a5a1b3Sopenharmony_ci "2: \n\t" 7353a5a1b3Sopenharmony_ci " mov %4, %1 \n\t" /* prepare for leftovers */ 7453a5a1b3Sopenharmony_ci " and $7, %1 \n\t" 7553a5a1b3Sopenharmony_ci " je 5f \n\t" 7653a5a1b3Sopenharmony_ci 7753a5a1b3Sopenharmony_ci "3: \n\t" 7853a5a1b3Sopenharmony_ci " movss (%q2, %0, 2), %%xmm0 \n\t" 7953a5a1b3Sopenharmony_ci " mulss %%xmm5, %%xmm0 \n\t" 8053a5a1b3Sopenharmony_ci " cvtss2si %%xmm0, %4 \n\t" 8153a5a1b3Sopenharmony_ci " add $0x8000, %4 \n\t" /* check for saturation */ 8253a5a1b3Sopenharmony_ci " and $~0xffff, %4 \n\t" 8353a5a1b3Sopenharmony_ci " cvtss2si %%xmm0, %4 \n\t" 8453a5a1b3Sopenharmony_ci " je 4f \n\t" 8553a5a1b3Sopenharmony_ci " sar $31, %4 \n\t" 8653a5a1b3Sopenharmony_ci " xor $0x7fff, %4 \n\t" 8753a5a1b3Sopenharmony_ci 8853a5a1b3Sopenharmony_ci "4: \n\t" 8953a5a1b3Sopenharmony_ci " movw %w4, (%q3, %0) \n\t" /* store leftover */ 9053a5a1b3Sopenharmony_ci " add $2, %0 \n\t" 9153a5a1b3Sopenharmony_ci " dec %1 \n\t" 9253a5a1b3Sopenharmony_ci " jne 3b \n\t" 9353a5a1b3Sopenharmony_ci 9453a5a1b3Sopenharmony_ci "5: \n\t" 9553a5a1b3Sopenharmony_ci " emms \n\t" 9653a5a1b3Sopenharmony_ci 9753a5a1b3Sopenharmony_ci : "=&r" (i), "=&r" (temp) 9853a5a1b3Sopenharmony_ci : "r" (a), "r" (b), "r" ((pa_reg_x86)n), "m" (*scale) 9953a5a1b3Sopenharmony_ci : "cc", "memory" 10053a5a1b3Sopenharmony_ci ); 10153a5a1b3Sopenharmony_ci} 10253a5a1b3Sopenharmony_ci 10353a5a1b3Sopenharmony_cistatic void pa_sconv_s16le_from_f32ne_sse2(unsigned n, const float *a, int16_t *b) { 10453a5a1b3Sopenharmony_ci pa_reg_x86 temp, i; 10553a5a1b3Sopenharmony_ci 10653a5a1b3Sopenharmony_ci __asm__ __volatile__ ( 10753a5a1b3Sopenharmony_ci " movaps %5, %%xmm5 \n\t" 10853a5a1b3Sopenharmony_ci " xor %0, %0 \n\t" 10953a5a1b3Sopenharmony_ci 11053a5a1b3Sopenharmony_ci " mov %4, %1 \n\t" 11153a5a1b3Sopenharmony_ci " sar $3, %1 \n\t" /* 8 floats at a time */ 11253a5a1b3Sopenharmony_ci " cmp $0, %1 \n\t" 11353a5a1b3Sopenharmony_ci " je 2f \n\t" 11453a5a1b3Sopenharmony_ci 11553a5a1b3Sopenharmony_ci "1: \n\t" 11653a5a1b3Sopenharmony_ci " movups (%q2, %0, 2), %%xmm0 \n\t" /* read 8 floats */ 11753a5a1b3Sopenharmony_ci " movups 16(%q2, %0, 2), %%xmm2 \n\t" 11853a5a1b3Sopenharmony_ci " mulps %%xmm5, %%xmm0 \n\t" /* *= 0x8000 */ 11953a5a1b3Sopenharmony_ci " mulps %%xmm5, %%xmm2 \n\t" 12053a5a1b3Sopenharmony_ci 12153a5a1b3Sopenharmony_ci " cvtps2dq %%xmm0, %%xmm0 \n\t" 12253a5a1b3Sopenharmony_ci " cvtps2dq %%xmm2, %%xmm2 \n\t" 12353a5a1b3Sopenharmony_ci 12453a5a1b3Sopenharmony_ci " packssdw %%xmm2, %%xmm0 \n\t" 12553a5a1b3Sopenharmony_ci " movdqu %%xmm0, (%q3, %0) \n\t" 12653a5a1b3Sopenharmony_ci 12753a5a1b3Sopenharmony_ci " add $16, %0 \n\t" 12853a5a1b3Sopenharmony_ci " dec %1 \n\t" 12953a5a1b3Sopenharmony_ci " jne 1b \n\t" 13053a5a1b3Sopenharmony_ci 13153a5a1b3Sopenharmony_ci "2: \n\t" 13253a5a1b3Sopenharmony_ci " mov %4, %1 \n\t" /* prepare for leftovers */ 13353a5a1b3Sopenharmony_ci " and $7, %1 \n\t" 13453a5a1b3Sopenharmony_ci " je 5f \n\t" 13553a5a1b3Sopenharmony_ci 13653a5a1b3Sopenharmony_ci "3: \n\t" 13753a5a1b3Sopenharmony_ci " movss (%q2, %0, 2), %%xmm0 \n\t" 13853a5a1b3Sopenharmony_ci " mulss %%xmm5, %%xmm0 \n\t" 13953a5a1b3Sopenharmony_ci " cvtss2si %%xmm0, %4 \n\t" 14053a5a1b3Sopenharmony_ci " add $0x8000, %4 \n\t" 14153a5a1b3Sopenharmony_ci " and $~0xffff, %4 \n\t" /* check for saturation */ 14253a5a1b3Sopenharmony_ci " cvtss2si %%xmm0, %4 \n\t" 14353a5a1b3Sopenharmony_ci " je 4f \n\t" 14453a5a1b3Sopenharmony_ci " sar $31, %4 \n\t" 14553a5a1b3Sopenharmony_ci " xor $0x7fff, %4 \n\t" 14653a5a1b3Sopenharmony_ci 14753a5a1b3Sopenharmony_ci "4: \n\t" 14853a5a1b3Sopenharmony_ci " movw %w4, (%q3, %0) \n\t" /* store leftover */ 14953a5a1b3Sopenharmony_ci " add $2, %0 \n\t" 15053a5a1b3Sopenharmony_ci " dec %1 \n\t" 15153a5a1b3Sopenharmony_ci " jne 3b \n\t" 15253a5a1b3Sopenharmony_ci 15353a5a1b3Sopenharmony_ci "5: \n\t" 15453a5a1b3Sopenharmony_ci 15553a5a1b3Sopenharmony_ci : "=&r" (i), "=&r" (temp) 15653a5a1b3Sopenharmony_ci : "r" (a), "r" (b), "r" ((pa_reg_x86)n), "m" (*scale) 15753a5a1b3Sopenharmony_ci : "cc", "memory" 15853a5a1b3Sopenharmony_ci ); 15953a5a1b3Sopenharmony_ci} 16053a5a1b3Sopenharmony_ci 16153a5a1b3Sopenharmony_ci#endif /* defined (__i386__) || defined (__amd64__) */ 16253a5a1b3Sopenharmony_ci 16353a5a1b3Sopenharmony_civoid pa_convert_func_init_sse(pa_cpu_x86_flag_t flags) { 16453a5a1b3Sopenharmony_ci#if (!defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__FreeBSD_kernel__) && defined (__i386__)) || defined (__amd64__) 16553a5a1b3Sopenharmony_ci 16653a5a1b3Sopenharmony_ci if (flags & PA_CPU_X86_SSE2) { 16753a5a1b3Sopenharmony_ci pa_log_info("Initialising SSE2 optimized conversions."); 16853a5a1b3Sopenharmony_ci pa_set_convert_from_float32ne_function(PA_SAMPLE_S16LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_sse2); 16953a5a1b3Sopenharmony_ci pa_set_convert_to_s16ne_function(PA_SAMPLE_FLOAT32LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_sse2); 17053a5a1b3Sopenharmony_ci } else if (flags & PA_CPU_X86_SSE) { 17153a5a1b3Sopenharmony_ci pa_log_info("Initialising SSE optimized conversions."); 17253a5a1b3Sopenharmony_ci pa_set_convert_from_float32ne_function(PA_SAMPLE_S16LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_sse); 17353a5a1b3Sopenharmony_ci pa_set_convert_to_s16ne_function(PA_SAMPLE_FLOAT32LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_sse); 17453a5a1b3Sopenharmony_ci } 17553a5a1b3Sopenharmony_ci 17653a5a1b3Sopenharmony_ci#endif /* defined (__i386__) || defined (__amd64__) */ 17753a5a1b3Sopenharmony_ci} 178