153a5a1b3Sopenharmony_ci/*** 253a5a1b3Sopenharmony_ci This file is part of PulseAudio. 353a5a1b3Sopenharmony_ci 453a5a1b3Sopenharmony_ci Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> 553a5a1b3Sopenharmony_ci 653a5a1b3Sopenharmony_ci PulseAudio is free software; you can redistribute it and/or modify 753a5a1b3Sopenharmony_ci it under the terms of the GNU Lesser General Public License as published 853a5a1b3Sopenharmony_ci by the Free Software Foundation; either version 2.1 of the License, 953a5a1b3Sopenharmony_ci or (at your option) any later version. 1053a5a1b3Sopenharmony_ci 1153a5a1b3Sopenharmony_ci PulseAudio is distributed in the hope that it will be useful, but 1253a5a1b3Sopenharmony_ci WITHOUT ANY WARRANTY; without even the implied warranty of 1353a5a1b3Sopenharmony_ci MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1453a5a1b3Sopenharmony_ci General Public License for more details. 1553a5a1b3Sopenharmony_ci***/ 1653a5a1b3Sopenharmony_ci 1753a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H 1853a5a1b3Sopenharmony_ci#include <config.h> 1953a5a1b3Sopenharmony_ci#endif 2053a5a1b3Sopenharmony_ci 2153a5a1b3Sopenharmony_ci#include <pulsecore/macro.h> 2253a5a1b3Sopenharmony_ci#include <pulsecore/endianmacros.h> 2353a5a1b3Sopenharmony_ci 2453a5a1b3Sopenharmony_ci#include "cpu-arm.h" 2553a5a1b3Sopenharmony_ci#include "sconv.h" 2653a5a1b3Sopenharmony_ci 2753a5a1b3Sopenharmony_ci#include <math.h> 2853a5a1b3Sopenharmony_ci#include <arm_neon.h> 2953a5a1b3Sopenharmony_ci 3053a5a1b3Sopenharmony_cistatic void pa_sconv_s16le_from_f32ne_neon(unsigned n, const float *src, int16_t *dst) { 3153a5a1b3Sopenharmony_ci unsigned i = n & 3; 3253a5a1b3Sopenharmony_ci 3353a5a1b3Sopenharmony_ci __asm__ __volatile__ ( 3453a5a1b3Sopenharmony_ci "movs %[n], %[n], lsr #2 \n\t" 3553a5a1b3Sopenharmony_ci "beq 2f \n\t" 3653a5a1b3Sopenharmony_ci 3753a5a1b3Sopenharmony_ci "1: \n\t" 3853a5a1b3Sopenharmony_ci "vld1.32 {q0}, [%[src]]! \n\t" 3953a5a1b3Sopenharmony_ci "vcvt.s32.f32 q0, q0, #31 \n\t" /* s32<-f32 as 16:16 fixed-point, with implicit multiplication by 32768 */ 4053a5a1b3Sopenharmony_ci "vqrshrn.s32 d0, q0, #16 \n\t" /* shift, round, narrow */ 4153a5a1b3Sopenharmony_ci "subs %[n], %[n], #1 \n\t" 4253a5a1b3Sopenharmony_ci "vst1.16 {d0}, [%[dst]]! \n\t" 4353a5a1b3Sopenharmony_ci "bgt 1b \n\t" 4453a5a1b3Sopenharmony_ci 4553a5a1b3Sopenharmony_ci "2: \n\t" 4653a5a1b3Sopenharmony_ci 4753a5a1b3Sopenharmony_ci : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) /* output operands (or input operands that get modified) */ 4853a5a1b3Sopenharmony_ci : /* input operands */ 4953a5a1b3Sopenharmony_ci : "memory", "cc", "q0" /* clobber list */ 5053a5a1b3Sopenharmony_ci ); 5153a5a1b3Sopenharmony_ci 5253a5a1b3Sopenharmony_ci /* leftovers */ 5353a5a1b3Sopenharmony_ci while (i--) { 5453a5a1b3Sopenharmony_ci *dst++ = (int16_t) PA_CLAMP_UNLIKELY(lrintf(*src * (1 << 15)), -0x8000, 0x7FFF); 5553a5a1b3Sopenharmony_ci src++; 5653a5a1b3Sopenharmony_ci } 5753a5a1b3Sopenharmony_ci} 5853a5a1b3Sopenharmony_ci 5953a5a1b3Sopenharmony_cistatic void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *src, float *dst) { 6053a5a1b3Sopenharmony_ci unsigned i = n & 3; 6153a5a1b3Sopenharmony_ci const float invscale = 1.0f / (1 << 15); 6253a5a1b3Sopenharmony_ci 6353a5a1b3Sopenharmony_ci __asm__ __volatile__ ( 6453a5a1b3Sopenharmony_ci "movs %[n], %[n], lsr #2 \n\t" 6553a5a1b3Sopenharmony_ci "beq 2f \n\t" 6653a5a1b3Sopenharmony_ci 6753a5a1b3Sopenharmony_ci "1: \n\t" 6853a5a1b3Sopenharmony_ci "vld1.16 {d0}, [%[src]]! \n\t" 6953a5a1b3Sopenharmony_ci "vmovl.s16 q0, d0 \n\t" /* widen */ 7053a5a1b3Sopenharmony_ci "vcvt.f32.s32 q0, q0, #15 \n\t" /* f32<-s32 and divide by (1<<15) */ 7153a5a1b3Sopenharmony_ci "subs %[n], %[n], #1 \n\t" 7253a5a1b3Sopenharmony_ci "vst1.32 {q0}, [%[dst]]! \n\t" 7353a5a1b3Sopenharmony_ci "bgt 1b \n\t" 7453a5a1b3Sopenharmony_ci 7553a5a1b3Sopenharmony_ci "2: \n\t" 7653a5a1b3Sopenharmony_ci 7753a5a1b3Sopenharmony_ci : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) /* output operands (or input operands that get modified) */ 7853a5a1b3Sopenharmony_ci : /* input operands */ 7953a5a1b3Sopenharmony_ci : "memory", "cc", "q0" /* clobber list */ 8053a5a1b3Sopenharmony_ci ); 8153a5a1b3Sopenharmony_ci 8253a5a1b3Sopenharmony_ci /* leftovers */ 8353a5a1b3Sopenharmony_ci while (i--) { 8453a5a1b3Sopenharmony_ci *dst++ = *src++ * invscale; 8553a5a1b3Sopenharmony_ci } 8653a5a1b3Sopenharmony_ci} 8753a5a1b3Sopenharmony_ci 8853a5a1b3Sopenharmony_civoid pa_convert_func_init_neon(pa_cpu_arm_flag_t flags) { 8953a5a1b3Sopenharmony_ci pa_log_info("Initialising ARM NEON optimized conversions."); 9053a5a1b3Sopenharmony_ci pa_set_convert_from_float32ne_function(PA_SAMPLE_S16LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_neon); 9153a5a1b3Sopenharmony_ci pa_set_convert_to_float32ne_function(PA_SAMPLE_S16LE, (pa_convert_func_t) pa_sconv_s16le_to_f32ne_neon); 9253a5a1b3Sopenharmony_ci#ifndef WORDS_BIGENDIAN 9353a5a1b3Sopenharmony_ci pa_set_convert_from_s16ne_function(PA_SAMPLE_FLOAT32LE, (pa_convert_func_t) pa_sconv_s16le_to_f32ne_neon); 9453a5a1b3Sopenharmony_ci pa_set_convert_to_s16ne_function(PA_SAMPLE_FLOAT32LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_neon); 9553a5a1b3Sopenharmony_ci#endif 9653a5a1b3Sopenharmony_ci} 97