1cabdff1aSopenharmony_ci/* 2cabdff1aSopenharmony_ci * SIMD-optimized LPC functions 3cabdff1aSopenharmony_ci * Copyright (c) 2007 Loren Merritt 4cabdff1aSopenharmony_ci * 5cabdff1aSopenharmony_ci * This file is part of FFmpeg. 6cabdff1aSopenharmony_ci * 7cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or 8cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 9cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either 10cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version. 11cabdff1aSopenharmony_ci * 12cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful, 13cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 14cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15cabdff1aSopenharmony_ci * Lesser General Public License for more details. 16cabdff1aSopenharmony_ci * 17cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public 18cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software 19cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20cabdff1aSopenharmony_ci */ 21cabdff1aSopenharmony_ci 22cabdff1aSopenharmony_ci#include "libavutil/attributes.h" 23cabdff1aSopenharmony_ci#include "libavutil/cpu.h" 24cabdff1aSopenharmony_ci#include "libavutil/mem_internal.h" 25cabdff1aSopenharmony_ci#include "libavutil/x86/asm.h" 26cabdff1aSopenharmony_ci#include "libavutil/x86/cpu.h" 27cabdff1aSopenharmony_ci#include "libavcodec/lpc.h" 28cabdff1aSopenharmony_ci 29cabdff1aSopenharmony_ciDECLARE_ASM_CONST(16, double, pd_1)[2] = { 1.0, 1.0 }; 30cabdff1aSopenharmony_ciDECLARE_ASM_CONST(16, double, pd_2)[2] = { 2.0, 2.0 }; 31cabdff1aSopenharmony_ci 32cabdff1aSopenharmony_ci#if HAVE_SSE2_INLINE 33cabdff1aSopenharmony_ci 34cabdff1aSopenharmony_cistatic void lpc_apply_welch_window_sse2(const int32_t *data, int len, 35cabdff1aSopenharmony_ci double *w_data) 36cabdff1aSopenharmony_ci{ 37cabdff1aSopenharmony_ci double c = 2.0 / (len-1.0); 38cabdff1aSopenharmony_ci int n2 = len>>1; 39cabdff1aSopenharmony_ci x86_reg i = -n2*sizeof(int32_t); 40cabdff1aSopenharmony_ci x86_reg j = n2*sizeof(int32_t); 41cabdff1aSopenharmony_ci __asm__ volatile( 42cabdff1aSopenharmony_ci "movsd %4, %%xmm7 \n\t" 43cabdff1aSopenharmony_ci "movapd "MANGLE(pd_1)", %%xmm6 \n\t" 44cabdff1aSopenharmony_ci "movapd "MANGLE(pd_2)", %%xmm5 \n\t" 45cabdff1aSopenharmony_ci "movlhps %%xmm7, %%xmm7 \n\t" 46cabdff1aSopenharmony_ci "subpd %%xmm5, %%xmm7 \n\t" 47cabdff1aSopenharmony_ci "addsd %%xmm6, %%xmm7 \n\t" 48cabdff1aSopenharmony_ci "test $1, %5 \n\t" 49cabdff1aSopenharmony_ci "jz 2f \n\t" 50cabdff1aSopenharmony_ci#define WELCH(MOVPD, offset)\ 51cabdff1aSopenharmony_ci "1: \n\t"\ 52cabdff1aSopenharmony_ci "movapd %%xmm7, %%xmm1 \n\t"\ 53cabdff1aSopenharmony_ci "mulpd %%xmm1, %%xmm1 \n\t"\ 54cabdff1aSopenharmony_ci "movapd %%xmm6, %%xmm0 \n\t"\ 55cabdff1aSopenharmony_ci "subpd %%xmm1, %%xmm0 \n\t"\ 56cabdff1aSopenharmony_ci "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\ 57cabdff1aSopenharmony_ci "cvtpi2pd (%3,%0), %%xmm2 \n\t"\ 58cabdff1aSopenharmony_ci "cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\ 59cabdff1aSopenharmony_ci "mulpd %%xmm0, %%xmm2 \n\t"\ 60cabdff1aSopenharmony_ci "mulpd %%xmm1, %%xmm3 \n\t"\ 61cabdff1aSopenharmony_ci "movapd %%xmm2, (%2,%0,2) \n\t"\ 62cabdff1aSopenharmony_ci MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\ 63cabdff1aSopenharmony_ci "subpd %%xmm5, %%xmm7 \n\t"\ 64cabdff1aSopenharmony_ci "sub $8, %1 \n\t"\ 65cabdff1aSopenharmony_ci "add $8, %0 \n\t"\ 66cabdff1aSopenharmony_ci "jl 1b \n\t"\ 67cabdff1aSopenharmony_ci 68cabdff1aSopenharmony_ci WELCH("movupd", -1) 69cabdff1aSopenharmony_ci "jmp 3f \n\t" 70cabdff1aSopenharmony_ci "2: \n\t" 71cabdff1aSopenharmony_ci WELCH("movapd", -2) 72cabdff1aSopenharmony_ci "3: \n\t" 73cabdff1aSopenharmony_ci :"+&r"(i), "+&r"(j) 74cabdff1aSopenharmony_ci :"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len) 75cabdff1aSopenharmony_ci NAMED_CONSTRAINTS_ARRAY_ADD(pd_1,pd_2) 76cabdff1aSopenharmony_ci XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", 77cabdff1aSopenharmony_ci "%xmm5", "%xmm6", "%xmm7") 78cabdff1aSopenharmony_ci ); 79cabdff1aSopenharmony_ci#undef WELCH 80cabdff1aSopenharmony_ci} 81cabdff1aSopenharmony_ci 82cabdff1aSopenharmony_cistatic void lpc_compute_autocorr_sse2(const double *data, int len, int lag, 83cabdff1aSopenharmony_ci double *autoc) 84cabdff1aSopenharmony_ci{ 85cabdff1aSopenharmony_ci int j; 86cabdff1aSopenharmony_ci 87cabdff1aSopenharmony_ci if((x86_reg)data & 15) 88cabdff1aSopenharmony_ci data++; 89cabdff1aSopenharmony_ci 90cabdff1aSopenharmony_ci for(j=0; j<lag; j+=2){ 91cabdff1aSopenharmony_ci x86_reg i = -len*sizeof(double); 92cabdff1aSopenharmony_ci if(j == lag-2) { 93cabdff1aSopenharmony_ci __asm__ volatile( 94cabdff1aSopenharmony_ci "movsd "MANGLE(pd_1)", %%xmm0 \n\t" 95cabdff1aSopenharmony_ci "movsd "MANGLE(pd_1)", %%xmm1 \n\t" 96cabdff1aSopenharmony_ci "movsd "MANGLE(pd_1)", %%xmm2 \n\t" 97cabdff1aSopenharmony_ci "1: \n\t" 98cabdff1aSopenharmony_ci "movapd (%2,%0), %%xmm3 \n\t" 99cabdff1aSopenharmony_ci "movupd -8(%3,%0), %%xmm4 \n\t" 100cabdff1aSopenharmony_ci "movapd (%3,%0), %%xmm5 \n\t" 101cabdff1aSopenharmony_ci "mulpd %%xmm3, %%xmm4 \n\t" 102cabdff1aSopenharmony_ci "mulpd %%xmm3, %%xmm5 \n\t" 103cabdff1aSopenharmony_ci "mulpd -16(%3,%0), %%xmm3 \n\t" 104cabdff1aSopenharmony_ci "addpd %%xmm4, %%xmm1 \n\t" 105cabdff1aSopenharmony_ci "addpd %%xmm5, %%xmm0 \n\t" 106cabdff1aSopenharmony_ci "addpd %%xmm3, %%xmm2 \n\t" 107cabdff1aSopenharmony_ci "add $16, %0 \n\t" 108cabdff1aSopenharmony_ci "jl 1b \n\t" 109cabdff1aSopenharmony_ci "movhlps %%xmm0, %%xmm3 \n\t" 110cabdff1aSopenharmony_ci "movhlps %%xmm1, %%xmm4 \n\t" 111cabdff1aSopenharmony_ci "movhlps %%xmm2, %%xmm5 \n\t" 112cabdff1aSopenharmony_ci "addsd %%xmm3, %%xmm0 \n\t" 113cabdff1aSopenharmony_ci "addsd %%xmm4, %%xmm1 \n\t" 114cabdff1aSopenharmony_ci "addsd %%xmm5, %%xmm2 \n\t" 115cabdff1aSopenharmony_ci "movsd %%xmm0, (%1) \n\t" 116cabdff1aSopenharmony_ci "movsd %%xmm1, 8(%1) \n\t" 117cabdff1aSopenharmony_ci "movsd %%xmm2, 16(%1) \n\t" 118cabdff1aSopenharmony_ci :"+&r"(i) 119cabdff1aSopenharmony_ci :"r"(autoc+j), "r"(data+len), "r"(data+len-j) 120cabdff1aSopenharmony_ci NAMED_CONSTRAINTS_ARRAY_ADD(pd_1) 121cabdff1aSopenharmony_ci :"memory" 122cabdff1aSopenharmony_ci ); 123cabdff1aSopenharmony_ci } else { 124cabdff1aSopenharmony_ci __asm__ volatile( 125cabdff1aSopenharmony_ci "movsd "MANGLE(pd_1)", %%xmm0 \n\t" 126cabdff1aSopenharmony_ci "movsd "MANGLE(pd_1)", %%xmm1 \n\t" 127cabdff1aSopenharmony_ci "1: \n\t" 128cabdff1aSopenharmony_ci "movapd (%3,%0), %%xmm3 \n\t" 129cabdff1aSopenharmony_ci "movupd -8(%4,%0), %%xmm4 \n\t" 130cabdff1aSopenharmony_ci "mulpd %%xmm3, %%xmm4 \n\t" 131cabdff1aSopenharmony_ci "mulpd (%4,%0), %%xmm3 \n\t" 132cabdff1aSopenharmony_ci "addpd %%xmm4, %%xmm1 \n\t" 133cabdff1aSopenharmony_ci "addpd %%xmm3, %%xmm0 \n\t" 134cabdff1aSopenharmony_ci "add $16, %0 \n\t" 135cabdff1aSopenharmony_ci "jl 1b \n\t" 136cabdff1aSopenharmony_ci "movhlps %%xmm0, %%xmm3 \n\t" 137cabdff1aSopenharmony_ci "movhlps %%xmm1, %%xmm4 \n\t" 138cabdff1aSopenharmony_ci "addsd %%xmm3, %%xmm0 \n\t" 139cabdff1aSopenharmony_ci "addsd %%xmm4, %%xmm1 \n\t" 140cabdff1aSopenharmony_ci "movsd %%xmm0, %1 \n\t" 141cabdff1aSopenharmony_ci "movsd %%xmm1, %2 \n\t" 142cabdff1aSopenharmony_ci :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) 143cabdff1aSopenharmony_ci :"r"(data+len), "r"(data+len-j) 144cabdff1aSopenharmony_ci NAMED_CONSTRAINTS_ARRAY_ADD(pd_1) 145cabdff1aSopenharmony_ci ); 146cabdff1aSopenharmony_ci } 147cabdff1aSopenharmony_ci } 148cabdff1aSopenharmony_ci} 149cabdff1aSopenharmony_ci 150cabdff1aSopenharmony_ci#endif /* HAVE_SSE2_INLINE */ 151cabdff1aSopenharmony_ci 152cabdff1aSopenharmony_ciav_cold void ff_lpc_init_x86(LPCContext *c) 153cabdff1aSopenharmony_ci{ 154cabdff1aSopenharmony_ci#if HAVE_SSE2_INLINE 155cabdff1aSopenharmony_ci int cpu_flags = av_get_cpu_flags(); 156cabdff1aSopenharmony_ci 157cabdff1aSopenharmony_ci if (INLINE_SSE2_SLOW(cpu_flags)) { 158cabdff1aSopenharmony_ci c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; 159cabdff1aSopenharmony_ci c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; 160cabdff1aSopenharmony_ci } 161cabdff1aSopenharmony_ci#endif /* HAVE_SSE2_INLINE */ 162cabdff1aSopenharmony_ci} 163