;******************************************************************************
;* SIMD optimized AAC encoder DSP functions
;*
;* Copyright (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Per-lane mask that clears the IEEE-754 sign bit of four packed floats.
float_abs_mask: times 4 dd 0x7fffffff

SECTION .text

;*******************************************************************
;void ff_abs_pow34(float *out, const float *in, const int size);
;
; For every element: out[i] = sqrt(|in[i]| * sqrt(|in[i]|)),
; i.e. |in[i]| raised to the power 3/4.
;
; One full vector (mmsize bytes) is processed per iteration and the
; loop body runs before the counter is tested, so size is assumed to
; be a positive multiple of the vector width in floats, and in/out
; are assumed to be suitably aligned for mova -- TODO confirm against
; the callers.
;*******************************************************************
INIT_XMM sse
cglobal abs_pow34, 3, 3, 3, out, in, size
    mova      m2, [float_abs_mask]
    ; size is a 32-bit int: the upper half of its register is not
    ; guaranteed to be zero on x86-64, so scale it with a 32-bit shift
    ; (which zero-extends into sizeq) before using it as a byte offset.
    shl       sized, 2
    ; Point both arrays at their end and count a negative offset up
    ; towards zero, so a single add drives the loop condition.
    add       inq, sizeq
    add       outq, sizeq
    neg       sizeq
.loop:
    andps     m0, m2, [inq+sizeq]       ; m0 = |in|
    sqrtps    m1, m0                    ; m1 = |in|^(1/2)
    mulps     m0, m1                    ; m0 = |in|^(3/2)
    sqrtps    m0, m0                    ; m0 = |in|^(3/4)
    mova      [outq+sizeq], m0
    add       sizeq, mmsize
    jl        .loop
    RET

;*******************************************************************
;void ff_aac_quantize_bands(int *out, const float *in, const float *scaled,
;                           int size, int is_signed, int maxval, const
;                           float Q34, const float rounding)
;
; For every element:
;   t      = min(scaled[i] * Q34 + rounding, (float)maxval)
;   out[i] = truncate-to-int(t), carrying the sign bit of in[i] when
;            bit 0 of is_signed is set
;
; One full vector (mmsize bytes) is handled per iteration and the loop
; body runs before the counter is tested, so size is assumed to be a
; positive multiple of the vector width in floats, and all three arrays
; are assumed to be suitably aligned -- TODO confirm against the callers.
;*******************************************************************
INIT_XMM sse2
cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
    ; m4 = is_signed moved into the float sign-bit position, in every lane.
    shl       is_signedd, 31
    movd      m4, is_signedd
    shufps    m4, m4, 0

    ; Turn the element count into a byte count, point the three arrays at
    ; their end and walk a negative offset up towards zero.
    shl       sized, 2
    add       scaledq, sizeq
    add       outq, sizeq
    add       inq, sizeq
    neg       sizeq

    ; m0 = Q34, m1 = rounding, m3 = (float)maxval.
%if UNIX64
    ; Q34 and rounding are not loaded here; presumably they already sit in
    ; m0/m1 as the first two float arguments of the calling convention.
    cvtsi2ss  m3, maxvald
%else
    movss     m0, Q34m
    movss     m1, roundingm
    cvtsi2ss  m3, dword maxvalm
%endif
    ; Broadcast each scalar to all lanes.
    shufps    m3, m3, 0
    shufps    m1, m1, 0
    shufps    m0, m0, 0
.next_vec:
    mulps     m2, m0, [scaledq+sizeq]   ; scaled * Q34
    addps     m2, m1                    ; + rounding
    minps     m2, m3                    ; clamp to maxval
    movaps    m5, [inq+sizeq]
    andps     m5, m4                    ; sign bits of in, if requested
    orps      m2, m5                    ; apply the sign before converting
    cvttps2dq m2, m2                    ; truncating float -> int32
    mova      [outq+sizeq], m2
    add       sizeq, mmsize
    jl        .next_vec
    RET