1cabdff1aSopenharmony_ci;******************************************************************************
2cabdff1aSopenharmony_ci;* SIMD optimized AAC encoder DSP functions
3cabdff1aSopenharmony_ci;*
4cabdff1aSopenharmony_ci;* Copyright (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
5cabdff1aSopenharmony_ci;*
6cabdff1aSopenharmony_ci;* This file is part of FFmpeg.
7cabdff1aSopenharmony_ci;*
8cabdff1aSopenharmony_ci;* FFmpeg is free software; you can redistribute it and/or
9cabdff1aSopenharmony_ci;* modify it under the terms of the GNU Lesser General Public
10cabdff1aSopenharmony_ci;* License as published by the Free Software Foundation; either
11cabdff1aSopenharmony_ci;* version 2.1 of the License, or (at your option) any later version.
12cabdff1aSopenharmony_ci;*
13cabdff1aSopenharmony_ci;* FFmpeg is distributed in the hope that it will be useful,
14cabdff1aSopenharmony_ci;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15cabdff1aSopenharmony_ci;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16cabdff1aSopenharmony_ci;* Lesser General Public License for more details.
17cabdff1aSopenharmony_ci;*
18cabdff1aSopenharmony_ci;* You should have received a copy of the GNU Lesser General Public
19cabdff1aSopenharmony_ci;* License along with FFmpeg; if not, write to the Free Software
20cabdff1aSopenharmony_ci;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21cabdff1aSopenharmony_ci;******************************************************************************
22cabdff1aSopenharmony_ci
23cabdff1aSopenharmony_ci%include "libavutil/x86/x86util.asm"
24cabdff1aSopenharmony_ci
SECTION_RODATA

; One dword per XMM lane with every bit set except bit 31 (the float sign
; bit); ANDing packed floats with it leaves their absolute values.
float_abs_mask: dd 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
28cabdff1aSopenharmony_ci
29cabdff1aSopenharmony_ciSECTION .text
30cabdff1aSopenharmony_ci
;*******************************************************************
;void ff_abs_pow34(float *out, const float *in, const int size);
;
; out[i] = |in[i]|^(3/4), evaluated as sqrt(|x| * sqrt(|x|)), with one
; XMM vector (4 floats) handled per iteration.
;
; The loop body runs before its exit test and advances by mmsize bytes,
; so size has to be a positive multiple of 4. in and out are accessed
; with aligned instructions (mova / legacy-SSE memory operand) and must
; therefore be mmsize-aligned.
;*******************************************************************
INIT_XMM sse
cglobal abs_pow34, 3, 3, 3, out, in, size
    mova   m2, [float_abs_mask]     ; m2 = mask clearing the sign bit of each lane
    ; size is a 32-bit int. Scale it through the dword view so that on x86-64
    ; the result is zero-extended instead of inheriting whatever the caller
    ; left in the upper half of the register (same form as aac_quantize_bands).
    shl    sized, 2                 ; size *= sizeof(float)
    add    inq, sizeq               ; in  -> end of the input buffer
    add    outq, sizeq              ; out -> end of the output buffer
    neg    sizeq                    ; negative byte offset, counts up towards 0
.loop:
    andps  m0, m2, [inq+sizeq]      ; m0 = |x|
    sqrtps m1, m0                   ; m1 = |x|^(1/2)
    mulps  m0, m1                   ; m0 = |x|^(3/2)
    sqrtps m0, m0                   ; m0 = |x|^(3/4)
    mova   [outq+sizeq], m0
    add    sizeq, mmsize
    jl    .loop                     ; repeat while the offset is still negative
    RET
50cabdff1aSopenharmony_ci
;*******************************************************************
;void ff_aac_quantize_bands(int *out, const float *in, const float *scaled,
;                           int size, int is_signed, int maxval, const float Q34,
;                           const float rounding)
;
; For every element:
;   q = min(scaled[i] * Q34 + rounding, (float)maxval)
;   q |= sign bit of in[i], but only when the low bit of is_signed is set
;   out[i] = q truncated to a 32-bit integer
;
; One XMM vector (4 elements) is processed per iteration; the loop body
; runs before its exit test, so at least one vector is always processed.
;*******************************************************************
INIT_XMM sse2
cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
    ; m4 = (is_signed << 31) in all lanes: selects the sign bit of in[], or nothing
    shl       is_signedd, 31
    movd      m4, is_signedd
    shufps    m4, m4, 0

    ; m3 = (float)maxval in lane 0. m0/m1 (Q34/rounding) are only loaded
    ; explicitly when not on UNIX64; on UNIX64 they are used as they arrive
    ; (the SysV ABI passes float arguments in xmm0/xmm1).
%if UNIX64
    cvtsi2ss  m3, maxvald
%else
    cvtsi2ss  m3, dword maxvalm
    movss     m1, roundingm
    movss     m0, Q34m
%endif

    ; broadcast lane 0 of each scalar across the whole register
    shufps    m3, m3, 0             ; m3 = maxval
    shufps    m1, m1, 0             ; m1 = rounding
    shufps    m0, m0, 0             ; m0 = Q34

    ; point all three buffers at their end and walk a negative byte offset up to 0
    shl       sized, 2              ; size *= 4 bytes per element
    add       scaledq, sizeq
    add       inq, sizeq
    add       outq, sizeq
    neg       sizeq
.quantize:
    andps     m5, m4, [inq+sizeq]       ; m5 = selected sign bits of in[]
    mulps     m2, m0, [scaledq+sizeq]   ; m2 = scaled * Q34
    addps     m2, m1                    ;    + rounding
    minps     m2, m3                    ; clamp to maxval
    orps      m2, m5                    ; merge in the sign bits
    cvttps2dq m2, m2                    ; float -> int32 with truncation
    mova      [outq+sizeq], m2
    add       sizeq, mmsize
    jl        .quantize
    RET
87