;******************************************************************************
;* SIMD optimized SBC encoder DSP functions
;*
;* Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
;* Copyright (C) 2008-2010  Nokia Corporation
;* Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
;* Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
;* Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Two packed dwords, each 0x8000 (1 << 15).
; - sbc_analyze_4 / sbc_analyze_8 add it to their dword sums before the
;   arithmetic right shift by 16, i.e. it is the rounding term.
; - sbc_calc_scalefactors uses it as the initial value of its OR
;   accumulator, which guarantees bit 15 is set before bsr is applied.
scale_mask: times 2 dd 0x8000    ; 1 << (SBC_PROTO_FIXED_SCALE - 1)

SECTION .text

; NIDN op, dst, src
; Emits `op dst, src` only when dst and src are textually different operands
; (the comparison is done by the preprocessor with %ifnidn); otherwise emits
; nothing.
; This is not merely an optimisation: callers pass `paddd` with dst == src to
; mean "there is nothing to accumulate yet", and rely on the instruction
; being dropped rather than doubling the register.
%macro NIDN 3
%ifnidn %2, %3
    %1            %2, %3
%endif
%endmacro

; ANALYZE_MAC out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
;
; One multiply-accumulate step on two 64-bit lanes:
;     tmp1  = in1                              (skipped if tmp1 is in1)
;     tmp2  = in2                              (skipped if tmp2 is in2)
;     tmp1  = pmaddwd(tmp1, [constsq + offset])
;     tmp2  = pmaddwd(tmp2, [constsq + offset + 8])
;     out1 += add1                             (skipped if out1 is add1)
;     out2 += add2                             (skipped if out2 is add2)
;
; Both copies are emitted before either multiply, so in2 may be the same
; register as tmp1 (ANALYZE_MAC_REG relies on this).
; The macro references `constsq`, so the enclosing function must name its
; coefficient pointer argument `consts`.
%macro ANALYZE_MAC 9 ; out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
    NIDN movq,    %5, %3
    NIDN movq,    %6, %4
    pmaddwd       %5, [constsq+%9]
    pmaddwd       %6, [constsq+%9+8]
    NIDN paddd,   %1, %7
    NIDN paddd,   %2, %8
%endmacro

; ANALYZE_MAC_IN out1, out2, tmp1, tmp2, add1, add2, offset
;
; ANALYZE_MAC with both inputs taken from memory: the 16 bytes at
; [inq + offset] are multiplied by the 16 bytes at [constsq + offset]
; (the same byte offset indexes both arrays).
; The macro references `inq`, so the enclosing function must name its sample
; pointer argument `in`.
%macro ANALYZE_MAC_IN 7 ; out1, out2, tmp1, tmp2, add1, add2, offset
    ANALYZE_MAC   %1, %2, [inq+%7], [inq+%7+8], %3, %4, %5, %6, %7
%endmacro

; ANALYZE_MAC_REG out1, out2, in, tmp1, tmp2, offset, pack
;
; ANALYZE_MAC with a single register `in` feeding both lanes, accumulating
; the products tmp1/tmp2 into out1/out2.
;
; When the last argument is the literal token `pack`, `in` is first converted
; in place: each dword is shifted right arithmetically by 16 and the two
; results are saturated to int16 and duplicated into both halves of the
; register (packssdw in, in).  Any other token skips the conversion, which
; is what callers pass when `in` was already packed by an earlier
; invocation.
;
; `in` may be the same register as tmp1; it is then consumed by the multiply.
%macro ANALYZE_MAC_REG 7 ; out1, out2, in, tmp1, tmp2, offset, pack
%ifidn %7, pack
    psrad         %3, 16    ; SBC_PROTO_FIXED_SCALE
    packssdw      %3, %3
%endif
    ANALYZE_MAC   %1, %2, %3, %3, %4, %5, %4, %5, %6
%endmacro

;*******************************************************************
;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
;
; Reads 80 bytes (40 int16) from in and 112 bytes (56 int16) from consts,
; writes 4 int32 to out.
;
; Stage 1: five 16-byte steps multiply in[] by consts[] at the same byte
;          offset with pmaddwd and accumulate four dword sums in m0/m1.
;          The first step also adds the rounding term 0x8000 to each sum.
; Stage 2: each of m0/m1 is shifted right by 16, saturated to int16 and
;          duplicated into both register halves, then multiplied by consts
;          bytes 80..111; the products are summed into m0 (out[0..1]) and
;          m2 (out[2..3]).
;
; No emms is executed here, so the MMX state is still live on return.
; NOTE(review): presumably the caller issues emms -- confirm.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
    ; Stage 1: m0/m1 = 0x8000 + sum of in * consts over byte offsets 0..79
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask], 0
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 16
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 32
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 48
    ANALYZE_MAC_IN   m0, m1, m2, m3, m2, m3, 64

    ; Stage 2: the first call only produces m0/m2 (out == tmp, so nothing is
    ; added); the second adds the products of the packed m1 on top.
    ANALYZE_MAC_REG  m0, m2, m0, m0, m2, 80, pack
    ANALYZE_MAC_REG  m0, m2, m1, m1, m3, 96, pack

    movq          [outq  ], m0
    movq          [outq+8], m2

    RET


;*******************************************************************
;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
;
; Reads 160 bytes (80 int16) from in and 288 bytes (144 int16) from consts,
; writes 8 int32 to out.
;
; Stage 1: ten 16-byte steps multiply in[] by consts[] at the same byte
;          offset.  Steps at offsets 0, 32, .. 128 accumulate into m0/m1,
;          steps at offsets 16, 48, .. 144 into m2/m3, giving eight dword
;          sums; the first step of each group adds the rounding term 0x8000.
; Stage 2: m0..m3 are each shifted right by 16, saturated to int16 and
;          duplicated into both register halves.  They are multiplied by
;          consts at 160/192/224/256 to form out[0..3] (m4, m5) and, still
;          packed, by consts at 176/208/240/272 to form out[4..7] (m0, m5).
;
; No emms is executed here, so the MMX state is still live on return.
; NOTE(review): presumably the caller issues emms -- confirm.
; NOTE(review): cglobal is given a vector register count of 4 although
; m0-m7 are used; left as is -- confirm whether the count matters for MMX
; functions with the x86inc version in use.
;*******************************************************************
INIT_MMX mmx
cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
    ; Stage 1: two interleaved accumulator pairs, m0/m1 and m2/m3
    ANALYZE_MAC_IN   m0, m1, m0, m1, [scale_mask], [scale_mask],  0
    ANALYZE_MAC_IN   m2, m3, m2, m3, [scale_mask], [scale_mask], 16
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  32
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7,  48
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  64
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7,  80
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5,  96
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 112
    ANALYZE_MAC_IN   m0, m1, m4, m5, m4, m5, 128
    ANALYZE_MAC_IN   m2, m3, m6, m7, m6, m7, 144

    ; Stage 2a: pack m0..m3 in place (they are kept for stage 2b) and build
    ; out[0..3] in m4/m5.
    ANALYZE_MAC_REG  m4, m5, m0, m4, m5, 160, pack
    ANALYZE_MAC_REG  m4, m5, m1, m6, m7, 192, pack
    ANALYZE_MAC_REG  m4, m5, m2, m6, m7, 224, pack
    ANALYZE_MAC_REG  m4, m5, m3, m6, m7, 256, pack

    movq          [outq  ], m4
    movq          [outq+8], m5

    ; Stage 2b: m0..m3 are already packed, hence `no`.  They are consumed
    ; here; m5 is free again because it has just been stored.
    ANALYZE_MAC_REG  m0, m5, m0, m0, m5, 176, no
    ANALYZE_MAC_REG  m0, m5, m1, m1, m7, 208, no
    ANALYZE_MAC_REG  m0, m5, m2, m2, m7, 240, no
    ANALYZE_MAC_REG  m0, m5, m3, m3, m7, 272, no

    movq          [outq+16], m0
    movq          [outq+24], m5

    RET


;*******************************************************************
;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
;                              uint32_t scale_factor[2][8],
;                              int blocks, int channels, int subbands)
;
; For every channel ch (one channel if channels < 2, otherwise two) and
; every subband sb < subbands:
;     x = (1 << 15) | OR over blk < blocks of mag(sb_sample_f[blk][ch][sb])
;     scale_factor[ch][sb] = bsr(x) - 15
; where mag(v) is |v| - 1 for v != 0 and 0 for v == 0.
;
; Subbands are handled in pairs (one 64-bit load), so subbands must be even.
; A channel row is 8 dwords = 32 bytes in both arrays and a block is
; 2 * 8 dwords = 64 bytes, so the same byte offset addresses
; sb_sample_f[0][ch][sb] and scale_factor[ch][sb].
;
; The region is walked as one descending byte offset.  When fewer than 8
; subbands are used, the tail of each 32-byte row is padding and must be
; skipped; otherwise a two-channel call would scan [0][subbands..7] and
; never reach [1][0..subbands-1].
;*******************************************************************
INIT_MMX mmx
cglobal sbc_calc_scalefactors, 5, 7, 4, sb_sample_f, scale_factor, blocks, channels, subbands, ptr, blk
    movq          m3, [scale_mask]          ; m3 = { 1 << 15, 1 << 15 }, seed of every OR accumulator

    ; subbands = 4 * subbands: bytes used at the start of each channel row.
    ; The channel count is only needed for the comparison below, so its
    ; register is recycled to remember that row width.
    shl           subbandsd, 2
    cmp           channelsd, 2
    mov           channelsd, subbandsd      ; mov leaves the flags of the cmp intact
    jl            .loop_1
    add           subbandsd, 32             ; two channels: end offset lies in the second row

.loop_1:
    sub           subbandsq, 8              ; step back to the previous pair of subbands

    ; (offset mod 32) >= used row width: this pair is row padding, skip it
    mov           blkd, subbandsd
    and           blkd, 31
    cmp           blkd, channelsd
    jae           .next_pair

    lea           ptrq, [sb_sample_fq + subbandsq]

    ; blk = (blocks - 1) * 64;
    lea           blkq, [blocksq - 1]
    shl           blkd, 6                   ; 32-bit write also clears the upper half of blkq

    movq          m0, m3
.loop_2:
    ; m1 = mag(sample) for both dwords, without branches
    movq          m1, [ptrq+blkq]
    pxor          m2, m2
    pcmpgtd       m1, m2                    ; m1 = -1 where sample > 0, else 0
    paddd         m1, [ptrq+blkq]           ; sample - 1 where positive, sample otherwise
    pcmpgtd       m2, m1                    ; m2 = -1 where that value is negative
    pxor          m1, m2                    ; complement negatives: ~v = -v - 1

    por           m0, m1

    sub           blkq, 64                  ; previous block
    jns           .loop_2

    ; bit 15 is always set in m0, so bsr never sees a zero source
    movd          blkd, m0
    psrlq         m0,   32
    bsr           blkd, blkd
    sub           blkd, 15    ; SCALE_OUT_BITS
    mov           [scale_factorq + subbandsq], blkd

    movd          blkd, m0
    bsr           blkd, blkd
    sub           blkd, 15    ; SCALE_OUT_BITS
    mov           [scale_factorq + subbandsq + 4], blkd

.next_pair:
    test          subbandsq, subbandsq      ; same SF/ZF/OF as cmp subbandsq, 0
    jg            .loop_1

    emms
    RET