/*
 * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/aarch64/asm.S"
#include "asm-offsets.h"

/*
 * resample_one fmt, es=2
 *
 * Emits the exported function ff_resample_one_<fmt>_neon. The function
 * accumulates filter_length products of consecutive source elements and
 * consecutive filter coefficients and hands the accumulators to STORE_ONE,
 * which reduces them and writes a single output element.
 *
 *   fmt  suffix used in the generated symbol name
 *   es   log2 of the element size in bytes (used as a shift amount)
 *
 * Before each invocation the format specific macros LOAD1, LOAD2, LOAD4,
 * LOAD8, M_MLA, M_MUL and STORE_ONE must be defined (plus M_MLA2 and M_MUL2
 * when fmt is dbl). All of them are purged at the end of this macro, so every
 * format has to supply a fresh set.
 *
 * Register usage of the generated function:
 *   x0      base address of the structure that is read at the offsets
 *           FILTER_BANK, FILTER_LENGTH and PHASE_SHIFT (the offsets are not
 *           defined in this file, presumably they come from asm-offsets.h)
 *   x1      destination base address
 *   w2      destination index, sign-extended to 64 bit on entry
 *   x3      source base address
 *   x4      index: index >> phase_shift selects the first source element,
 *           index & phase_mask selects the filter inside the bank
 *   w6      remaining filter length
 *   x9      filter pointer
 *   v0, v1  accumulators
 * Clobbers x2-x4, x6-x11, v0-v7, v16-v27 and the condition flags.
 *
 * NOTE(review): x4 is consumed as a full 64-bit register. If the C prototype
 * declares index as a 32-bit integer, AAPCS64 leaves the upper half
 * unspecified - confirm against the caller.
 * NOTE(review): a filter_length of zero still reaches the single element tail
 * and processes one element, so the length is assumed to be at least 1.
 */
.macro resample_one     fmt, es=2
.ifnc \fmt, dbl
    // Formats other than dbl do not supply M_MUL2/M_MLA2, so define them as
    // empty macros here to keep the shared function body assembling.
    .macro  M_MUL2      x:vararg
    .endm
    .macro  M_MLA2      x:vararg
    .endm
.endif
function ff_resample_one_\fmt\()_neon, export=1
        sxtw            x2,  w2                         // destination index as 64-bit value
        ldr             x9,  [x0, #FILTER_BANK]         // filter bank base
        ldr             w6,  [x0, #FILTER_LENGTH]       // zero-extends into x6
        ldp             w7,  w8,  [x0, #PHASE_SHIFT]    // phase_shift and phase_mask
        lsr             x10, x4,  x7                    // sample_index = index >> phase_shift
        and             x4,  x4,  x8                    // filter number = index & phase_mask
        lsl             x11, x6,  #\es                  // filter_length * elem_size
        add             x3,  x3,  x10, lsl #\es         // address of src[sample_index]
        madd            x9,  x11, x4,  x9               // filter = bank + number * bytes per filter
        cmp             w6,  #16
        b.lt            5f                              // short filter: start from zeroed accumulators
8:      // remaining filter_length at least 16
        // The flags set by this subs are only consumed by the b.eq further
        // down; none of the loads or multiplies in between modify them.
        subs            w6,  w6,  #16
        LOAD8           v4,  v5,  v6,  v7,  x3
        LOAD8           v16, v17, v18, v19, x9
        // The very first products initialise the accumulators.
        M_MUL           v0,  v4,  v16, v1
        M_MUL2          v1,  v6,  v18
7:      // second half of a 16 element step
        LOAD8           v20, v21, v22, v23, x3
        M_MLA           v0,  v5,  v17, v1
        M_MLA2          v1,  v7,  v19
        LOAD8           v24, v25, v26, v27, x9
        M_MLA           v0,  v20, v24, v1
        M_MLA2          v1,  v22, v26
        b.eq            6f                              // remaining length reached zero
        cmp             w6,  #16
        M_MLA           v0,  v21, v25, v1
        M_MLA2          v1,  v23, v27
        b.lt            4f                              // 1-15 elements left
        subs            w6,  w6,  #16
        LOAD8           v4,  v5,  v6,  v7,  x3
        LOAD8           v16, v17, v18, v19, x9
        M_MLA           v0,  v4,  v16, v1
        M_MLA2          v1,  v6,  v18
        b               7b
6:      // filter exhausted: add the still pending products and store
        M_MLA           v0,  v21, v25, v1
        M_MLA2          v1,  v23, v27
        STORE_ONE       0,   x1,  x2,  v1
        ret
5:      // filter_length below 16: nothing has been accumulated yet
        movi            v0.16b, #0
        movi            v1.16b, #0
4:      // remaining filter_length 1-15
        cmp             w6,  #4
        b.lt            2f
        subs            w6,  w6,  #4
        LOAD4           v4,  v5,  x3
        LOAD4           v6,  v7,  x9
        M_MLA           v0,  v4,  v6,  v1
        M_MLA2          v1,  v5,  v7
        b.eq            0f
        b               4b
2:      // remaining filter_length 1-3
        cmp             w6,  #2
        b.lt            1f
        LOAD2           2,   x3
        LOAD2           3,   x9
        subs            w6,  w6,  #2
        // Only v0 is updated from here on. Where LOAD2/LOAD1 fill just the
        // low part of a register the remaining lanes are zero and therefore
        // add nothing in the full width multiply-accumulate.
        M_MLA           v0,  v2,  v3
        b.eq            0f
1:      // remaining filter_length 1
        LOAD1           6,   x3
        LOAD1           7,   x9
        M_MLA           v0,  v6,  v7
0:
        STORE_ONE       0,   x1,  x2,  v1
        ret
endfunc

// Drop the format specific helpers so the next format can define its own.
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem LOAD8
.purgem M_MLA
.purgem M_MLA2
.purgem M_MUL
.purgem M_MUL2
.purgem STORE_ONE
.endm


/*
 * Helper macros for the double precision float variant (8 byte elements).
 *
 * LOADn loads n consecutive elements from addr and post-increments addr by
 * the number of bytes read. LOAD1 and LOAD2 take a bare register number,
 * LOAD4 and LOAD8 take full vector register names.
 */
.macro  LOAD1           d1, addr
        ldr             d\d1, [\addr], #8
.endm
.macro  LOAD2           d1, addr
        ld1             {v\d1\().2d}, [\addr], #16
.endm
.macro  LOAD4           d1, d2, addr
        ld1             {\d1\().2d,\d2\().2d}, [\addr], #32
.endm
.macro  LOAD8           d1, d2, d3, d4, addr
        ld1             {\d1\().2d,\d2\().2d,\d3\().2d,\d4\().2d}, [\addr], #64
.endm
/* d += r0 * r1 on two doubles; the optional trailing argument is ignored. */
.macro  M_MLA           d, r0, r1, d2:vararg
        fmla            \d\().2d, \r0\().2d, \r1\().2d
.endm
/* Same operation for the second accumulator; forwards its operands to M_MLA. */
.macro  M_MLA2          second:vararg
        M_MLA           \second
.endm
/* d = r0 * r1 on two doubles; the optional trailing argument is ignored. */
.macro  M_MUL           d, r0, r1, d2:vararg
        fmul            \d\().2d, \r0\().2d, \r1\().2d
.endm
/* Same operation for the second accumulator; forwards its operands to M_MUL. */
.macro  M_MUL2          second:vararg
        M_MUL           \second
.endm
/*
 * Adds accumulator d2 to v<rn>, sums the two lanes into d<rn> and stores the
 * resulting double at addr + idx * 8.
 */
.macro  STORE_ONE       rn, addr, idx, d2
        fadd            v\rn\().2d,  v\rn\().2d,  \d2\().2d
        faddp           d\rn\(),  v\rn\().2d
        str             d\rn\(),  [\addr, \idx, lsl #3]
.endm

// Emits ff_resample_one_dbl_neon (es=3) and purges the helper macros above.
resample_one dbl, 3


/*
 * Helper macros for the single precision float variant (4 byte elements).
 *
 * LOADn loads n consecutive elements from addr and post-increments addr by
 * the number of bytes read. LOAD1 and LOAD2 take a bare register number,
 * LOAD4 and LOAD8 take full vector register names. Four floats fit into one
 * 128-bit register, so LOAD4 only fills d1 and LOAD8 only fills d1 and d2;
 * the remaining register parameters exist to keep the call sites shared
 * between all formats.
 */
.macro  LOAD1           d1, addr
        ldr             s\d1, [\addr], #4
.endm
.macro  LOAD2           d1, addr
        ld1             {v\d1\().2s}, [\addr], #8
.endm
.macro  LOAD4           d1, d2, addr
        ld1             {\d1\().4s}, [\addr], #16
.endm
.macro  LOAD8           d1, d2, d3, d4, addr
        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
.endm
/* d += r0 * r1 on four floats; the optional trailing argument is ignored. */
.macro  M_MLA           d, r0, r1, d2:vararg
        fmla            \d\().4s, \r0\().4s, \r1\().4s
.endm
/* d = r0 * r1 on four floats; the optional trailing argument is ignored. */
.macro  M_MUL           d, r0, r1, d2:vararg
        fmul            \d\().4s, \r0\().4s, \r1\().4s
.endm
/*
 * Sums the four lanes of v<rn> with two pairwise adds and stores the
 * resulting float at addr + idx * 4. The d2 argument is not used.
 */
.macro  STORE_ONE       rn, addr, idx, d2
        faddp           v\rn\().4s,  v\rn\().4s,  v\rn\().4s
        faddp           s\rn\(),  v\rn\().2s
        str             s\rn\(),  [\addr, \idx, lsl #2]
.endm

// Emits ff_resample_one_flt_neon (default es=2) and purges the helper macros above.
resample_one flt


/*
 * Helper macros for the signed 16-bit variant (2 byte elements).
 *
 * LOADn loads n consecutive elements from addr and post-increments addr by
 * the number of bytes read. LOAD1 and LOAD2 take a bare register number and
 * use scalar loads, LOAD4 and LOAD8 take full vector register names. LOAD4
 * only fills d1 and LOAD8 only fills d1 and d2; the remaining register
 * parameters exist to keep the call sites shared between all formats.
 */
.macro  LOAD1           d1, addr
        ldr             h\d1, [\addr], #2
.endm
.macro  LOAD2           d1, addr
        ldr             s\d1, [\addr], #4
.endm
.macro  LOAD4           d1, d2, addr
        ld1             {\d1\().4h}, [\addr], #8
.endm
.macro  LOAD8           d1, d2, d3, d4, addr
        ld1             {\d1\().4h,\d2\().4h}, [\addr], #16
.endm
/*
 * d += r0 * r1: four 16-bit products widened to 32 bit. The optional trailing
 * argument is ignored.
 */
.macro  M_MLA           d, r0, r1, d2:vararg
        smlal           \d\().4s, \r0\().4h, \r1\().4h
.endm
/*
 * d = r0 * r1: four 16-bit products widened to 32 bit. The optional trailing
 * argument is ignored.
 */
.macro  M_MUL           d, r0, r1, d2:vararg
        smull           \d\().4s, \r0\().4h, \r1\().4h
.endm
/*
 * Sums the four 32-bit lanes of v<rn> with two pairwise adds, narrows the sum
 * to 16 bit with a rounding, saturating right shift by 15 and stores the
 * halfword at addr + idx * 2. The d2 argument is not used.
 * NOTE(review): the shift of 15 presumably matches the fixed point scale of
 * the filter coefficients - confirm against the code building the filter bank.
 */
.macro  STORE_ONE       rn, addr, idx, d2
        addp            v\rn\().4s,  v\rn\().4s,  v\rn\().4s
        addp            v\rn\().4s,  v\rn\().4s,  v\rn\().4s
        sqrshrn         v\rn\().4h,  v\rn\().4s,  #15
        str             h\rn\(),  [\addr, \idx, lsl #1]
.endm

// Emits ff_resample_one_s16_neon (es=1) and purges the helper macros above.
resample_one s16, 1


/*
 * Helper macros for the signed 32-bit variant (4 byte elements).
 *
 * LOADn loads n consecutive elements from addr and post-increments addr by
 * the number of bytes read. LOAD1 and LOAD2 take a bare register number,
 * LOAD4 and LOAD8 take full vector register names. LOAD4 only fills d1 and
 * LOAD8 only fills d1 and d2; the remaining register parameters exist to keep
 * the call sites shared between all formats.
 */
.macro  LOAD1           d1, addr
        ldr             s\d1, [\addr], #4
.endm
.macro  LOAD2           d1, addr
        ld1             {v\d1\().2s}, [\addr], #8
.endm
.macro  LOAD4           d1, d2, addr
        ld1             {\d1\().4s}, [\addr], #16
.endm
.macro  LOAD8           d1, d2, d3, d4, addr
        ld1             {\d1\().4s,\d2\().4s}, [\addr], #32
.endm
/*
 * d1 += low half of r0 * low half of r1, widened to 64 bit. When a second
 * accumulator d2 is given, the products of the high halves are added to it.
 */
.macro  M_MLA           d1, r0, r1, d2:vararg
        smlal           \d1\().2d, \r0\().2s, \r1\().2s
.ifnb \d2
        smlal2          \d2\().2d, \r0\().4s, \r1\().4s
.endif
.endm
/*
 * d1 = low half of r0 * low half of r1, widened to 64 bit. When a second
 * accumulator d2 is given, it receives the products of the high halves.
 */
.macro  M_MUL           d1, r0, r1, d2:vararg
        smull           \d1\().2d, \r0\().2s, \r1\().2s
.ifnb \d2
        smull2          \d2\().2d, \r0\().4s, \r1\().4s
.endif
.endm
/*
 * Adds accumulator d2 to v<rn>, sums the two 64-bit lanes, narrows the sum to
 * 32 bit with a rounding, saturating right shift by 30 and stores the word at
 * addr + idx * 4.
 * NOTE(review): the shift of 30 presumably matches the fixed point scale of
 * the filter coefficients - confirm against the code building the filter bank.
 */
.macro  STORE_ONE       rn, addr, idx, d2
        add             v\rn\().2d,  v\rn\().2d,  \d2\().2d
        addp            d\rn\(),     v\rn\().2d
        sqrshrn         v\rn\().2s,  v\rn\().2d,  #30
        str             s\rn\(),  [\addr, \idx, lsl #2]
.endm

// Emits ff_resample_one_s32_neon (default es=2) and purges the helper macros above.
resample_one s32