/*
 * Copyright (c) 2014 Peter Meerwald <pmeerw@pmeerw.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

#include "asm-offsets.h"

/*
 * resample_one fmt, es
 *
 * Emits the exported function ff_resample_one_<fmt>_neon, which writes one
 * output sample: the sum over filter_length taps of filter[i] * src[i].
 *
 * Macro arguments:
 *   fmt  suffix used in the generated function name
 *   es   log2 of the element size in bytes; used as the shift amount when
 *        turning element indices into byte offsets (default 2)
 *
 * The per-format macros LOAD1/LOAD2/LOAD4, MLA1/MLA2/MLA4, MUL4, INIT4 and
 * STORE must be defined before this macro is invoked. They are purged at
 * the end of the expansion so the next format can define its own set.
 *
 * Register usage inside the generated function:
 *   r0  in:  pointer to the context structure; the fields at PHASE_SHIFT,
 *            PHASE_SHIFT+4 (phase_mask), FILTER_LENGTH and FILTER_BANK are
 *            read from it (offsets come from asm-offsets.h)
 *       then: filter pointer, advanced by the LOAD macros
 *   r1  in:  dst base; then &dst[dst_index], the address STORE writes to
 *   r2  in:  dst_index; then (index & phase_mask) * filter_length
 *   r3  in:  src base; then &src[sample_index], advanced by the LOAD macros
 *   r4  sample_index = index >> phase_shift (logical shift)
 *   r5  number of filter taps still to process
 *   ip  index, later the filter_bank pointer
 *   [sp, #8] after the push: index, i.e. the first stack argument
 *
 * The conditional branches that follow a LOAD or MLA macro test the flags
 * produced by the preceding subs/cmp, so the per-format macros must not
 * modify the condition flags.
 *
 * NOTE(review): a filter_length of 0 would still run the single-tap path
 * at label 1; the code appears to assume filter_length >= 1.
 */
.macro resample_one     fmt, es=2
function ff_resample_one_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es

        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8] /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2 /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4 /* compute sample_index */
        mul             r2, r2, r5

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es /* &src[sample_index] */

        cmp             r5, #8
        add             r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */

        blt             5f /* fewer than 8 taps: zero accumulators, go to tail */
8:      /* at least 8 taps: the first 4 initialise the accumulators via MUL4 */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:      /* taps 5..8 of the current group are loaded here, accumulated below */
        LOAD4
        beq             6f /* Z from the last subs: exactly 8 taps were left */
        cmp             r5, #8
        MLA4
        blt             4f /* 1 to 7 taps remain */
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:      /* no taps remain after the pending MLA4 */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

/* drop the per-format helpers so the next format can redefine them */
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm


/*
 * float32
 *
 * Helper macros consumed by resample_one for 32-bit float samples.
 *   r0 = filter pointer, r3 = src pointer (both post-incremented by loads)
 *   r1 = destination address
 *   d0/d1 = filter coefficients, d4/d5 = source samples
 *   d16/d17 (q8) = four float partial sums
 */

/* load 1 coefficient and 1 sample; d0 is cleared first so its lane 1 is 0 */
.macro  LOAD1
        veor.32         d0, d0
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
/* load 2 coefficients and 2 samples */
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
/* load 4 coefficients and 4 samples */
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
/* d16 += d0 * d4[0]; lane 1 of d0 was zeroed by LOAD1 */
.macro  MLA1
        vmla.f32        d16, d0, d4[0]
.endm
/* accumulate 2 products into d16 */
.macro  MLA2
        vmla.f32        d16, d0, d4
.endm
/* accumulate 4 products into d16/d17 */
.macro  MLA4
        vmla.f32        d16, d0, d4
        vmla.f32        d17, d1, d5
.endm
/* first 4 products: overwrites d16/d17, so no prior zeroing is needed */
.macro  MUL4
        vmul.f32        d16, d0, d4
        vmul.f32        d17, d1, d5
.endm
/* zero the accumulators d16/d17 */
.macro  INIT4
        veor.f32        q8, q8
.endm
/* reduce the four partial sums to one value and store it at [r1] */
.macro  STORE
        vpadd.f32       d16, d16, d17
        vpadd.f32       d16, d16, d16
        vst1.32         d16[0], [r1]
.endm

/* generates ff_resample_one_flt_neon; shift 2 matches the 32-bit elements */
resample_one flt, 2


/*
 * s32
 *
 * Helper macros consumed by resample_one for signed 32-bit samples.
 *   r0 = filter pointer, r3 = src pointer (both post-incremented by loads)
 *   r1 = destination address
 *   d0/d1 = filter coefficients, d4/d5 = source samples
 *   q8/q9 = four signed 64-bit partial sums (widening multiplies)
 */

/* load 1 coefficient and 1 sample; d0 is cleared first so its lane 1 is 0 */
.macro  LOAD1
        veor.32         d0, d0
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
/* load 2 coefficients and 2 samples */
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
/* load 4 coefficients and 4 samples */
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
/* q8 += d0 * d4[0], widened to 64 bit; lane 1 of d0 was zeroed by LOAD1 */
.macro  MLA1
        vmlal.s32       q8, d0, d4[0]
.endm
/* accumulate 2 widened products into q8 */
.macro  MLA2
        vmlal.s32       q8, d0, d4
.endm
/* accumulate 4 widened products into q8/q9 */
.macro  MLA4
        vmlal.s32       q8, d0, d4
        vmlal.s32       q9, d1, d5
.endm
/* first 4 widened products: overwrites q8/q9 */
.macro  MUL4
        vmull.s32       q8, d0, d4
        vmull.s32       q9, d1, d5
.endm
/* zero both accumulators; MLA1/MLA2 only touch q8 but STORE adds q9 too */
.macro  INIT4
        veor.s64        q8, q8
        veor.s64        q9, q9
.endm
/*
 * Sum the four 64-bit partials into d16, then narrow to 32 bit with a
 * saturating, rounding right shift by 30 and store lane 0 at [r1].
 */
.macro  STORE
        vadd.s64        q8, q8, q9
        vadd.s64        d16, d16, d17
        vqrshrn.s64     d16, q8, #30
        vst1.32         d16[0], [r1]
.endm

/* generates ff_resample_one_s32_neon; shift 2 matches the 32-bit elements */
resample_one s32, 2


/*
 * s16
 *
 * Helper macros consumed by resample_one for signed 16-bit samples.
 *   r0 = filter pointer, r3 = src pointer (both post-incremented by loads)
 *   r1 = destination address
 *   d0 = filter coefficients, d4 = source samples (up to 4 x 16 bit each)
 *   q8 = four signed 32-bit partial sums (widening multiplies)
 */

/* load 1 coefficient and 1 sample; the other lanes of d0 are cleared */
.macro  LOAD1
        veor.16         d0, d0
        vld1.16         {d0[0]}, [r0]! /* load filter */
        vld1.16         {d4[0]}, [r3]! /* load src */
.endm
/*
 * Load 2 coefficients and 2 samples with a single 32-bit lane load each.
 * Both registers are cleared first so the upper two 16-bit lanes are zero
 * and add nothing in the 4-lane MLA2.
 */
.macro  LOAD2
        veor.16         d0, d0
        vld1.32         {d0[0]}, [r0]! /* load filter */
        veor.16         d4, d4
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
/* load 4 coefficients and 4 samples */
.macro  LOAD4
        vld1.16         {d0}, [r0]! /* load filter */
        vld1.16         {d4}, [r3]! /* load src */
.endm
/* q8 += d0 * d4[0], widened to 32 bit; only lane 0 of d0 is non-zero */
.macro  MLA1
        vmlal.s16       q8, d0, d4[0]
.endm
/* q8 += d0 * d4, widened; lanes 2 and 3 were zeroed by LOAD2 */
.macro  MLA2
        vmlal.s16       q8, d0, d4
.endm
/* accumulate 4 widened products into q8 */
.macro  MLA4
        vmlal.s16       q8, d0, d4
.endm
/* first 4 widened products: overwrites q8 */
.macro  MUL4
        vmull.s16       q8, d0, d4
.endm
/* zero the accumulator q8 */
.macro  INIT4
        veor.s32        q8, q8
.endm
/*
 * Reduce the four 32-bit partials to one sum in d16, narrow to 16 bit with
 * a saturating, rounding right shift by 15 and store lane 0 at [r1].
 */
.macro  STORE
        vpadd.s32       d16, d16, d17
        vpadd.s32       d16, d16, d16
        vqrshrn.s32     d16, q8, #15
        vst1.16         d16[0], [r1]
.endm

/* generates ff_resample_one_s16_neon; shift 1 matches the 16-bit elements */
resample_one s16, 1


/*
 * resample_linear fmt, es
 *
 * Emits the exported function ff_resample_linear_<fmt>_neon. It walks the
 * source samples once while accumulating two dot products: one against the
 * filter selected by (index & phase_mask) and one against the filter that
 * starts filter_length elements later. How the two sums are combined is
 * left to the per-format STORE macro.
 *
 * Macro arguments:
 *   fmt  suffix used in the generated function name
 *   es   log2 of the element size in bytes; used as the shift amount when
 *        turning element indices into byte offsets (default 2)
 *
 * The per-format macros LOAD1/LOAD2/LOAD4, MLA1/MLA2/MLA4, MUL4, INIT4 and
 * STORE must be defined before this macro is invoked. They are purged at
 * the end of the expansion.
 *
 * Register usage inside the generated function:
 *   r0  in:  pointer to the context structure; the fields at PHASE_SHIFT,
 *            PHASE_SHIFT+4 (phase_mask), FILTER_LENGTH, FILTER_BANK and
 *            SRC_INCR are read from it (offsets come from asm-offsets.h)
 *       then: pointer into the first filter
 *   r1  in:  dst base; then &dst[dst_index]
 *   r2  in:  dst_index; then pointer into the second filter
 *            (first filter + filter_length elements)
 *   r3  in:  src base; then &src[sample_index]
 *   r4  sample_index while addressing src, afterwards src_incr
 *   r5  number of filter taps still to process
 *   ip  index, later the filter_bank pointer
 *   [sp, #8] after the push: index, i.e. the first stack argument
 *
 * The conditional branches that follow a LOAD or MLA macro test the flags
 * produced by the preceding subs/cmp, so the per-format macros must not
 * modify the condition flags.
 *
 * NOTE(review): a filter_length of 0 would still run the single-tap path
 * at label 1; the code appears to assume filter_length >= 1.
 */
.macro resample_linear  fmt, es=2
function ff_resample_linear_\fmt\()_neon, export=1
        push            {r4, r5}
        add             r1, r1, r2, lsl #\es

        ldr             r2, [r0, #PHASE_SHIFT+4] /* phase_mask */
        ldr             ip, [sp, #8] /* index */
        ldr             r5, [r0, #FILTER_LENGTH]
        and             r2, ip, r2 /* (index & phase_mask) */
        ldr             r4, [r0, #PHASE_SHIFT]
        lsr             r4, ip, r4 /* compute sample_index */
        mul             r2, r2, r5

        ldr             ip, [r0, #FILTER_BANK]
        add             r3, r3, r4, lsl #\es /* &src[sample_index] */

        cmp             r5, #8
        ldr             r4, [r0, #SRC_INCR] /* r4 is free again; kept for STORE */
        add             r0, ip, r2, lsl #\es /* filter = &filter_bank[...] */
        add             r2, r0, r5, lsl #\es /* filter[... + c->filter_length] */

        blt             5f /* fewer than 8 taps: zero accumulators, go to tail */
8:      /* at least 8 taps: the first 4 initialise the accumulators via MUL4 */
        subs            r5, r5, #8
        LOAD4
        MUL4
7:      /* taps 5..8 of the current group are loaded here, accumulated below */
        LOAD4
        beq             6f /* Z from the last subs: exactly 8 taps were left */
        cmp             r5, #8
        MLA4
        blt             4f /* 1 to 7 taps remain */
        subs            r5, r5, #8
        LOAD4
        MLA4
        b               7b
6:      /* no taps remain after the pending MLA4 */
        MLA4
        STORE
        pop             {r4, r5}
        bx              lr
5:
        INIT4
4:      /* remaining filter_length 1 to 7 */
        cmp             r5, #4
        blt             2f
        subs            r5, r5, #4
        LOAD4
        MLA4
        beq             0f
2:      /* remaining filter_length 1 to 3 */
        cmp             r5, #2
        blt             1f
        subs            r5, r5, #2
        LOAD2
        MLA2
        beq             0f
1:      /* remaining filter_length 1 */
        LOAD1
        MLA1
0:
        STORE
        pop             {r4, r5}
        bx              lr
endfunc

/* drop the per-format helpers so the next format can redefine them */
.purgem LOAD1
.purgem LOAD2
.purgem LOAD4
.purgem MLA1
.purgem MLA2
.purgem MLA4
.purgem MUL4
.purgem INIT4
.purgem STORE
.endm


/*
 * float32 linear
 *
 * Helper macros consumed by resample_linear for 32-bit float samples.
 *   r0 = first filter pointer, r2 = second filter pointer, r3 = src pointer
 *        (all post-incremented by the loads)
 *   r1 = destination address, r4 = src_incr
 *   q0 (d0/d1) = first filter coefficients
 *   q1 (d2/d3) = second filter coefficients
 *   q2 (d4/d5) = source samples
 *   q9 (d18/d19) = partial sums for the first filter  ("val")
 *   q8 (d16/d17) = partial sums for the second filter ("v2")
 */

/* load 1 coefficient from each filter and 1 sample; d0/d2 are cleared first */
.macro  LOAD1
        veor.32         d0, d0
        veor.32         d2, d2
        vld1.32         {d0[0]}, [r0]! /* load filter */
        vld1.32         {d2[0]}, [r2]! /* load filter */
        vld1.32         {d4[0]}, [r3]! /* load src */
.endm
/* load 2 coefficients from each filter and 2 samples */
.macro  LOAD2
        vld1.32         {d0}, [r0]! /* load filter */
        vld1.32         {d2}, [r2]! /* load filter */
        vld1.32         {d4}, [r3]! /* load src */
.endm
/* load 4 coefficients from each filter and 4 samples */
.macro  LOAD4
        vld1.32         {d0,d1}, [r0]! /* load filter */
        vld1.32         {d2,d3}, [r2]! /* load filter */
        vld1.32         {d4,d5}, [r3]! /* load src */
.endm
/* accumulate 1 product per filter; lane 1 of d0/d2 was zeroed by LOAD1 */
.macro  MLA1
        vmla.f32        d18, d0, d4[0]
        vmla.f32        d16, d2, d4[0]
.endm
/* accumulate 2 products per filter into the low halves d18 and d16 */
.macro  MLA2
        vmla.f32        d18, d0, d4
        vmla.f32        d16, d2, d4
.endm
/* accumulate 4 products per filter */
.macro  MLA4
        vmla.f32        q9, q0, q2
        vmla.f32        q8, q1, q2
.endm
/* first 4 products per filter: overwrites q9/q8 */
.macro  MUL4
        vmul.f32        q9, q0, q2
        vmul.f32        q8, q1, q2
.endm
/* zero both accumulators */
.macro  INIT4
        veor.f32        q9, q9
        veor.f32        q8, q8
.endm
/*
 * Stores val + (v2 - val) * frac / src_incr at [r1].
 *
 * frac is read as an integer from [sp, #12] (the stack slot after index
 * once r4/r5 have been pushed) and src_incr comes from r4. Both sit in d0
 * (s0/s1) and are converted to float together. The filter registers are no
 * longer needed here, so d0, d1 and d2 are reused as scratch:
 *   s2 (low lane of d1) = v2 - val, s4 (low lane of d2) = val.
 */
.macro  STORE
        vldr            s0, [sp, #12] /* frac */
        vmov            s1, r4
        vcvt.f32.s32    d0, d0

        vsub.f32        q8, q8, q9 /* v2 - val */
        vpadd.f32       d18, d18, d19
        vpadd.f32       d16, d16, d17
        vpadd.f32       d2, d18, d18
        vpadd.f32       d1, d16, d16

        vmul.f32        s2, s2, s0 /* (v2 - val) * frac */
        vdiv.f32        s2, s2, s1 /* / c->src_incr */
        vadd.f32        s4, s4, s2

        vstr            s4, [r1]
.endm

/* generates ff_resample_linear_flt_neon; shift 2 matches the 32-bit elements */
resample_linear flt, 2