/*
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/arm/asm.S"
22cabdff1aSopenharmony_ci#include "neon.S"
23cabdff1aSopenharmony_ci
        /* H.264 quarter-pel (qpel) motion compensation */
25cabdff1aSopenharmony_ci
/*
 * lowpass_const r
 *
 * Prepare the multipliers used by lowpass_8 and lowpass_8_1.  The core
 * register r is loaded with (20 << 16) | 5 and copied into the low word
 * of d6, which leaves 16-bit lane d6[0] = 5 and lane d6[1] = 20.
 *
 * Clobbers: r and the low 32 bits of d6.
 */
.macro  lowpass_const   r
        movw            \r,  #5                 @ bits 15:0  = 5, upper half zeroed
        movt            \r,  #20                @ bits 31:16 = 20, lower half kept
        vmov.32         d6[0], \r
.endm
31cabdff1aSopenharmony_ci
/*
 * lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
 *
 * Run the 6-tap filter (1, -5, 20, 20, -5, 1) along two rows of unsigned
 * bytes, producing 8 outputs per row.
 *
 *   r0, r1   D registers holding 16 consecutive bytes of the first row
 *   r2, r3   D registers holding 16 consecutive bytes of the second row
 *   d0, d1   destinations for the first and the second row
 *   narrow   1: d0/d1 are D registers and receive bytes
 *            0: d0/d1 are Q registers and receive the 16-bit sums
 *
 * For a row s[] and i = 0..7:
 *   sum[i] = s[i] + s[i+5] + 20 * (s[i+2] + s[i+3]) - 5 * (s[i+1] + s[i+4])
 * Only s[0..12] are read.  With narrow=1 the stored byte is sum[i]
 * rounded, shifted right by 5 and saturated to 0..255 (vqrshrun); the
 * 16-bit sums themselves are left in q0 and q8.
 *
 * Requires d6[0] = 5 and d6[1] = 20 (see lowpass_const).
 * Clobbers q1, q2, q9, q10, d30, d31, plus q0 and q8 when narrow=1.
 * The row registers must not be any of q1, q2, q9, q10, d30, d31.  They
 * may be q0/q8 themselves (callers in this file do that): each
 * accumulator is first written only after every vext that reads the
 * corresponding row.  Work on the second row is interleaved with the
 * multiply-accumulates of the first, so keep the order when editing.
 */
.macro  lowpass_8       r0,  r1,  r2,  r3,  d0,  d1,  narrow=1
  .if \narrow
        t0 .req q0
        t1 .req q8
  .else
        t0 .req \d0
        t1 .req \d1
  .endif
        vext.8          d2,  \r0, \r1, #2
        vext.8          d3,  \r0, \r1, #3
        vaddl.u8        q1,  d2,  d3            @ q1 = s[i+2] + s[i+3]
        vext.8          d4,  \r0, \r1, #1
        vext.8          d5,  \r0, \r1, #4
        vaddl.u8        q2,  d4,  d5            @ q2 = s[i+1] + s[i+4]
        vext.8          d30, \r0, \r1, #5
        vaddl.u8        t0,  \r0, d30           @ t0 = s[i] + s[i+5]
        vext.8          d18, \r2, \r3, #2
        vmla.i16        t0,  q1,  d6[1]         @ t0 += 20 * q1
        vext.8          d19, \r2, \r3, #3
        vaddl.u8        q9,  d18, d19
        vext.8          d20, \r2, \r3, #1
        vmls.i16        t0,  q2,  d6[0]         @ t0 -= 5 * q2
        vext.8          d21, \r2, \r3, #4
        vaddl.u8        q10, d20, d21
        vext.8          d31, \r2, \r3, #5
        vaddl.u8        t1,  \r2, d31
        vmla.i16        t1,  q9,  d6[1]
        vmls.i16        t1,  q10, d6[0]
  .if \narrow
        vqrshrun.s16    \d0, t0,  #5
        vqrshrun.s16    \d1, t1,  #5
  .endif
        .unreq  t0
        .unreq  t1
.endm
67cabdff1aSopenharmony_ci
/*
 * lowpass_8_1 r0, r1, d0, narrow=1
 *
 * Single-row form of lowpass_8.
 *
 *   r0, r1   D registers holding 16 consecutive bytes of the row
 *   d0       destination: a D register of bytes when narrow=1,
 *            a Q register of 16-bit sums when narrow=0
 *
 * For i = 0..7:
 *   sum[i] = s[i] + s[i+5] + 20 * (s[i+2] + s[i+3]) - 5 * (s[i+1] + s[i+4])
 * With narrow=1 the stored byte is sum[i] rounded, shifted right by 5 and
 * saturated to 0..255; the 16-bit sums are left in q0.
 *
 * Requires d6[0] = 5 and d6[1] = 20 (see lowpass_const).
 * Clobbers q1, q2, d30, plus q0 when narrow=1.  The row registers must
 * not be any of q1, q2, d30.
 */
.macro  lowpass_8_1     r0,  r1,  d0,  narrow=1
  .if \narrow
        t0 .req q0
  .else
        t0 .req \d0
  .endif
        vext.8          d2,  \r0, \r1, #2
        vext.8          d3,  \r0, \r1, #3
        vaddl.u8        q1,  d2,  d3            @ q1 = s[i+2] + s[i+3]
        vext.8          d4,  \r0, \r1, #1
        vext.8          d5,  \r0, \r1, #4
        vaddl.u8        q2,  d4,  d5            @ q2 = s[i+1] + s[i+4]
        vext.8          d30, \r0, \r1, #5
        vaddl.u8        t0,  \r0, d30           @ t0 = s[i] + s[i+5]
        vmla.i16        t0,  q1,  d6[1]         @ t0 += 20 * q1
        vmls.i16        t0,  q2,  d6[0]         @ t0 -= 5 * q2
  .if \narrow
        vqrshrun.s16    \d0, t0,  #5
  .endif
        .unreq  t0
.endm
89cabdff1aSopenharmony_ci
/*
 * lowpass_8.16 r0, r1, l0, h0, l1, h1, d
 *
 * Run the 6-tap filter along one row of signed 16-bit values and reduce
 * the result to 8 bytes.
 *
 *   r0, r1   Q registers holding 16 consecutive 16-bit elements s[0..15]
 *   l0, h0   the D registers that form the low and high half of r0
 *   l1, h1   the D registers that form the low and high half of r1
 *   d        D register receiving the 8 result bytes
 *
 * For i = 0..7, in 32-bit arithmetic:
 *   sum[i] = s[i] + s[i+5] + 20 * (s[i+2] + s[i+3]) - 5 * (s[i+1] + s[i+4])
 * sum[i] is rounded and shifted right by 10 while narrowing to 16 bits
 * (vrshrn), then saturated to 0..255 (vqmovun).
 *
 * l1/h1 have to alias r1: r1 is replaced by s[5..12] before l1/h1 are
 * read, and that is where the s[i+5] term comes from.
 *
 * Clobbers r1 and q0, q1, q2, q3, q8, q9, q10, q15.  q3 contains d6, so
 * the constants set up by lowpass_const do not survive this macro; the
 * multiplications here are done with shifts instead.
 * r0 may be q8 and r1 may be q15, as the callers in this file do: h0 is
 * consumed before q8 is written, and q15 is reused as a temporary only
 * after both halves of r1 have been consumed.
 */
.macro  lowpass_8.16    r0,  r1,  l0,  h0,  l1,  h1,  d
        vext.16         q1,  \r0, \r1, #2
        vext.16         q0,  \r0, \r1, #3
        vaddl.s16       q9,  d2,  d0            @ q9  = s[i+2] + s[i+3], i = 0..3
        vext.16         q2,  \r0, \r1, #1
        vaddl.s16       q1,  d3,  d1            @ q1  = s[i+2] + s[i+3], i = 4..7
        vext.16         q3,  \r0, \r1, #4
        vaddl.s16       q10, d4,  d6            @ q10 = s[i+1] + s[i+4], i = 0..3
        vext.16         \r1, \r0, \r1, #5       @ r1  = s[5..12]
        vaddl.s16       q2,  d5,  d7            @ q2  = s[i+1] + s[i+4], i = 4..7
        vaddl.s16       q0,  \h0, \h1           @ q0  = s[i] + s[i+5],   i = 4..7
        vaddl.s16       q8,  \l0, \l1           @ q8  = s[i] + s[i+5],   i = 0..3

        /* outputs 0..3: q9 = 20 * q9 as (x << 4) + (x << 2), q10 = 5 * q10 */
        vshl.i32        q3,  q9,  #4
        vshl.i32        q9,  q9,  #2
        vshl.i32        q15, q10, #2
        vadd.i32        q9,  q9,  q3
        vadd.i32        q10, q10, q15

        /* outputs 4..7: q1 = 20 * q1, q2 = 5 * q2 */
        vshl.i32        q3,  q1,  #4
        vshl.i32        q1,  q1,  #2
        vshl.i32        q15, q2,  #2
        vadd.i32        q1,  q1,  q3
        vadd.i32        q2,  q2,  q15

        vadd.i32        q9,  q9,  q8
        vsub.i32        q9,  q9,  q10

        vadd.i32        q1,  q1,  q0
        vsub.i32        q1,  q1,  q2

        vrshrn.s32      d18, q9,  #10
        vrshrn.s32      d19, q1,  #10

        vqmovun.s16     \d,  q9
.endm
126cabdff1aSopenharmony_ci
/*
 * put_h264_qpel16_h_lowpass_neon_packed
 *
 * Horizontal 6-tap filter over an area 16 pixels wide and 16 rows high,
 * done as two passes of put_h264_qpel8_h_lowpass_neon with the
 * destination stride forced to 8 bytes.
 *
 * In:   r0 = destination, r1 = source, r2 = source stride
 *       d6 = filter constants (see lowpass_const)
 * The destination pointer is not rewound between the passes, so the 16
 * rows of the left half are followed directly by the 16 rows of the
 * right half.
 *
 * Clobbers r3, r4 (holds the return address), r12 and whatever the 8-wide
 * routine clobbers.  The second pass is a tail call and returns straight
 * to the caller of this function.
 */
function put_h264_qpel16_h_lowpass_neon_packed
        mov             r4,  lr                 @ keep return address across bl
        mov             r12, #16                @ row count, left half
        mov             r3,  #8                 @ destination stride = 8 bytes
        bl              put_h264_qpel8_h_lowpass_neon
        sub             r1,  r1,  r2, lsl #4    @ source: back up 16 rows
        add             r1,  r1,  #8            @ source: 8 columns to the right
        mov             r12, #16                @ row count, right half
        mov             lr,  r4
        b               put_h264_qpel8_h_lowpass_neon
endfunc
138cabdff1aSopenharmony_ci
/*
 * h264_qpel_h_lowpass type
 *
 * Emits \type\()_h264_qpel16_h_lowpass_neon and
 * \type\()_h264_qpel8_h_lowpass_neon, where type is put or avg:
 *   put  stores the filtered pixels
 *   avg  stores the rounding average (vrhadd) of the filtered pixels and
 *        the bytes already present at the destination
 *
 * Register interface of both functions:
 *   r0 = destination (accessed with a 64-bit alignment hint), r3 = its stride
 *   r1 = source, r2 = its stride
 *   d6 = filter constants (see lowpass_const); not set up here
 * Output pixel i of a row is computed from the six source bytes at
 * r1[i] .. r1[i+5], and 16 bytes are read per row.
 */
.macro  h264_qpel_h_lowpass type
/*
 * 16 pixels wide, 16 rows: the left half is done by calling the 8-wide
 * routine; r0/r1 are then moved back up 16 rows and 8 bytes to the right.
 * There is no return instruction after the pop, so execution continues
 * into the 8-wide routine defined directly below, which handles the right
 * half and returns to the original caller.
 * NOTE(review): this relies on function/endfunc (asm.S) not emitting
 * anything executable between the two bodies -- confirm.
 */
function \type\()_h264_qpel16_h_lowpass_neon
        push            {lr}
        mov             r12, #16
        bl              \type\()_h264_qpel8_h_lowpass_neon
        sub             r0,  r0,  r3, lsl #4
        sub             r1,  r1,  r2, lsl #4
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        mov             r12, #16
        pop             {lr}
endfunc

/*
 * 8 pixels wide, r12 rows.  r12 is supplied by the caller and must be
 * even and non-zero, since two rows are handled per iteration.
 * On return r0 and r1 have advanced by r12 rows.
 * Clobbers r12, flags and the registers listed for lowpass_8 (narrow=1).
 */
function \type\()_h264_qpel8_h_lowpass_neon
1:      vld1.8          {d0, d1},  [r1], r2
        vld1.8          {d16,d17}, [r1], r2
        subs            r12, r12, #2            @ flags are consumed by bne below
        lowpass_8       d0,  d1,  d16, d17, d0,  d16
  .ifc \type,avg
        vld1.8          {d2},     [r0,:64], r3
        vld1.8          {d3},     [r0,:64]
        vrhadd.u8       d0,  d0,  d2
        vrhadd.u8       d16, d16, d3
        sub             r0,  r0,  r3            @ back to the first of the two rows
  .endif
        vst1.8          {d0},     [r0,:64], r3
        vst1.8          {d16},    [r0,:64], r3
        bne             1b
        bx              lr
endfunc
.endm

        h264_qpel_h_lowpass put
        h264_qpel_h_lowpass avg
173cabdff1aSopenharmony_ci
/*
 * h264_qpel_h_lowpass_l2 type
 *
 * Emits \type\()_h264_qpel16_h_lowpass_l2_neon and
 * \type\()_h264_qpel8_h_lowpass_l2_neon, where type is put or avg.
 * The horizontally filtered pixels are rounding-averaged (vrhadd) with a
 * second source before being stored; the avg variant additionally
 * averages the result with the bytes already present at the destination.
 *
 * Register interface of both functions:
 *   r0 = destination (accessed with a 64-bit alignment hint)
 *   r1 = source to be filtered
 *   r2 = stride, used for r0, r1 and r3 alike
 *   r3 = second source, 8 bytes read per row
 *   d6 = filter constants (see lowpass_const); not set up here
 */
.macro  h264_qpel_h_lowpass_l2 type
/*
 * 16 pixels wide, 16 rows: the left half is done by calling the 8-wide
 * routine; all three pointers are then moved back up 16 rows and 8 bytes
 * to the right.  There is no return instruction after the pop, so
 * execution continues into the 8-wide routine defined directly below.
 * NOTE(review): this relies on function/endfunc (asm.S) not emitting
 * anything executable between the two bodies -- confirm.
 */
function \type\()_h264_qpel16_h_lowpass_l2_neon
        push            {lr}
        mov             r12, #16
        bl              \type\()_h264_qpel8_h_lowpass_l2_neon
        sub             r0,  r0,  r2, lsl #4
        sub             r1,  r1,  r2, lsl #4
        sub             r3,  r3,  r2, lsl #4
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        add             r3,  r3,  #8
        mov             r12, #16
        pop             {lr}
endfunc

/*
 * 8 pixels wide, r12 rows (even, non-zero, supplied by the caller).
 * Both filtered rows land in q0 (d0 = first row, d1 = second row) so one
 * Q-wide vrhadd against q14 (d28/d29, the second source) covers both.
 * Clobbers r12, flags, q14 and the registers listed for lowpass_8.
 */
function \type\()_h264_qpel8_h_lowpass_l2_neon
1:      vld1.8          {d0, d1},  [r1], r2
        vld1.8          {d16,d17}, [r1], r2
        vld1.8          {d28},     [r3], r2
        vld1.8          {d29},     [r3], r2
        subs            r12, r12, #2            @ flags are consumed by bne below
        lowpass_8       d0,  d1,  d16, d17, d0,  d1
        vrhadd.u8       q0,  q0,  q14
  .ifc \type,avg
        vld1.8          {d2},      [r0,:64], r2
        vld1.8          {d3},      [r0,:64]
        vrhadd.u8       q0,  q0,  q1
        sub             r0,  r0,  r2            @ back to the first of the two rows
  .endif
        vst1.8          {d0},      [r0,:64], r2
        vst1.8          {d1},      [r0,:64], r2
        bne             1b
        bx              lr
endfunc
.endm

        h264_qpel_h_lowpass_l2 put
        h264_qpel_h_lowpass_l2 avg
212cabdff1aSopenharmony_ci
/*
 * put_h264_qpel16_v_lowpass_neon_packed
 *
 * Vertical 6-tap filter over a 16x16 area, done as four calls of
 * put_h264_qpel8_v_lowpass_neon with the destination stride forced to
 * 8 bytes.
 *
 * In:   r0 = destination, r1 = source, r3 = source stride
 *       d6 = filter constants (see lowpass_const)
 * The destination pointer is never rewound, so the four 8x8 results are
 * stored back to back in the order: left/top, left/bottom, right/top,
 * right/bottom.
 *
 * Each 8x8 call leaves r1 twelve rows further down (13 rows are read, the
 * last one without write-back); the subtractions below compensate.
 * Clobbers r2, r4 (holds the return address) and whatever the 8x8 routine
 * clobbers.  The last call is a tail call.
 */
function put_h264_qpel16_v_lowpass_neon_packed
        mov             r4,  lr                 @ keep return address across bl
        mov             r2,  #8                 @ destination stride = 8 bytes
        bl              put_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ 12 - 4 = 8 rows below the start
        bl              put_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #4    @ 20 rows back up (16 + 4) ...
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ ... and 8 columns to the right
        bl              put_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2
        mov             lr,  r4
        b               put_h264_qpel8_v_lowpass_neon
endfunc
227cabdff1aSopenharmony_ci
/*
 * h264_qpel_v_lowpass type
 *
 * Emits \type\()_h264_qpel16_v_lowpass_neon and
 * \type\()_h264_qpel8_v_lowpass_neon, where type is put or avg:
 *   put  stores the filtered pixels
 *   avg  stores the rounding average (vrhadd) of the filtered pixels and
 *        the bytes already present at the destination
 *
 * Register interface of both functions (note that the stride registers
 * are the other way round compared to the h_lowpass functions):
 *   r0 = destination (accessed with a 64-bit alignment hint), r2 = its stride
 *   r1 = source, r3 = its stride
 *   d6 = filter constants (see lowpass_const); not set up here
 */
.macro  h264_qpel_v_lowpass type
/*
 * 16x16: three calls of the 8x8 routine (left/top, left/bottom,
 * right/top) with pointer fix-ups in between.  After the last fix-up
 * there is no return instruction, so execution continues into the 8x8
 * routine defined directly below for the right/bottom quarter, with lr
 * restored to the original return address.
 * Each 8x8 call leaves r1 twelve rows and r0 eight rows further down.
 * Clobbers r4, which holds the return address.
 * NOTE(review): the fall-through relies on function/endfunc (asm.S) not
 * emitting anything executable between the two bodies -- confirm.
 */
function \type\()_h264_qpel16_v_lowpass_neon
        mov             r4,  lr
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2    @ 12 - 4 = 8 rows below the start
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             r0,  r0,  r2, lsl #4    @ destination: up 16 rows,
        add             r0,  r0,  #8            @ 8 columns to the right
        sub             r1,  r1,  r3, lsl #4    @ source: up 20 rows (16 + 4),
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ 8 columns to the right
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             r1,  r1,  r3, lsl #2
        mov             lr,  r4
endfunc

/*
 * 8x8: reads 13 rows of 8 bytes starting at r1 and writes 8 rows of
 * 8 bytes at r0.  On return r1 has advanced by 12 rows and r0 by 8 rows.
 * Uses q4-q7 and q11-q14 for the pixel data, without saving any of them
 * here, in addition to the registers listed for lowpass_8.
 */
function \type\()_h264_qpel8_v_lowpass_neon
        /* rows 0..7 go to the low D halves of q4-q7 and q11-q14 ... */
        vld1.8          {d8},  [r1], r3
        vld1.8          {d10}, [r1], r3
        vld1.8          {d12}, [r1], r3
        vld1.8          {d14}, [r1], r3
        vld1.8          {d22}, [r1], r3
        vld1.8          {d24}, [r1], r3
        vld1.8          {d26}, [r1], r3
        vld1.8          {d28}, [r1], r3
        /* ... rows 8..12 to the high halves of q4-q7 and q11; d25, d27
         * and d29 are not loaded */
        vld1.8          {d9},  [r1], r3
        vld1.8          {d11}, [r1], r3
        vld1.8          {d13}, [r1], r3
        vld1.8          {d15}, [r1], r3
        vld1.8          {d23}, [r1]

        /*
         * Transpose, run the row filter, transpose back: this turns the
         * horizontal filter macro into a vertical one.  transpose_8x8 is
         * defined in neon.S; with Q operands it is assumed to transpose
         * the low and the high D halves as two separate 8x8 byte
         * matrices, so that each Q register then holds one source column
         * (13 valid bytes, of which lowpass_8 reads exactly s[0..12])
         * -- TODO confirm against neon.S.
         */
        transpose_8x8   q4,  q5,  q6,  q7,  q11, q12, q13, q14
        lowpass_8       d8,  d9,  d10, d11, d8,  d10
        lowpass_8       d12, d13, d14, d15, d12, d14
        lowpass_8       d22, d23, d24, d25, d22, d24
        lowpass_8       d26, d27, d28, d29, d26, d28
        transpose_8x8   d8,  d10, d12, d14, d22, d24, d26, d28

  .ifc \type,avg
        /* the odd-numbered D registers are free by now and take the
         * existing destination rows */
        vld1.8          {d9},  [r0,:64], r2
        vld1.8          {d11}, [r0,:64], r2
        vld1.8          {d13}, [r0,:64], r2
        vrhadd.u8       d8,  d8,  d9
        vld1.8          {d15}, [r0,:64], r2
        vrhadd.u8       d10, d10, d11
        vld1.8          {d23}, [r0,:64], r2
        vrhadd.u8       d12, d12, d13
        vld1.8          {d25}, [r0,:64], r2
        vrhadd.u8       d14, d14, d15
        vld1.8          {d27}, [r0,:64], r2
        vrhadd.u8       d22, d22, d23
        vld1.8          {d29}, [r0,:64], r2
        vrhadd.u8       d24, d24, d25
        vrhadd.u8       d26, d26, d27
        vrhadd.u8       d28, d28, d29
        sub             r0,  r0,  r2,  lsl #3   @ back to the first of the 8 rows
  .endif

        vst1.8          {d8},  [r0,:64], r2
        vst1.8          {d10}, [r0,:64], r2
        vst1.8          {d12}, [r0,:64], r2
        vst1.8          {d14}, [r0,:64], r2
        vst1.8          {d22}, [r0,:64], r2
        vst1.8          {d24}, [r0,:64], r2
        vst1.8          {d26}, [r0,:64], r2
        vst1.8          {d28}, [r0,:64], r2

        bx              lr
endfunc
.endm

        h264_qpel_v_lowpass put
        h264_qpel_v_lowpass avg
301cabdff1aSopenharmony_ci
/*
 * h264_qpel_v_lowpass_l2 type
 *
 * Emits \type\()_h264_qpel16_v_lowpass_l2_neon and
 * \type\()_h264_qpel8_v_lowpass_l2_neon, where type is put or avg.
 * The vertically filtered pixels are rounding-averaged (vrhadd) with a
 * second source before being stored; the avg variant additionally
 * averages the result with the bytes already present at the destination.
 *
 * Register interface of both functions:
 *   r0  = destination (accessed with a 64-bit alignment hint)
 *   r1  = source to be filtered
 *   r3  = stride, used for both r0 and r1
 *   r12 = second source, 8 bytes read per row, r2 = its stride
 *   d6  = filter constants (see lowpass_const); not set up here
 */
.macro  h264_qpel_v_lowpass_l2 type
/*
 * 16x16: three calls of the 8x8 routine (left/top, left/bottom,
 * right/top) with pointer fix-ups in between.  After the last fix-up
 * there is no return instruction, so execution continues into the 8x8
 * routine defined directly below for the right/bottom quarter, with lr
 * restored to the original return address.
 * Each 8x8 call leaves r1 twelve rows, and r0 and r12 eight rows, further
 * down.  Clobbers r4, which holds the return address.
 * NOTE(review): the fall-through relies on function/endfunc (asm.S) not
 * emitting anything executable between the two bodies -- confirm.
 */
function \type\()_h264_qpel16_v_lowpass_l2_neon
        mov             r4,  lr
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             r1,  r1,  r3, lsl #2    @ 12 - 4 = 8 rows below the start
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             r0,  r0,  r3, lsl #4    @ destination and second source:
        sub             r12, r12, r2, lsl #4    @ up 16 rows,
        add             r0,  r0,  #8            @ 8 columns to the right
        add             r12, r12, #8
        sub             r1,  r1,  r3, lsl #4    @ source: up 20 rows (16 + 4),
        sub             r1,  r1,  r3, lsl #2
        add             r1,  r1,  #8            @ 8 columns to the right
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             r1,  r1,  r3, lsl #2
        mov             lr,  r4
endfunc

/*
 * 8x8: reads 13 rows of 8 bytes at r1 and 8 rows of 8 bytes at r12, and
 * writes 8 rows of 8 bytes at r0.  On return r1 has advanced by 12 rows,
 * r0 and r12 by 8 rows.
 * Uses q4-q7 and q11-q14 for the pixel data, without saving any of them
 * here, in addition to the registers listed for lowpass_8.
 */
function \type\()_h264_qpel8_v_lowpass_l2_neon
        /* rows 0..7 go to the low D halves of q4-q7 and q11-q14 ... */
        vld1.8          {d8},  [r1], r3
        vld1.8          {d10}, [r1], r3
        vld1.8          {d12}, [r1], r3
        vld1.8          {d14}, [r1], r3
        vld1.8          {d22}, [r1], r3
        vld1.8          {d24}, [r1], r3
        vld1.8          {d26}, [r1], r3
        vld1.8          {d28}, [r1], r3
        /* ... rows 8..12 to the high halves of q4-q7 and q11; d25, d27
         * and d29 are not loaded */
        vld1.8          {d9},  [r1], r3
        vld1.8          {d11}, [r1], r3
        vld1.8          {d13}, [r1], r3
        vld1.8          {d15}, [r1], r3
        vld1.8          {d23}, [r1]

        /*
         * Transpose, run the row filter, transpose back (transpose_8x8 is
         * defined in neon.S; its exact behaviour with Q operands is
         * assumed to be two independent 8x8 byte transposes -- TODO
         * confirm).  The filter results are placed in adjacent D
         * registers here, so that after the second transpose each of q4,
         * q6, q11, q13 holds two consecutive output rows.
         */
        transpose_8x8   q4,  q5,  q6,  q7,  q11, q12, q13, q14
        lowpass_8       d8,  d9,  d10, d11, d8,  d9
        lowpass_8       d12, d13, d14, d15, d12, d13
        lowpass_8       d22, d23, d24, d25, d22, d23
        lowpass_8       d26, d27, d28, d29, d26, d27
        transpose_8x8   d8,  d9,  d12, d13, d22, d23, d26, d27

        /* second source rows 0..7 into d0-d5, d10, d11, averaged two rows
         * at a time with the filter output */
        vld1.8          {d0},  [r12], r2
        vld1.8          {d1},  [r12], r2
        vld1.8          {d2},  [r12], r2
        vld1.8          {d3},  [r12], r2
        vld1.8          {d4},  [r12], r2
        vrhadd.u8       q0,  q0,  q4
        vld1.8          {d5},  [r12], r2
        vrhadd.u8       q1,  q1,  q6
        vld1.8          {d10}, [r12], r2
        vrhadd.u8       q2,  q2,  q11
        vld1.8          {d11}, [r12], r2
        vrhadd.u8       q5,  q5,  q13

  .ifc \type,avg
        /* d16 and d17 alternate as landing registers for the existing
         * destination rows */
        vld1.8          {d16}, [r0,:64], r3
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d0,  d0,  d16
        vld1.8          {d16}, [r0,:64], r3
        vrhadd.u8       d1,  d1,  d17
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d2,  d2,  d16
        vld1.8          {d16}, [r0,:64], r3
        vrhadd.u8       d3,  d3,  d17
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d4,  d4,  d16
        vld1.8          {d16}, [r0,:64], r3
        vrhadd.u8       d5,  d5,  d17
        vld1.8          {d17}, [r0,:64], r3
        vrhadd.u8       d10, d10, d16
        vrhadd.u8       d11, d11, d17
        sub             r0,  r0,  r3,  lsl #3   @ back to the first of the 8 rows
  .endif

        vst1.8          {d0},  [r0,:64], r3
        vst1.8          {d1},  [r0,:64], r3
        vst1.8          {d2},  [r0,:64], r3
        vst1.8          {d3},  [r0,:64], r3
        vst1.8          {d4},  [r0,:64], r3
        vst1.8          {d5},  [r0,:64], r3
        vst1.8          {d10}, [r0,:64], r3
        vst1.8          {d11}, [r0,:64], r3

        bx              lr
endfunc
.endm

        h264_qpel_v_lowpass_l2 put
        h264_qpel_v_lowpass_l2 avg
390cabdff1aSopenharmony_ci
/*
 * put_h264_qpel8_hv_lowpass_neon_top
 *
 * Shared front end of the 8x8 horizontal+vertical filters: filters 13
 * source rows horizontally at 16-bit precision, then filters the result
 * in the other direction and leaves the final 8x8 bytes in registers.
 *
 * In:   r1 = source, r3 = source stride
 *       r4 = scratch buffer, accessed with a 128-bit alignment hint;
 *            the first 192 bytes are written
 * Out:  d12, d13, d14, d15, d8, d9, d10, d11 = result, stored by the
 *       callers in this file as output rows 0..7 in that order
 *       r1 advanced by 12 rows, r4 back at its entry value, r12 = -16
 *
 * Sets up d6 itself through lowpass_const.  Uses q0-q15 without saving
 * any of them; nothing is written to the destination here.
 */
function put_h264_qpel8_hv_lowpass_neon_top
        lowpass_const   r12
        mov             r12, #12
        /* rows 0..11, two per iteration: horizontal filter kept as 16-bit
         * sums (narrow=0), 16 bytes per row appended to the scratch buffer */
1:      vld1.8          {d0, d1},  [r1], r3
        vld1.8          {d16,d17}, [r1], r3
        subs            r12, r12, #2
        lowpass_8       d0,  d1,  d16, d17, q11, q12, narrow=0
        vst1.8          {d22-d25}, [r4,:128]!
        bne             1b

        /* row 12 stays in q12 and is not written to the buffer */
        vld1.8          {d0, d1},  [r1]
        lowpass_8_1     d0,  d1,  q12, narrow=0

        /* read the buffer back from row 11 down to row 0:
         * q15 = row 11, q10..q8 = rows 10..8, q7..q0 = rows 7..0 */
        mov             r12, #-16
        add             r4,  r4,  r12
        vld1.8          {d30,d31}, [r4,:128], r12
        vld1.8          {d20,d21}, [r4,:128], r12
        vld1.8          {d18,d19}, [r4,:128], r12
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d14,d15}, [r4,:128], r12
        vld1.8          {d12,d13}, [r4,:128], r12
        vld1.8          {d10,d11}, [r4,:128], r12
        vld1.8          {d8, d9},  [r4,:128], r12
        vld1.8          {d6, d7},  [r4,:128], r12
        vld1.8          {d4, d5},  [r4,:128], r12
        vld1.8          {d2, d3},  [r4,:128], r12
        vld1.8          {d0, d1},  [r4,:128]

        /*
         * swap4 and transpose16_4x4 are defined in neon.S.  Together they
         * are presumably a transpose of the 16-bit data, so that each
         * register pair handed to lowpass_8.16 below holds one column of
         * horizontally filtered values -- TODO confirm against neon.S.
         */
        swap4           d1,  d3,  d5,  d7,  d8,  d10, d12, d14
        transpose16_4x4 q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7

        swap4           d17, d19, d21, d31, d24, d26, d28, d22
        transpose16_4x4 q8,  q9,  q10, q15, q12, q13, q14, q11

        /* lowpass_8.16 clobbers q0-q3, q8-q10 and q15: park their contents
         * in the scratch buffer while the other four pairs are filtered */
        vst1.8          {d30,d31}, [r4,:128]!
        vst1.8          {d6, d7},  [r4,:128]!
        vst1.8          {d20,d21}, [r4,:128]!
        vst1.8          {d4, d5},  [r4,:128]!
        vst1.8          {d18,d19}, [r4,:128]!
        vst1.8          {d2, d3},  [r4,:128]!
        vst1.8          {d16,d17}, [r4,:128]!
        vst1.8          {d0, d1},  [r4,:128]

        lowpass_8.16    q4,  q12, d8,  d9,  d24, d25, d8
        lowpass_8.16    q5,  q13, d10, d11, d26, d27, d9
        lowpass_8.16    q6,  q14, d12, d13, d28, d29, d10
        lowpass_8.16    q7,  q11, d14, d15, d22, d23, d11

        /* fetch the parked pairs back in reverse store order, i.e. the
         * former (q0,q8), (q1,q9), (q2,q10), (q3,q15), always into q8/q15 */
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128], r12
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d12
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128], r12
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d13
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128], r12
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d14
        vld1.8          {d16,d17}, [r4,:128], r12
        vld1.8          {d30,d31}, [r4,:128]
        lowpass_8.16    q8,  q15, d16, d17, d30, d31, d15

        transpose_8x8   d12, d13, d14, d15, d8,  d9,  d10, d11

        bx              lr
endfunc
456cabdff1aSopenharmony_ci
/*
 * h264_qpel8_hv_lowpass type
 *
 * Emits \type\()_h264_qpel8_hv_lowpass_neon, where type is put or avg:
 * the 8x8 horizontal+vertical filter result is computed by
 * put_h264_qpel8_hv_lowpass_neon_top and then
 *   put  stored as is
 *   avg  rounding-averaged (vrhadd) with the bytes already present at
 *        the destination, then stored
 *
 * In:   r0 = destination (accessed with a 64-bit alignment hint)
 *       r2 = destination stride
 *       r1, r3, r4 = source, source stride and scratch buffer, passed
 *                    through unchanged to the _top routine
 * Out:  r0 advanced by 8 rows; r1 as left by the _top routine
 * Clobbers r10 (holds the return address) plus everything the _top
 * routine clobbers.
 */
.macro  h264_qpel8_hv_lowpass type
function \type\()_h264_qpel8_hv_lowpass_neon
        mov             r10, lr
        bl              put_h264_qpel8_hv_lowpass_neon_top
  .ifc \type,avg
        /* destination rows 0..7 into d0..d7; each Q-wide vrhadd merges
         * two rows: q6 = rows 0-1, q7 = rows 2-3, q4 = rows 4-5,
         * q5 = rows 6-7 */
        vld1.8          {d0},      [r0,:64], r2
        vld1.8          {d1},      [r0,:64], r2
        vld1.8          {d2},      [r0,:64], r2
        vld1.8          {d3},      [r0,:64], r2
        vrhadd.u8       q6,  q6,  q0
        vld1.8          {d4},      [r0,:64], r2
        vld1.8          {d5},      [r0,:64], r2
        vrhadd.u8       q7,  q7,  q1
        vld1.8          {d6},      [r0,:64], r2
        vld1.8          {d7},      [r0,:64], r2
        vrhadd.u8       q4,  q4,  q2
        vrhadd.u8       q5,  q5,  q3
        sub             r0,  r0,  r2,  lsl #3   @ back to the first of the 8 rows
  .endif

        vst1.8          {d12},     [r0,:64], r2
        vst1.8          {d13},     [r0,:64], r2
        vst1.8          {d14},     [r0,:64], r2
        vst1.8          {d15},     [r0,:64], r2
        vst1.8          {d8},      [r0,:64], r2
        vst1.8          {d9},      [r0,:64], r2
        vst1.8          {d10},     [r0,:64], r2
        vst1.8          {d11},     [r0,:64], r2

        mov             lr,  r10
        bx              lr
endfunc
.endm

        h264_qpel8_hv_lowpass put
        h264_qpel8_hv_lowpass avg
493cabdff1aSopenharmony_ci
/*
 * h264_qpel8_hv_lowpass_l2 type
 *
 * Emits \type\()_h264_qpel8_hv_lowpass_l2_neon, where type is put or avg:
 * the 8x8 horizontal+vertical filter result from
 * put_h264_qpel8_hv_lowpass_neon_top is rounding-averaged (vrhadd) with a
 * second source; the avg variant additionally averages with the bytes
 * already present at the destination.
 *
 * In:   r0 = destination (accessed with a 64-bit alignment hint)
 *       r1 = source to be filtered, r4 = scratch buffer (used by _top)
 *       r2 = second source: 64 contiguous bytes read with a 128-bit
 *            alignment hint, i.e. 8 rows of 8 bytes at a stride of 8
 *       r3 = stride, used for both r0 and r1
 * Out:  r0 advanced by 8 rows, r2 advanced by 64 bytes; r1 as left by
 *       the _top routine
 * Clobbers r10 (holds the return address) plus everything the _top
 * routine clobbers.
 */
.macro  h264_qpel8_hv_lowpass_l2 type
function \type\()_h264_qpel8_hv_lowpass_l2_neon
        mov             r10, lr
        bl              put_h264_qpel8_hv_lowpass_neon_top

        /* two second-source rows per load; the filter result is in
         * q6 (rows 0-1), q7 (rows 2-3), q4 (rows 4-5), q5 (rows 6-7) */
        vld1.8          {d0, d1},  [r2,:128]!
        vld1.8          {d2, d3},  [r2,:128]!
        vrhadd.u8       q0,  q0,  q6
        vld1.8          {d4, d5},  [r2,:128]!
        vrhadd.u8       q1,  q1,  q7
        vld1.8          {d6, d7},  [r2,:128]!
        vrhadd.u8       q2,  q2,  q4
        vrhadd.u8       q3,  q3,  q5
  .ifc \type,avg
        /* destination rows 0..7 into d16..d23 (q8-q11), merged two rows
         * at a time */
        vld1.8          {d16},     [r0,:64], r3
        vld1.8          {d17},     [r0,:64], r3
        vld1.8          {d18},     [r0,:64], r3
        vld1.8          {d19},     [r0,:64], r3
        vrhadd.u8       q0,  q0,  q8
        vld1.8          {d20},     [r0,:64], r3
        vld1.8          {d21},     [r0,:64], r3
        vrhadd.u8       q1,  q1,  q9
        vld1.8          {d22},     [r0,:64], r3
        vld1.8          {d23},     [r0,:64], r3
        vrhadd.u8       q2,  q2,  q10
        vrhadd.u8       q3,  q3,  q11
        sub             r0,  r0,  r3,  lsl #3   @ back to the first of the 8 rows
  .endif
        vst1.8          {d0},      [r0,:64], r3
        vst1.8          {d1},      [r0,:64], r3
        vst1.8          {d2},      [r0,:64], r3
        vst1.8          {d3},      [r0,:64], r3
        vst1.8          {d4},      [r0,:64], r3
        vst1.8          {d5},      [r0,:64], r3
        vst1.8          {d6},      [r0,:64], r3
        vst1.8          {d7},      [r0,:64], r3

        mov             lr,  r10
        bx              lr
endfunc
.endm

        h264_qpel8_hv_lowpass_l2 put
        h264_qpel8_hv_lowpass_l2 avg
538cabdff1aSopenharmony_ci
/*
 * 16x16 horizontal+vertical lowpass, built from four calls to the 8x8
 * routines.  Quadrant order in both functions: top-left, bottom-left,
 * top-right, bottom-right.
 *
 * Registers as used by the code below:
 *   r0  dst, r1  src, r3  src stride
 *   r2  dst stride in the plain version; second-source pointer in the l2 one
 *   r4  in the l2 version, points 256 bytes past the start of the second source
 *   r9  scratch: holds the return address (the 8x8 l2 routine keeps its own
 *       in r10)
 * The fourth call is a tail call: lr is restored first, so the 8x8 routine
 * returns straight to our caller.
 *
 * NOTE(review): the rewinds of r1 by 4 rows after each 8x8 call presumably
 * undo extra context rows consumed by the 8x8 core, which is defined outside
 * this section - confirm against that helper.
 */
.macro  h264_qpel16_hv  type
function \type\()_h264_qpel16_hv_lowpass_neon
        mov             r9,  lr
        bl              \type\()_h264_qpel8_hv_lowpass_neon     @ top-left
        sub             r1,  r1,  r3, lsl #2
        bl              \type\()_h264_qpel8_hv_lowpass_neon     @ bottom-left
        sub             r1,  r1,  r3, lsl #4    @ src: back 16 rows ...
        sub             r1,  r1,  r3, lsl #2    @ ... plus 4 more ...
        add             r1,  r1,  #8            @ ... and 8 bytes to the right
        sub             r0,  r0,  r2, lsl #4    @ dst: back 16 rows (r2 = dst stride)
        add             r0,  r0,  #8            @ dst: 8 bytes to the right
        bl              \type\()_h264_qpel8_hv_lowpass_neon     @ top-right
        sub             r1,  r1,  r3, lsl #2
        mov             lr,  r9
        b               \type\()_h264_qpel8_hv_lowpass_neon     @ bottom-right, tail call
endfunc

/*
 * l2 variant: each quadrant is additionally averaged with 64 bytes of the
 * second source.  The 8x8 l2 routine post-increments r2 by 64 bytes per
 * call, so the four calls walk the 256-byte second source sequentially
 * in the quadrant order given above.  Here r3 serves as stride for both
 * src and dst.
 */
function \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             r9,  lr
        sub             r2,  r4,  #256          @ second source starts 256 bytes below r4
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon  @ top-left
        sub             r1,  r1,  r3, lsl #2
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon  @ bottom-left
        sub             r1,  r1,  r3, lsl #4    @ src: back 16 rows ...
        sub             r1,  r1,  r3, lsl #2    @ ... plus 4 more ...
        add             r1,  r1,  #8            @ ... and 8 bytes to the right
        sub             r0,  r0,  r3, lsl #4    @ dst: back 16 rows
        add             r0,  r0,  #8            @ dst: 8 bytes to the right
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon  @ top-right
        sub             r1,  r1,  r3, lsl #2
        mov             lr,  r9
        b               \type\()_h264_qpel8_hv_lowpass_l2_neon  @ bottom-right, tail call
endfunc
.endm

        /* Instantiate the put and avg variants. */
        h264_qpel16_hv put
        h264_qpel16_hv avg
576cabdff1aSopenharmony_ci
/*
 * Exported 8x8 quarter-pel entry points ff_<type>_h264_qpel8_mcXY_neon
 * (type = put or avg).
 *
 * Entry registers as used by the code below:
 *   r0 = dst, r1 = src, r2 = stride (applied to both src and dst).
 * Going by the register setup, X and Y are presumably the horizontal and
 * vertical quarter-sample offsets of the H.264 naming scheme.
 *
 * Conventions shared by the functions in this macro:
 *  - lowpass_const loads the filter constants 5 and 20 into d6[0].  The core
 *    register passed to it is pure scratch and is overwritten afterwards.
 *  - d8-d15 are callee-saved under the AAPCS, hence vpush/vpop around the
 *    helper calls.
 *  - Functions needing a temporary block save sp in r11, align sp down to
 *    16 bytes and carve the buffer out of the stack.  Lines prefixed A or T
 *    are the ARM-mode and Thumb-mode alternatives (prefixes presumably
 *    provided by libavutil/arm/asm.S); the Thumb form goes through a
 *    scratch register instead of operating on sp directly.
 *  - "ldrd r0, r1, [r11], #8" reloads the dst/src pair pushed at entry and
 *    steps r11 past it, so the final pop list omits r0 and r1.
 *  - Several entry points only adjust a pointer, push exactly the register
 *    list of a sibling and branch to that sibling at its internal label.
 *    Their push lists must stay in sync with the pop of the label owner.
 *  - The *_lowpass_* helpers are defined outside this section; remarks on
 *    their arguments are inferred from the call sites - confirm there.
 */
.macro  h264_qpel8      type
/* mc10: horizontal lowpass, l2 second operand = src itself. */
function ff_\type\()_h264_qpel8_mc10_neon, export=1
        lowpass_const   r3
        mov             r3,  r1                 @ second operand for the l2 helper
        sub             r1,  r1,  #2            @ filter input starts 2 bytes left of src
        mov             r12, #8                 @ presumably the row count
        b               \type\()_h264_qpel8_h_lowpass_l2_neon   @ tail call
endfunc

/* mc20: plain horizontal lowpass. */
function ff_\type\()_h264_qpel8_mc20_neon, export=1
        lowpass_const   r3
        sub             r1,  r1,  #2            @ filter input starts 2 bytes left of src
        mov             r3,  r2                 @ same stride handed over in r3
        mov             r12, #8                 @ presumably the row count
        b               \type\()_h264_qpel8_h_lowpass_neon      @ tail call
endfunc

/* mc30: as mc10, but the l2 second operand is src + 1. */
function ff_\type\()_h264_qpel8_mc30_neon, export=1
        lowpass_const   r3
        add             r3,  r1,  #1            @ second operand: one byte right of src
        sub             r1,  r1,  #2
        mov             r12, #8
        b               \type\()_h264_qpel8_h_lowpass_l2_neon   @ tail call
endfunc

/*
 * mc01: vertical lowpass, l2 second operand (r12) = src.
 * mc03 enters at the internal label with r12 = src + stride.
 */
function ff_\type\()_h264_qpel8_mc01_neon, export=1
        push            {lr}
        mov             r12, r1
\type\()_h264_qpel8_mc01:
        lowpass_const   r3
        mov             r3,  r2
        sub             r1,  r1,  r2, lsl #1    @ filter input starts 2 rows above src
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        vpop            {d8-d15}
        pop             {pc}
endfunc

/*
 * mc11: horizontal lowpass into a 64-byte stack buffer, then vertical
 * lowpass of the source saved at entry, using the buffer as l2 operand.
 * mc31, mc13 and mc33 enter at the internal label with a different
 * current r1 and/or saved src.
 */
function ff_\type\()_h264_qpel8_mc11_neon, export=1
        push            {r0, r1, r11, lr}
\type\()_h264_qpel8_mc11:
        lowpass_const   r3
        mov             r11, sp                 @ r11 -> saved {r0, r1, r11, lr}
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #64           @ 8x8 temporary block
        mov             r0,  sp                 @ first pass writes to the temporary
        sub             r1,  r1,  #2
        mov             r3,  #8                 @ temporary rows are 8 bytes apart
        mov             r12, #8
        vpush           {d8-d15}                @ lowers sp by another 64 bytes
        bl              put_h264_qpel8_h_lowpass_neon
        ldrd            r0,  r1,  [r11], #8     @ dst and src as saved at entry
        mov             r3,  r2
        add             r12, sp,  #64           @ temporary block sits above the vpushed d8-d15
        sub             r1,  r1,  r2, lsl #1
        mov             r2,  #8                 @ presumably the stride of the l2 operand
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11                @ r11 now points at saved {r11, lr}
        pop             {r11, pc}
endfunc

/*
 * mc21: horizontal lowpass into a stack buffer, then the hv lowpass of the
 * saved source averaged with that buffer.  Frame: 8*8 bytes for the
 * horizontal result plus 16*12 bytes handed to the hv helper through r4.
 * r10 is saved because the 8x8 hv l2 helper keeps its return address there.
 * mc23 enters at the internal label with r1 = src + stride.
 */
function ff_\type\()_h264_qpel8_mc21_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
\type\()_h264_qpel8_mc21:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(8*8+16*12)
        sub             r1,  r1,  #2
        mov             r3,  #8
        mov             r0,  sp
        mov             r12, #8
        vpush           {d8-d15}
        bl              put_h264_qpel8_h_lowpass_neon
        mov             r4,  r0                 @ presumably r0 was advanced past the 64 bytes written
        ldrd            r0,  r1,  [r11], #8     @ dst and src as saved at entry
        sub             r1,  r1,  r2, lsl #1    @ hv input starts 2 rows above ...
        sub             r1,  r1,  #2            @ ... and 2 bytes left of src
        mov             r3,  r2
        sub             r2,  r4,  #64           @ l2 operand: start of the horizontal result
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

/* mc31: mc11 with saved src = src + 1; the first pass still starts from src. */
function ff_\type\()_h264_qpel8_mc31_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r11, lr}
        sub             r1,  r1,  #1
        b               \type\()_h264_qpel8_mc11
endfunc

/* mc02: plain vertical lowpass. */
function ff_\type\()_h264_qpel8_mc02_neon, export=1
        push            {lr}
        lowpass_const   r3
        sub             r1,  r1,  r2, lsl #1    @ filter input starts 2 rows above src
        mov             r3,  r2
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {pc}
endfunc

/*
 * mc12: like mc21, but the first pass is the vertical lowpass.
 * mc32 enters at the internal label with r1 = src + 1.
 */
function ff_\type\()_h264_qpel8_mc12_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
\type\()_h264_qpel8_mc12:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(8*8+16*12)
        sub             r1,  r1,  r2, lsl #1
        mov             r3,  r2                 @ stride lives in r3 from here on
        mov             r2,  #8                 @ temporary rows are 8 bytes apart
        mov             r0,  sp
        vpush           {d8-d15}
        bl              put_h264_qpel8_v_lowpass_neon
        mov             r4,  r0                 @ presumably r0 was advanced past the 64 bytes written
        ldrd            r0,  r1,  [r11], #8     @ dst and src as saved at entry
        sub             r1,  r1,  r3, lsl #1    @ relies on r3 still holding the stride
        sub             r1,  r1,  #2
        sub             r2,  r4,  #64           @ l2 operand: start of the vertical result
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

/*
 * mc22: plain hv lowpass with a 16*12-byte stack scratch area passed in r4.
 * No lowpass_const here; r0 holds dst, so the Thumb alignment sequence
 * uses r4 as its scratch register.
 */
function ff_\type\()_h264_qpel8_mc22_neon, export=1
        push            {r4, r10, r11, lr}
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r4,  r11, #15
T       mov             sp,  r4
        sub             r1,  r1,  r2, lsl #1    @ hv input starts 2 rows above ...
        sub             r1,  r1,  #2            @ ... and 2 bytes left of src
        mov             r3,  r2
        sub             sp,  sp,  #(16*12)
        mov             r4,  sp
        vpush           {d8-d15}
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r10, r11, pc}
endfunc

/* mc32: mc12 with the first (vertical) pass starting from src + 1. */
function ff_\type\()_h264_qpel8_mc32_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
        add             r1,  r1,  #1
        b               \type\()_h264_qpel8_mc12
endfunc

/* mc03: mc01 with l2 second operand = src + stride. */
function ff_\type\()_h264_qpel8_mc03_neon, export=1
        push            {lr}
        add             r12, r1,  r2
        b               \type\()_h264_qpel8_mc01
endfunc

/* mc13: mc11 with the first (horizontal) pass starting from src + stride. */
function ff_\type\()_h264_qpel8_mc13_neon, export=1
        push            {r0, r1, r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel8_mc11
endfunc

/* mc23: mc21 with the first (horizontal) pass starting from src + stride. */
function ff_\type\()_h264_qpel8_mc23_neon, export=1
        push            {r0, r1, r4, r10, r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel8_mc21
endfunc

/* mc33: mc11 with saved src = src + 1 and first pass from src + stride. */
function ff_\type\()_h264_qpel8_mc33_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r11, lr}       @ saved src is src + 1
        add             r1,  r1,  r2
        sub             r1,  r1,  #1            @ current r1 = src + stride
        b               \type\()_h264_qpel8_mc11
endfunc
.endm

        /* Instantiate the put and avg variants. */
        h264_qpel8 put
        h264_qpel8 avg
765cabdff1aSopenharmony_ci
/*
 * Exported 16x16 quarter-pel entry points ff_<type>_h264_qpel16_mcXY_neon
 * (type = put or avg).
 *
 * Entry registers as used by the code below:
 *   r0 = dst, r1 = src, r2 = stride (applied to both src and dst).
 * Going by the register setup, X and Y are presumably the horizontal and
 * vertical quarter-sample offsets of the H.264 naming scheme.
 *
 * Conventions shared by the functions in this macro:
 *  - lowpass_const loads the filter constants 5 and 20 into d6[0].  The core
 *    register passed to it is pure scratch and is overwritten afterwards.
 *  - d8-d15 are callee-saved under the AAPCS, hence vpush/vpop around the
 *    helper calls.
 *  - Functions needing a temporary block save sp in r11, align sp down to
 *    16 bytes and carve the buffer out of the stack.  Lines prefixed A or T
 *    are the ARM-mode and Thumb-mode alternatives (prefixes presumably
 *    provided by libavutil/arm/asm.S).
 *  - "ldrd r0, r1, [r11], #8" reloads the dst/src pair pushed at entry and
 *    steps r11 past it, so the final pop list omits r0 and r1.
 *  - Several entry points only adjust a pointer, push exactly the register
 *    list of a sibling and branch to that sibling at its internal label.
 *    Their push lists must stay in sync with the pop of the label owner.
 *  - The *_lowpass_* helpers called here are defined outside this macro;
 *    remarks on their arguments are inferred from the call sites - confirm
 *    there.  r4, r5, r9 and r10 are saved wherever a callee may use them:
 *    the 16x16 hv helpers keep their return address in r9 and the 8x8 hv
 *    l2 helper keeps its own in r10.
 */
.macro  h264_qpel16     type
/* mc10: horizontal lowpass, l2 second operand = src itself. */
function ff_\type\()_h264_qpel16_mc10_neon, export=1
        lowpass_const   r3
        mov             r3,  r1                 @ second operand for the l2 helper
        sub             r1,  r1,  #2            @ filter input starts 2 bytes left of src
        b               \type\()_h264_qpel16_h_lowpass_l2_neon  @ tail call
endfunc

/* mc20: plain horizontal lowpass. */
function ff_\type\()_h264_qpel16_mc20_neon, export=1
        lowpass_const   r3
        sub             r1,  r1,  #2
        mov             r3,  r2                 @ same stride handed over in r3
        b               \type\()_h264_qpel16_h_lowpass_neon     @ tail call
endfunc

/* mc30: as mc10, but the l2 second operand is src + 1. */
function ff_\type\()_h264_qpel16_mc30_neon, export=1
        lowpass_const   r3
        add             r3,  r1,  #1            @ second operand: one byte right of src
        sub             r1,  r1,  #2
        b               \type\()_h264_qpel16_h_lowpass_l2_neon  @ tail call
endfunc

/*
 * mc01: vertical lowpass, l2 second operand (r12) = src.
 * mc03 enters at the internal label with r12 = src + stride.
 */
function ff_\type\()_h264_qpel16_mc01_neon, export=1
        push            {r4, lr}
        mov             r12, r1
\type\()_h264_qpel16_mc01:
        lowpass_const   r3
        mov             r3,  r2
        sub             r1,  r1,  r2, lsl #1    @ filter input starts 2 rows above src
        vpush           {d8-d15}
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/*
 * mc11: horizontal lowpass into a 256-byte stack buffer, then vertical
 * lowpass of the source saved at entry, using the buffer as l2 operand.
 * mc31, mc13 and mc33 enter at the internal label with a different
 * current r1 and/or saved src.
 */
function ff_\type\()_h264_qpel16_mc11_neon, export=1
        push            {r0, r1, r4, r11, lr}
\type\()_h264_qpel16_mc11:
        lowpass_const   r3
        mov             r11, sp                 @ r11 -> saved {r0, r1, r4, r11, lr}
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #256          @ 16x16 temporary block
        mov             r0,  sp                 @ first pass writes to the temporary
        sub             r1,  r1,  #2
        mov             r3,  #16                @ temporary rows are 16 bytes apart
        vpush           {d8-d15}                @ lowers sp by another 64 bytes
        bl              put_h264_qpel16_h_lowpass_neon
        ldrd            r0,  r1,  [r11], #8     @ dst and src as saved at entry
        mov             r3,  r2
        add             r12, sp,  #64           @ temporary block sits above the vpushed d8-d15
        sub             r1,  r1,  r2, lsl #1
        mov             r2,  #16                @ presumably the stride of the l2 operand
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11                @ r11 now points at saved {r4, r11, lr}
        pop             {r4, r11, pc}
endfunc

/*
 * mc21: packed horizontal lowpass into a stack buffer, then the hv lowpass
 * of the saved source averaged with that buffer.  Frame: 16*16 bytes for
 * the horizontal result plus 16*12 bytes reached through r4.  The 16x16 hv
 * l2 helper computes its l2 operand itself as r4 - 256.
 * mc23 enters at the internal label with r1 = src + stride.
 */
function ff_\type\()_h264_qpel16_mc21_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
\type\()_h264_qpel16_mc21:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(16*16+16*12)
        sub             r1,  r1,  #2
        mov             r0,  sp
        vpush           {d8-d15}
        bl              put_h264_qpel16_h_lowpass_neon_packed
        mov             r4,  r0                 @ presumably r0 was advanced past the 256 bytes written
        ldrd            r0,  r1,  [r11], #8     @ dst and src as saved at entry
        sub             r1,  r1,  r2, lsl #1    @ hv input starts 2 rows above ...
        sub             r1,  r1,  #2            @ ... and 2 bytes left of src
        mov             r3,  r2
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4-r5, r9-r11, pc}
endfunc

/* mc31: mc11 with saved src = src + 1; the first pass still starts from src. */
function ff_\type\()_h264_qpel16_mc31_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r4, r11, lr}
        sub             r1,  r1,  #1
        b               \type\()_h264_qpel16_mc11
endfunc

/* mc02: plain vertical lowpass. */
function ff_\type\()_h264_qpel16_mc02_neon, export=1
        push            {r4, lr}
        lowpass_const   r3
        sub             r1,  r1,  r2, lsl #1    @ filter input starts 2 rows above src
        mov             r3,  r2
        vpush           {d8-d15}
        bl              \type\()_h264_qpel16_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc

/*
 * mc12: like mc21, but the first pass is the packed vertical lowpass.
 * mc32 enters at the internal label with r1 = src + 1.
 */
function ff_\type\()_h264_qpel16_mc12_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
\type\()_h264_qpel16_mc12:
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r0,  r11, #15
T       mov             sp,  r0
        sub             sp,  sp,  #(16*16+16*12)
        sub             r1,  r1,  r2, lsl #1
        mov             r0,  sp
        mov             r3,  r2                 @ stride lives in r3 from here on
        vpush           {d8-d15}
        bl              put_h264_qpel16_v_lowpass_neon_packed
        mov             r4,  r0                 @ presumably r0 was advanced past the 256 bytes written
        ldrd            r0,  r1,  [r11], #8     @ dst and src as saved at entry
        sub             r1,  r1,  r3, lsl #1    @ relies on r3 still holding the stride
        sub             r1,  r1,  #2
        mov             r2,  r3
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4-r5, r9-r11, pc}
endfunc

/*
 * mc22: plain hv lowpass with a 16*12-byte stack scratch area passed in r4.
 * r0 holds dst, so the Thumb alignment sequence uses r4 as scratch.
 * NOTE(review): r3 is overwritten with the stride before the call and the
 * qpel8 mc22 entry point has no lowpass_const at all, so the one below
 * looks redundant if the hv core loads the constants itself - confirm
 * against put_h264_qpel8_hv_lowpass_neon_top before removing it.
 */
function ff_\type\()_h264_qpel16_mc22_neon, export=1
        push            {r4, r9-r11, lr}
        lowpass_const   r3
        mov             r11, sp
A       bic             sp,  sp,  #15
T       bic             r4,  r11, #15
T       mov             sp,  r4
        sub             r1,  r1,  r2, lsl #1    @ hv input starts 2 rows above ...
        sub             r1,  r1,  #2            @ ... and 2 bytes left of src
        mov             r3,  r2
        sub             sp,  sp,  #(16*12)
        mov             r4,  sp
        vpush           {d8-d15}
        bl              \type\()_h264_qpel16_hv_lowpass_neon
        vpop            {d8-d15}
        mov             sp,  r11
        pop             {r4, r9-r11, pc}
endfunc

/* mc32: mc12 with the first (vertical) pass starting from src + 1. */
function ff_\type\()_h264_qpel16_mc32_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
        add             r1,  r1,  #1
        b               \type\()_h264_qpel16_mc12
endfunc

/* mc03: mc01 with l2 second operand = src + stride. */
function ff_\type\()_h264_qpel16_mc03_neon, export=1
        push            {r4, lr}
        add             r12, r1,  r2
        b               \type\()_h264_qpel16_mc01
endfunc

/* mc13: mc11 with the first (horizontal) pass starting from src + stride. */
function ff_\type\()_h264_qpel16_mc13_neon, export=1
        push            {r0, r1, r4, r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel16_mc11
endfunc

/* mc23: mc21 with the first (horizontal) pass starting from src + stride. */
function ff_\type\()_h264_qpel16_mc23_neon, export=1
        push            {r0, r1, r4-r5, r9-r11, lr}
        add             r1,  r1,  r2
        b               \type\()_h264_qpel16_mc21
endfunc

/* mc33: mc11 with saved src = src + 1 and first pass from src + stride. */
function ff_\type\()_h264_qpel16_mc33_neon, export=1
        add             r1,  r1,  #1
        push            {r0, r1, r4, r11, lr}   @ saved src is src + 1
        add             r1,  r1,  r2
        sub             r1,  r1,  #1            @ current r1 = src + stride
        b               \type\()_h264_qpel16_mc11
endfunc
.endm

        /* Instantiate the put and avg variants. */
        h264_qpel16 put
        h264_qpel16 avg
947