/*
 * VP8 NEON optimisations
 *
 * Copyright (c) 2010 Rob Clark <rob@ti.com>
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23cabdff1aSopenharmony_ci
#include "libavutil/arm/asm.S"
#include "neon.S"
26cabdff1aSopenharmony_ci
@ ff_vp8_luma_dc_wht_neon
@
@ 4x4 inverse WHT (per the function name) of 16 16-bit coefficients, built
@ from add/subtract butterflies only.
@   r0: destination; each of the 16 results is stored as one 16-bit value,
@       32 bytes apart (presumably the DC slot of 16 consecutive
@       16-coefficient blocks -- confirm against the C prototype)
@   r1: 16 input 16-bit coefficients, 128-bit aligned; zeroed by this function
@ Clobbers r0, r1, r3, q0-q3, q8, q15.
function ff_vp8_luma_dc_wht_neon, export=1
        vld1.16         {q0-q1},  [r1,:128]     @ d0..d3 = coeffs 0-3, 4-7, 8-11, 12-15
        vmov.i16        q15, #0

        @ first pass; the two interleaved stores zero the 32 input bytes
        vadd.i16        d4,  d0,  d3
        vadd.i16        d6,  d1,  d2
        vst1.16         {q15},    [r1,:128]!
        vsub.i16        d7,  d1,  d2
        vsub.i16        d5,  d0,  d3
        vst1.16         {q15},    [r1,:128]
        vadd.i16        q0,  q2,  q3
        vsub.i16        q1,  q2,  q3

        vmov.i16        q8, #3

        @ transpose the 4x4 block of 16-bit values held in d0..d3
        vtrn.32         d0,  d2
        vtrn.32         d1,  d3
        vtrn.16         d0,  d1
        vtrn.16         d2,  d3

        @ rounding bias: d0 feeds every output of the second pass, so each
        @ result ends up as (x + 3) >> 3
        vadd.i16        d0,  d0,  d16

        @ second pass, same butterfly structure as the first
        vadd.i16        d4,  d0,  d3
        vadd.i16        d6,  d1,  d2
        vsub.i16        d7,  d1,  d2
        vsub.i16        d5,  d0,  d3
        vadd.i16        q0,  q2,  q3
        vsub.i16        q1,  q2,  q3

        vshr.s16        q0,  q0,  #3
        vshr.s16        q1,  q1,  #3

        @ scatter the 16 results: one 16-bit store every 32 bytes
        mov             r3,  #32
        vst1.16         {d0[0]},  [r0,:16], r3
        vst1.16         {d1[0]},  [r0,:16], r3
        vst1.16         {d2[0]},  [r0,:16], r3
        vst1.16         {d3[0]},  [r0,:16], r3
        vst1.16         {d0[1]},  [r0,:16], r3
        vst1.16         {d1[1]},  [r0,:16], r3
        vst1.16         {d2[1]},  [r0,:16], r3
        vst1.16         {d3[1]},  [r0,:16], r3
        vst1.16         {d0[2]},  [r0,:16], r3
        vst1.16         {d1[2]},  [r0,:16], r3
        vst1.16         {d2[2]},  [r0,:16], r3
        vst1.16         {d3[2]},  [r0,:16], r3
        vst1.16         {d0[3]},  [r0,:16], r3
        vst1.16         {d1[3]},  [r0,:16], r3
        vst1.16         {d2[3]},  [r0,:16], r3
        vst1.16         {d3[3]},  [r0,:16], r3

        bx              lr
endfunc
79cabdff1aSopenharmony_ci
@ ff_vp8_idct_add_neon
@
@ 4x4 inverse DCT (per the function name) of one coefficient block; the
@ result is added to the destination pixels with unsigned saturation.
@   r0: destination pixels, four rows of 4 bytes, each row 32-bit aligned
@   r1: 16 16-bit coefficients, 128-bit aligned; zeroed by this function
@   r2: destination stride in bytes
@ Multiplier constants packed into d4 (16-bit lanes):
@   d4[0] = 20091     used as  x + ((x * 20091) >> 16)  (vmull/vshrn/vadd)
@   d4[1] = 35468 / 2 used as  (x * 35468) >> 16        (doubling vqdmulh;
@                     35468 itself does not fit a signed 16-bit lane)
@ Clobbers r0 (restored to its input value before the stores, then advanced
@ by 4 strides), r1, r3, q0-q2, q8-q13, q15.
function ff_vp8_idct_add_neon, export=1
        vld1.16         {q0-q1},  [r1,:128]     @ d0..d3 = coeffs 0-3, 4-7, 8-11, 12-15
        movw            r3,  #20091
        movt            r3,  #35468/2
        vdup.32         d4,  r3

        @ first pass: scaled versions of d1 and d3
        vmull.s16       q12, d1,  d4[0]
        vmull.s16       q13, d3,  d4[0]
        vqdmulh.s16     d20, d1,  d4[1]
        vqdmulh.s16     d23, d3,  d4[1]
        vshrn.s32       d21, q12, #16
        vshrn.s32       d22, q13, #16
        vadd.s16        d21, d21, d1
        vadd.s16        d22, d22, d3

        @ butterflies; the four outputs land in d0, d1, d3, d2 (in that order)
        vadd.s16        d16, d0,  d2
        vsub.s16        d17, d0,  d2
        vadd.s16        d18, d21, d23
        vsub.s16        d19, d20, d22
        vadd.s16        q0,  q8,  q9
        vsub.s16        q1,  q8,  q9

        @ 4x4 transpose, taking the d0, d1, d3, d2 ordering into account
        vtrn.32         d0,  d3
        vtrn.32         d1,  d2
        vtrn.16         d0,  d1
        vtrn.16         d3,  d2

        @ second pass; the two interleaved stores zero the coefficient block
        vmov.i16        q15, #0
        vmull.s16       q12, d1,  d4[0]
        vst1.16         {q15},    [r1,:128]!
        vmull.s16       q13, d2,  d4[0]
        vst1.16         {q15},    [r1,:128]
        vqdmulh.s16     d21, d1,  d4[1]
        vqdmulh.s16     d23, d2,  d4[1]
        vshrn.s32       d20, q12, #16
        vshrn.s32       d22, q13, #16
        vadd.i16        d20, d20, d1
        vadd.i16        d22, d22, d2

        @ butterflies, interleaved with loading the four destination rows
        vadd.i16        d16, d0,  d3
        vsub.i16        d17, d0,  d3
        vadd.i16        d18, d20, d23
        vld1.32         {d20[]},  [r0,:32], r2  @ row 0
        vsub.i16        d19, d21, d22
        vld1.32         {d22[]},  [r0,:32], r2  @ row 1
        vadd.s16        q0,  q8,  q9
        vld1.32         {d23[]},  [r0,:32], r2  @ row 2
        vsub.s16        q1,  q8,  q9
        vld1.32         {d21[]},  [r0,:32], r2  @ row 3
        vrshr.s16       q0,  q0,  #3            @ (x + 4) >> 3
        vtrn.32         q10, q11                @ d20 = rows 0,1   d21 = rows 3,2
        vrshr.s16       q1,  q1,  #3

        sub             r0,  r0,  r2,  lsl #2   @ rewind to row 0

        vtrn.32         d0,  d3
        vtrn.32         d1,  d2
        vtrn.16         d0,  d1
        vtrn.16         d3,  d2

        @ add the residual to the pixels and saturate to 0..255
        vaddw.u8        q0,  q0,  d20
        vaddw.u8        q1,  q1,  d21
        vqmovun.s16     d0,  q0
        vqmovun.s16     d1,  q1

        @ d1 holds rows 3,2, hence lane 1 is stored before lane 0
        vst1.32         {d0[0]},  [r0,:32], r2
        vst1.32         {d0[1]},  [r0,:32], r2
        vst1.32         {d1[1]},  [r0,:32], r2
        vst1.32         {d1[0]},  [r0,:32], r2

        bx              lr
endfunc
152cabdff1aSopenharmony_ci
@ ff_vp8_idct_dc_add_neon
@
@ DC-only variant: adds (dc + 4) >> 3 to a 4x4 pixel block with unsigned
@ saturation.
@   r0: destination pixels, four rows of 4 bytes, each row 32-bit aligned
@   r1: pointer to the 16-bit DC coefficient; that halfword is zeroed
@   r2: destination stride in bytes
@ Clobbers r0, r3, r12, q0-q3.
function ff_vp8_idct_dc_add_neon, export=1
        mov             r3,  #0
        ldrsh           r12, [r1]               @ dc (sign-extended)
        strh            r3,  [r1]               @ clear it in memory
        vdup.16         q1,  r12
        vrshr.s16       q1,  q1,  #3            @ (dc + 4) >> 3 in every lane
        @ d0 = rows 0,2 and d1 = rows 1,3 once all four loads are done
        vld1.32         {d0[]},   [r0,:32], r2
        vld1.32         {d1[]},   [r0,:32], r2
        vld1.32         {d0[1]},  [r0,:32], r2
        vld1.32         {d1[1]},  [r0,:32], r2
        vaddw.u8        q2,  q1,  d0
        vaddw.u8        q3,  q1,  d1
        sub             r0,  r0,  r2, lsl #2    @ rewind to row 0
        vqmovun.s16     d0,  q2                 @ saturate to 0..255
        vqmovun.s16     d1,  q3
        vst1.32         {d0[0]},  [r0,:32], r2
        vst1.32         {d1[0]},  [r0,:32], r2
        vst1.32         {d0[1]},  [r0,:32], r2
        vst1.32         {d1[1]},  [r0,:32], r2
        bx              lr
endfunc
174cabdff1aSopenharmony_ci
@ ff_vp8_idct_dc_add4uv_neon
@
@ DC-only add for four blocks covering an 8x8 pixel area.
@   r0: destination pixels, eight rows of 8 bytes, each row 64-bit aligned
@   r1: first of four 16-bit DC values spaced 32 bytes apart; each is zeroed
@   r2: destination stride in bytes
@ DC 0/1 (q8) apply to the left/right halves of rows 0-3, DC 2/3 (q9) to the
@ left/right halves of rows 4-7.
@ Clobbers r0, r1, r3, q0-q3, q8-q13.
function ff_vp8_idct_dc_add4uv_neon, export=1
        vmov.i16        d0,  #0
        mov             r3,  #32
        @ broadcast each DC into one d register, clearing it in memory
        vld1.16         {d16[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d17[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d18[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d19[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        mov             r3,  r0                 @ r3 = store pointer, r0 = load pointer
        vrshr.s16       q8,  q8,  #3            @ dc = (dc + 4) >> 3
        vld1.8          {d0},     [r0,:64], r2
        vrshr.s16       q9,  q9,  #3
        vld1.8          {d1},     [r0,:64], r2
        vaddw.u8        q10, q8,  d0
        vld1.8          {d2},     [r0,:64], r2
        vaddw.u8        q0,  q8,  d1
        vld1.8          {d3},     [r0,:64], r2
        vaddw.u8        q11, q8,  d2
        vld1.8          {d4},     [r0,:64], r2
        vaddw.u8        q1,  q8,  d3
        vld1.8          {d5},     [r0,:64], r2
        vaddw.u8        q12, q9,  d4
        vld1.8          {d6},     [r0,:64], r2
        vaddw.u8        q2,  q9,  d5
        vld1.8          {d7},     [r0,:64], r2
        vaddw.u8        q13, q9,  d6
        vqmovun.s16     d20, q10                @ saturate to 0..255
        vaddw.u8        q3,  q9,  d7
        vqmovun.s16     d21, q0
        vqmovun.s16     d22, q11
        vst1.8          {d20},    [r3,:64], r2
        vqmovun.s16     d23, q1
        vst1.8          {d21},    [r3,:64], r2
        vqmovun.s16     d24, q12
        vst1.8          {d22},    [r3,:64], r2
        vqmovun.s16     d25, q2
        vst1.8          {d23},    [r3,:64], r2
        vqmovun.s16     d26, q13
        vst1.8          {d24},    [r3,:64], r2
        vqmovun.s16     d27, q3
        vst1.8          {d25},    [r3,:64], r2
        vst1.8          {d26},    [r3,:64], r2
        vst1.8          {d27},    [r3,:64], r2

        bx              lr
endfunc
224cabdff1aSopenharmony_ci
@ ff_vp8_idct_dc_add4y_neon
@
@ DC-only add for four blocks covering a 16x4 pixel area.
@   r0: destination pixels, four rows of 16 bytes, each row 128-bit aligned
@   r1: first of four 16-bit DC values spaced 32 bytes apart; each is zeroed
@   r2: destination stride in bytes
@ DC 0/1 (q8) apply to pixels 0-3/4-7 of every row, DC 2/3 (q9) to pixels
@ 8-11/12-15.
@ Clobbers r0, r1, r3, q0-q3, q8-q13.
function ff_vp8_idct_dc_add4y_neon, export=1
        vmov.i16        d0,  #0
        mov             r3,  #32
        @ broadcast each DC into one d register, clearing it in memory
        vld1.16         {d16[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d17[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d18[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vld1.16         {d19[]},  [r1,:16]
        vst1.16         {d0[0]},  [r1,:16], r3
        vrshr.s16       q8,  q8,  #3            @ dc = (dc + 4) >> 3
        vld1.8          {q0},     [r0,:128], r2
        vrshr.s16       q9,  q9,  #3
        vld1.8          {q1},     [r0,:128], r2
        vaddw.u8        q10, q8,  d0
        vld1.8          {q2},     [r0,:128], r2
        vaddw.u8        q0,  q9,  d1
        vld1.8          {q3},     [r0,:128], r2
        vaddw.u8        q11, q8,  d2
        vaddw.u8        q1,  q9,  d3
        vaddw.u8        q12, q8,  d4
        vaddw.u8        q2,  q9,  d5
        vaddw.u8        q13, q8,  d6
        vaddw.u8        q3,  q9,  d7
        sub             r0,  r0,  r2,  lsl #2   @ rewind to row 0
        vqmovun.s16     d20, q10                @ saturate to 0..255
        vqmovun.s16     d21, q0
        vqmovun.s16     d22, q11
        vqmovun.s16     d23, q1
        vqmovun.s16     d24, q12
        vst1.8          {q10},    [r0,:128], r2
        vqmovun.s16     d25, q2
        vst1.8          {q11},    [r0,:128], r2
        vqmovun.s16     d26, q13
        vst1.8          {q12},    [r0,:128], r2
        vqmovun.s16     d27, q3
        vst1.8          {q13},    [r0,:128], r2

        bx              lr
endfunc
266cabdff1aSopenharmony_ci
@ vp8_loop_filter: loop filter core, operating on 16 pixels per register.
@
@ Macro parameters:
@   inner=1   inner-edge variant, adjusts P1..Q1
@   simple=1  simple variant, adjusts P0/Q0 only
@   neither   macroblock-edge variant, adjusts P2..Q2
@
@ Register layout on entry:
@   P3..Q3 -> q0..q7        (simple: only P1..Q1 in q2..q5 are read)
@   flim_E -> q14           (simple: the single filter limit)
@   flim_I -> q15           (not read when simple=1)
@   hev_thresh -> r12       (not read when simple=1)
@
@ On exit the filtered pixels are back in unsigned form in their input
@ registers.  q0 and q7 are never written; q8..q15 are clobbered.
@
.macro  vp8_loop_filter, inner=0, simple=0
    .if \simple
        vabd.u8         q9,  q3,  q4            @ abs(P0-Q0)
        vabd.u8         q15, q2,  q5            @ abs(P1-Q1)
        vqadd.u8        q9,  q9,  q9            @ abs(P0-Q0) * 2
        vshr.u8         q10, q15, #1            @ abs(P1-Q1) / 2
        vqadd.u8        q11, q9,  q10           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
        vmov.i8         q13, #0x80
        vcle.u8         q8,  q11, q14           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim
    .else
        @ calculate hev and normal_limit:
        vabd.u8         q12, q2,  q3            @ abs(P1-P0)
        vabd.u8         q13, q5,  q4            @ abs(Q1-Q0)
        vabd.u8         q10, q0,  q1            @ abs(P3-P2)
        vabd.u8         q11, q1,  q2            @ abs(P2-P1)
        vcle.u8         q8,  q12, q15           @ abs(P1-P0) <= flim_I
        vcle.u8         q9,  q13, q15           @ abs(Q1-Q0) <= flim_I
        vcle.u8         q10, q10, q15           @ abs(P3-P2) <= flim_I
        vcle.u8         q11, q11, q15           @ abs(P2-P1) <= flim_I
        vand            q8,  q8,  q9
        vabd.u8         q9,  q7,  q6            @ abs(Q3-Q2)
        vand            q8,  q8,  q11
        vabd.u8         q11, q6,  q5            @ abs(Q2-Q1)
        vand            q8,  q8,  q10
        vcle.u8         q10, q9,  q15           @ abs(Q3-Q2) <= flim_I
        vcle.u8         q11, q11, q15           @ abs(Q2-Q1) <= flim_I
        vabd.u8         q9,  q3,  q4            @ abs(P0-Q0)
        vabd.u8         q15, q2,  q5            @ abs(P1-Q1)
        vand            q8,  q8,  q10
        vqadd.u8        q9,  q9,  q9            @ abs(P0-Q0) * 2
        vand            q8,  q8,  q11
        vshr.u8         q10, q15, #1            @ abs(P1-Q1) / 2
        vdup.8          q15, r12                @ hev_thresh
        vqadd.u8        q11, q9,  q10           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2)
        vcgt.u8         q12, q12, q15           @ abs(P1-P0) > hev_thresh
        vcle.u8         q11, q11, q14           @ (abs(P0-Q0)*2) + (abs(P1-Q1)/2) <= flim_E
        vcgt.u8         q14, q13, q15           @ abs(Q1-Q0) > hev_thresh
        vand            q8,  q8,  q11
        vmov.i8         q13, #0x80
        vorr            q9,  q12, q14
    .endif

        @ at this point:
        @   q8:  normal_limit (per-pixel all-ones/all-zeros mask)
        @   q9:  hev mask (simple=1: holds abs(P0-Q0)*2 instead and is not
        @        used again)
        @   q13: #0x80

        @ convert to signed value:
        veor            q3,  q3,  q13           @ PS0 = P0 ^ 0x80
        veor            q4,  q4,  q13           @ QS0 = Q0 ^ 0x80

        vmov.i16        q12, #3
        vsubl.s8        q10, d8,  d6            @ QS0 - PS0
        vsubl.s8        q11, d9,  d7            @   (widened to 16 bits)
        veor            q2,  q2,  q13           @ PS1 = P1 ^ 0x80
        veor            q5,  q5,  q13           @ QS1 = Q1 ^ 0x80
        vmul.i16        q10, q10, q12           @ w = 3 * (QS0 - PS0)
        vmul.i16        q11, q11, q12

        vqsub.s8        q12, q2,  q5            @ clamp(PS1-QS1)
        vmov.i8         q14, #4
        vmov.i8         q15, #3
    .if \inner
        vand            q12, q12, q9            @ inner: keep clamp(PS1-QS1) only where hev
    .endif
        vaddw.s8        q10, q10, d24           @ w += clamp(PS1-QS1)  (hev-masked if inner)
        vaddw.s8        q11, q11, d25
        vqmovn.s16      d20, q10                @ narrow result back into q10
        vqmovn.s16      d21, q11
    .if !\inner && !\simple
        veor            q1,  q1,  q13           @ PS2 = P2 ^ 0x80
        veor            q6,  q6,  q13           @ QS2 = Q2 ^ 0x80
    .endif
        vand            q10, q10, q8            @ w &= normal_limit

        @ registers used at this point..
        @   q0 -> P3  (don't corrupt)
        @   q1-q6 -> PS2-QS2  (q1/q6 still unsigned unless !inner && !simple)
        @   q7 -> Q3  (don't corrupt)
        @   q9 -> hev  (not when simple=1)
        @   q10 -> w
        @   q13 -> #0x80
        @   q14 -> #4
        @   q15 -> #3
        @   q8, q11, q12 -> unused

        @ filter_common:   is4tap==1
        @   c1 = clamp(w + 4) >> 3;
        @   c2 = clamp(w + 3) >> 3;
        @   Q0 = s2u(QS0 - c1);
        @   P0 = s2u(PS0 + c2);

    .if \simple
        vqadd.s8        q11, q10, q14           @ c1 = clamp(w+4)
        vqadd.s8        q12, q10, q15           @ c2 = clamp(w+3)
        vshr.s8         q11, q11, #3            @ c1 >>= 3
        vshr.s8         q12, q12, #3            @ c2 >>= 3
        vqsub.s8        q4,  q4,  q11           @ QS0 = clamp(QS0-c1)
        vqadd.s8        q3,  q3,  q12           @ PS0 = clamp(PS0+c2)
        veor            q4,  q4,  q13           @ Q0 = QS0 ^ 0x80
        veor            q3,  q3,  q13           @ P0 = PS0 ^ 0x80
        veor            q5,  q5,  q13           @ Q1 = QS1 ^ 0x80  (value unchanged)
        veor            q2,  q2,  q13           @ P1 = PS1 ^ 0x80  (value unchanged)
    .elseif \inner
        @ the !is4tap case of filter_common, only used for inner blocks
        @   c3 = ((c1&~hev) + 1) >> 1;
        @   Q1 = s2u(QS1 - c3);
        @   P1 = s2u(PS1 + c3);
        vqadd.s8        q11, q10, q14           @ c1 = clamp(w+4)
        vqadd.s8        q12, q10, q15           @ c2 = clamp(w+3)
        vshr.s8         q11, q11, #3            @ c1 >>= 3
        vshr.s8         q12, q12, #3            @ c2 >>= 3
        vqsub.s8        q4,  q4,  q11           @ QS0 = clamp(QS0-c1)
        vqadd.s8        q3,  q3,  q12           @ PS0 = clamp(PS0+c2)
        vbic            q11, q11, q9            @ c1 & ~hev
        veor            q4,  q4,  q13           @ Q0 = QS0 ^ 0x80
        vrshr.s8        q11, q11, #1            @ c3 = ((c1&~hev) + 1) >> 1
        veor            q3,  q3,  q13           @ P0 = PS0 ^ 0x80
        vqsub.s8        q5,  q5,  q11           @ QS1 = clamp(QS1-c3)
        vqadd.s8        q2,  q2,  q11           @ PS1 = clamp(PS1+c3)
        veor            q5,  q5,  q13           @ Q1 = QS1 ^ 0x80
        veor            q2,  q2,  q13           @ P1 = PS1 ^ 0x80
    .else
        vand            q12, q10, q9            @ w & hev
        vqadd.s8        q11, q12, q14           @ c1 = clamp((w&hev)+4)
        vqadd.s8        q12, q12, q15           @ c2 = clamp((w&hev)+3)
        vshr.s8         q11, q11, #3            @ c1 >>= 3
        vshr.s8         q12, q12, #3            @ c2 >>= 3
        vbic            q10, q10, q9            @ w &= ~hev
        vqsub.s8        q4,  q4,  q11           @ QS0 = clamp(QS0-c1)
        vqadd.s8        q3,  q3,  q12           @ PS0 = clamp(PS0+c2)

        @ filter_mbedge:
        @   a = clamp((27*w + 63) >> 7);
        @   Q0 = s2u(QS0 - a);
        @   P0 = s2u(PS0 + a);
        @   a = clamp((18*w + 63) >> 7);
        @   Q1 = s2u(QS1 - a);
        @   P1 = s2u(PS1 + a);
        @   a = clamp((9*w + 63) >> 7);
        @   Q2 = s2u(QS2 - a);
        @   P2 = s2u(PS2 + a);
        vmov.i16        q9,  #63
        vshll.s8        q14, d20, #3            @ 8*w, widened to 16 bits
        vshll.s8        q15, d21, #3
        vaddw.s8        q14, q14, d20           @ 9*w
        vaddw.s8        q15, q15, d21
        vadd.s16        q8,  q9,  q14
        vadd.s16        q9,  q9,  q15           @  9*w + 63
        vadd.s16        q11, q8,  q14
        vadd.s16        q12, q9,  q15           @ 18*w + 63
        vadd.s16        q14, q11, q14
        vadd.s16        q15, q12, q15           @ 27*w + 63
        vqshrn.s16      d16, q8,  #7
        vqshrn.s16      d17, q9,  #7            @ clamp(( 9*w + 63)>>7)
        vqshrn.s16      d22, q11, #7
        vqshrn.s16      d23, q12, #7            @ clamp((18*w + 63)>>7)
        vqshrn.s16      d28, q14, #7
        vqshrn.s16      d29, q15, #7            @ clamp((27*w + 63)>>7)
        vqadd.s8        q1,  q1,  q8            @ PS2 = clamp(PS2+a)
        vqsub.s8        q6,  q6,  q8            @ QS2 = clamp(QS2-a)
        vqadd.s8        q2,  q2,  q11           @ PS1 = clamp(PS1+a)
        vqsub.s8        q5,  q5,  q11           @ QS1 = clamp(QS1-a)
        vqadd.s8        q3,  q3,  q14           @ PS0 = clamp(PS0+a)
        vqsub.s8        q4,  q4,  q14           @ QS0 = clamp(QS0-a)
        veor            q3,  q3,  q13           @ P0 = PS0 ^ 0x80
        veor            q4,  q4,  q13           @ Q0 = QS0 ^ 0x80
        veor            q2,  q2,  q13           @ P1 = PS1 ^ 0x80
        veor            q5,  q5,  q13           @ Q1 = QS1 ^ 0x80
        veor            q1,  q1,  q13           @ P2 = PS2 ^ 0x80
        veor            q6,  q6,  q13           @ Q2 = QS2 ^ 0x80
    .endif
.endm
445cabdff1aSopenharmony_ci
446cabdff1aSopenharmony_ci.macro  vp8_v_loop_filter16 name, inner=0, simple=0
447cabdff1aSopenharmony_cifunction ff_vp8_v_loop_filter16\name\()_neon, export=1
448cabdff1aSopenharmony_ci        vpush           {q4-q7}
449cabdff1aSopenharmony_ci        sub             r0,  r0,  r1,  lsl #1+!\simple
450cabdff1aSopenharmony_ci
451cabdff1aSopenharmony_ci        @ Load pixels:
452cabdff1aSopenharmony_ci    .if !\simple
453cabdff1aSopenharmony_ci        ldr             r12, [sp, #64]          @ hev_thresh
454cabdff1aSopenharmony_ci        vld1.8          {q0},     [r0,:128], r1 @ P3
455cabdff1aSopenharmony_ci        vld1.8          {q1},     [r0,:128], r1 @ P2
456cabdff1aSopenharmony_ci    .endif
457cabdff1aSopenharmony_ci        vld1.8          {q2},     [r0,:128], r1 @ P1
458cabdff1aSopenharmony_ci        vld1.8          {q3},     [r0,:128], r1 @ P0
459cabdff1aSopenharmony_ci        vld1.8          {q4},     [r0,:128], r1 @ Q0
460cabdff1aSopenharmony_ci        vld1.8          {q5},     [r0,:128], r1 @ Q1
461cabdff1aSopenharmony_ci    .if !\simple
462cabdff1aSopenharmony_ci        vld1.8          {q6},     [r0,:128], r1 @ Q2
463cabdff1aSopenharmony_ci        vld1.8          {q7},     [r0,:128]     @ Q3
464cabdff1aSopenharmony_ci        vdup.8          q15, r3                 @ flim_I
465cabdff1aSopenharmony_ci    .endif
466cabdff1aSopenharmony_ci        vdup.8          q14, r2                 @ flim_E
467cabdff1aSopenharmony_ci
468cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner, simple=\simple
469cabdff1aSopenharmony_ci
470cabdff1aSopenharmony_ci        @ back up to P2:  dst -= stride * 6
471cabdff1aSopenharmony_ci        sub             r0,  r0,  r1,  lsl #2
472cabdff1aSopenharmony_ci    .if !\simple
473cabdff1aSopenharmony_ci        sub             r0,  r0,  r1,  lsl #1
474cabdff1aSopenharmony_ci
475cabdff1aSopenharmony_ci        @ Store pixels:
476cabdff1aSopenharmony_ci        vst1.8          {q1},     [r0,:128], r1 @ P2
477cabdff1aSopenharmony_ci    .endif
478cabdff1aSopenharmony_ci        vst1.8          {q2},     [r0,:128], r1 @ P1
479cabdff1aSopenharmony_ci        vst1.8          {q3},     [r0,:128], r1 @ P0
480cabdff1aSopenharmony_ci        vst1.8          {q4},     [r0,:128], r1 @ Q0
481cabdff1aSopenharmony_ci        vst1.8          {q5},     [r0,:128], r1 @ Q1
482cabdff1aSopenharmony_ci    .if !\simple
483cabdff1aSopenharmony_ci        vst1.8          {q6},     [r0,:128]     @ Q2
484cabdff1aSopenharmony_ci    .endif
485cabdff1aSopenharmony_ci
486cabdff1aSopenharmony_ci        vpop            {q4-q7}
487cabdff1aSopenharmony_ci        bx              lr
488cabdff1aSopenharmony_ciendfunc
489cabdff1aSopenharmony_ci.endm
490cabdff1aSopenharmony_ci
/* Expand the vp8_v_loop_filter16 macro three times: once with its default
 * arguments, once with name suffix _inner and inner=1, and once with name
 * suffix _simple and simple=1. */
491cabdff1aSopenharmony_civp8_v_loop_filter16
492cabdff1aSopenharmony_civp8_v_loop_filter16 _inner,  inner=1
493cabdff1aSopenharmony_civp8_v_loop_filter16 _simple, simple=1
494cabdff1aSopenharmony_ci
/*
 * vp8_v_loop_filter8uv name, inner=0
 *
 * Emits ff_vp8_v_loop_filter8uv<name>_neon: loop filter across a horizontal
 * edge of two 8-pixel-wide blocks (U and V) that are processed together.
 *
 * Register use, as read from the code below:
 *   r0        pointer into the U plane, at row Q0
 *   r1        pointer into the V plane, at row Q0
 *   r2        stride, used for both planes
 *   r3        flim_E
 *   [sp, #0]  flim_I      (read at sp+64, after the 64-byte vpush)
 *   [sp, #4]  hev_thresh  (read at sp+68 and left in r12)
 *
 * Each of q0-q7 holds one row, P3..Q3: the low d register comes from U and
 * the high d register from V.  q14 / q15 hold flim_E / flim_I replicated
 * into every byte lane.  The filtering itself is done by the vp8_loop_filter
 * macro, defined outside this section.
 * NOTE(review): vp8_loop_filter presumably consumes hev_thresh from r12 and
 * leaves the filtered rows in q1-q6 - confirm against its definition.
 *
 * Only rows P2..Q2 are written back.  q4-q7 (d8-d15) are saved on entry and
 * restored on exit.
 */
495cabdff1aSopenharmony_ci.macro  vp8_v_loop_filter8uv name, inner=0
496cabdff1aSopenharmony_cifunction ff_vp8_v_loop_filter8uv\name\()_neon, export=1
497cabdff1aSopenharmony_ci        vpush           {q4-q7}
        /* step both pointers back four rows so the first load is P3 */
498cabdff1aSopenharmony_ci        sub             r0,  r0,  r2,  lsl #2
499cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #2
500cabdff1aSopenharmony_ci        ldr             r12, [sp, #64]          @ flim_I
501cabdff1aSopenharmony_ci
502cabdff1aSopenharmony_ci        @ Load pixels:
503cabdff1aSopenharmony_ci        vld1.8          {d0},     [r0,:64], r2  @ P3
504cabdff1aSopenharmony_ci        vld1.8          {d1},     [r1,:64], r2  @ P3
505cabdff1aSopenharmony_ci        vld1.8          {d2},     [r0,:64], r2  @ P2
506cabdff1aSopenharmony_ci        vld1.8          {d3},     [r1,:64], r2  @ P2
507cabdff1aSopenharmony_ci        vld1.8          {d4},     [r0,:64], r2  @ P1
508cabdff1aSopenharmony_ci        vld1.8          {d5},     [r1,:64], r2  @ P1
509cabdff1aSopenharmony_ci        vld1.8          {d6},     [r0,:64], r2  @ P0
510cabdff1aSopenharmony_ci        vld1.8          {d7},     [r1,:64], r2  @ P0
511cabdff1aSopenharmony_ci        vld1.8          {d8},     [r0,:64], r2  @ Q0
512cabdff1aSopenharmony_ci        vld1.8          {d9},     [r1,:64], r2  @ Q0
513cabdff1aSopenharmony_ci        vld1.8          {d10},    [r0,:64], r2  @ Q1
514cabdff1aSopenharmony_ci        vld1.8          {d11},    [r1,:64], r2  @ Q1
515cabdff1aSopenharmony_ci        vld1.8          {d12},    [r0,:64], r2  @ Q2
516cabdff1aSopenharmony_ci        vld1.8          {d13},    [r1,:64], r2  @ Q2
517cabdff1aSopenharmony_ci        vld1.8          {d14},    [r0,:64]      @ Q3
518cabdff1aSopenharmony_ci        vld1.8          {d15},    [r1,:64]      @ Q3
519cabdff1aSopenharmony_ci
520cabdff1aSopenharmony_ci        vdup.8          q14, r3                 @ flim_E
521cabdff1aSopenharmony_ci        vdup.8          q15, r12                @ flim_I
        /* r12 is free again once flim_I has been splatted, so reuse it */
522cabdff1aSopenharmony_ci        ldr             r12, [sp, #68]          @ hev_thresh
523cabdff1aSopenharmony_ci
524cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner
525cabdff1aSopenharmony_ci
        /* The Q3 loads did not post-increment, so r0/r1 still point at Q3;
         * 4 + 2 = 6 rows back from there is P2. */
526cabdff1aSopenharmony_ci        @ back up to P2:  u,v -= stride * 6
527cabdff1aSopenharmony_ci        sub             r0,  r0,  r2,  lsl #2
528cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #2
529cabdff1aSopenharmony_ci        sub             r0,  r0,  r2,  lsl #1
530cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #1
531cabdff1aSopenharmony_ci
532cabdff1aSopenharmony_ci        @ Store pixels:
533cabdff1aSopenharmony_ci        vst1.8          {d2},     [r0,:64], r2  @ P2
534cabdff1aSopenharmony_ci        vst1.8          {d3},     [r1,:64], r2  @ P2
535cabdff1aSopenharmony_ci        vst1.8          {d4},     [r0,:64], r2  @ P1
536cabdff1aSopenharmony_ci        vst1.8          {d5},     [r1,:64], r2  @ P1
537cabdff1aSopenharmony_ci        vst1.8          {d6},     [r0,:64], r2  @ P0
538cabdff1aSopenharmony_ci        vst1.8          {d7},     [r1,:64], r2  @ P0
539cabdff1aSopenharmony_ci        vst1.8          {d8},     [r0,:64], r2  @ Q0
540cabdff1aSopenharmony_ci        vst1.8          {d9},     [r1,:64], r2  @ Q0
541cabdff1aSopenharmony_ci        vst1.8          {d10},    [r0,:64], r2  @ Q1
542cabdff1aSopenharmony_ci        vst1.8          {d11},    [r1,:64], r2  @ Q1
543cabdff1aSopenharmony_ci        vst1.8          {d12},    [r0,:64]      @ Q2
544cabdff1aSopenharmony_ci        vst1.8          {d13},    [r1,:64]      @ Q2
545cabdff1aSopenharmony_ci
546cabdff1aSopenharmony_ci        vpop            {q4-q7}
547cabdff1aSopenharmony_ci        bx              lr
548cabdff1aSopenharmony_ciendfunc
549cabdff1aSopenharmony_ci.endm
550cabdff1aSopenharmony_ci
/* Instantiate ff_vp8_v_loop_filter8uv_neon (default arguments) and
 * ff_vp8_v_loop_filter8uv_inner_neon (inner=1). */
551cabdff1aSopenharmony_civp8_v_loop_filter8uv
552cabdff1aSopenharmony_civp8_v_loop_filter8uv _inner, inner=1
553cabdff1aSopenharmony_ci
/*
 * vp8_h_loop_filter16 name, inner=0, simple=0
 *
 * Emits ff_vp8_h_loop_filter16<name>_neon: loop filter across a vertical
 * edge, 16 rows high.
 *
 * Register use, as read from the code below:
 *   r0        dst; 4 is subtracted so that each 8-byte row load covers the
 *             4 pixels on either side of dst
 *   r1        stride
 *   r2        flim_E
 *   r3        flim_I      (not used when simple=1)
 *   [sp, #0]  hev_thresh  (read at sp+64 into r12; not read when simple=1)
 *
 * Rows 0-7 are loaded into the even d registers (low halves of q0-q7) and
 * rows 8-15 into the odd ones (high halves).  transpose_8x8, defined outside
 * this section, is then applied so the data can be fed to the same
 * vp8_loop_filter macro that the vertical filters use; afterwards the block
 * is transposed back.
 * NOTE(review): after the first transpose q0..q7 presumably hold the pixel
 * columns P3..Q3 for all 16 rows - confirm against transpose_8x8.
 *
 * All 8 bytes of all 16 rows are written back, for every variant including
 * simple=1.  q4-q7 (d8-d15) are saved on entry and restored on exit.
 */
554cabdff1aSopenharmony_ci.macro  vp8_h_loop_filter16 name, inner=0, simple=0
555cabdff1aSopenharmony_cifunction ff_vp8_h_loop_filter16\name\()_neon, export=1
556cabdff1aSopenharmony_ci        vpush           {q4-q7}
557cabdff1aSopenharmony_ci        sub             r0,  r0,  #4
558cabdff1aSopenharmony_ci    .if !\simple
559cabdff1aSopenharmony_ci        ldr             r12, [sp, #64]          @ hev_thresh
560cabdff1aSopenharmony_ci    .endif
561cabdff1aSopenharmony_ci
        /* no alignment hint on these loads or on the stores further down */
562cabdff1aSopenharmony_ci        @ Load pixels:
563cabdff1aSopenharmony_ci        vld1.8          {d0},     [r0], r1      @ load first 8-line src data
564cabdff1aSopenharmony_ci        vld1.8          {d2},     [r0], r1
565cabdff1aSopenharmony_ci        vld1.8          {d4},     [r0], r1
566cabdff1aSopenharmony_ci        vld1.8          {d6},     [r0], r1
567cabdff1aSopenharmony_ci        vld1.8          {d8},     [r0], r1
568cabdff1aSopenharmony_ci        vld1.8          {d10},    [r0], r1
569cabdff1aSopenharmony_ci        vld1.8          {d12},    [r0], r1
570cabdff1aSopenharmony_ci        vld1.8          {d14},    [r0], r1
571cabdff1aSopenharmony_ci        vld1.8          {d1},     [r0], r1      @ load second 8-line src data
572cabdff1aSopenharmony_ci        vld1.8          {d3},     [r0], r1
573cabdff1aSopenharmony_ci        vld1.8          {d5},     [r0], r1
574cabdff1aSopenharmony_ci        vld1.8          {d7},     [r0], r1
575cabdff1aSopenharmony_ci        vld1.8          {d9},     [r0], r1
576cabdff1aSopenharmony_ci        vld1.8          {d11},    [r0], r1
577cabdff1aSopenharmony_ci        vld1.8          {d13},    [r0], r1
578cabdff1aSopenharmony_ci        vld1.8          {d15},    [r0], r1
579cabdff1aSopenharmony_ci
580cabdff1aSopenharmony_ci        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7
581cabdff1aSopenharmony_ci
582cabdff1aSopenharmony_ci        vdup.8          q14, r2                 @ flim_E
583cabdff1aSopenharmony_ci    .if !\simple
584cabdff1aSopenharmony_ci        vdup.8          q15, r3                 @ flim_I
585cabdff1aSopenharmony_ci    .endif
586cabdff1aSopenharmony_ci
587cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner, simple=\simple
588cabdff1aSopenharmony_ci
        /* all 16 loads post-incremented, so r0 is 16 rows past the start */
589cabdff1aSopenharmony_ci        sub             r0,  r0,  r1, lsl #4    @ backup 16 rows
590cabdff1aSopenharmony_ci
591cabdff1aSopenharmony_ci        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7
592cabdff1aSopenharmony_ci
593cabdff1aSopenharmony_ci        @ Store pixels:
594cabdff1aSopenharmony_ci        vst1.8          {d0},     [r0],     r1
595cabdff1aSopenharmony_ci        vst1.8          {d2},     [r0],     r1
596cabdff1aSopenharmony_ci        vst1.8          {d4},     [r0],     r1
597cabdff1aSopenharmony_ci        vst1.8          {d6},     [r0],     r1
598cabdff1aSopenharmony_ci        vst1.8          {d8},     [r0],     r1
599cabdff1aSopenharmony_ci        vst1.8          {d10},    [r0],     r1
600cabdff1aSopenharmony_ci        vst1.8          {d12},    [r0],     r1
601cabdff1aSopenharmony_ci        vst1.8          {d14},    [r0],     r1
602cabdff1aSopenharmony_ci        vst1.8          {d1},     [r0],     r1
603cabdff1aSopenharmony_ci        vst1.8          {d3},     [r0],     r1
604cabdff1aSopenharmony_ci        vst1.8          {d5},     [r0],     r1
605cabdff1aSopenharmony_ci        vst1.8          {d7},     [r0],     r1
606cabdff1aSopenharmony_ci        vst1.8          {d9},     [r0],     r1
607cabdff1aSopenharmony_ci        vst1.8          {d11},    [r0],     r1
608cabdff1aSopenharmony_ci        vst1.8          {d13},    [r0],     r1
609cabdff1aSopenharmony_ci        vst1.8          {d15},    [r0]
610cabdff1aSopenharmony_ci
611cabdff1aSopenharmony_ci        vpop            {q4-q7}
612cabdff1aSopenharmony_ci        bx              lr
613cabdff1aSopenharmony_ciendfunc
614cabdff1aSopenharmony_ci.endm
615cabdff1aSopenharmony_ci
/* Instantiate ff_vp8_h_loop_filter16_neon (default arguments),
 * ff_vp8_h_loop_filter16_inner_neon (inner=1) and
 * ff_vp8_h_loop_filter16_simple_neon (simple=1). */
616cabdff1aSopenharmony_civp8_h_loop_filter16
617cabdff1aSopenharmony_civp8_h_loop_filter16 _inner,  inner=1
618cabdff1aSopenharmony_civp8_h_loop_filter16 _simple, simple=1
619cabdff1aSopenharmony_ci
/*
 * vp8_h_loop_filter8uv name, inner=0
 *
 * Emits ff_vp8_h_loop_filter8uv<name>_neon: loop filter across a vertical
 * edge of two 8-row-high blocks (U and V) that are processed together.
 *
 * Register use, as read from the code below:
 *   r0        U pointer; 4 is subtracted so that each 8-byte row load covers
 *             the 4 pixels on either side of it
 *   r1        V pointer, adjusted the same way
 *   r2        stride, used for both planes
 *   r3        flim_E
 *   [sp, #0]  flim_I      (read at sp+64, after the 64-byte vpush)
 *   [sp, #4]  hev_thresh  (read at sp+68 and left in r12)
 *
 * U rows go to the low d register of q0-q7 and V rows to the high one.  The
 * block is transposed with transpose_8x8 (defined outside this section),
 * run through vp8_loop_filter, transposed back, and all 8 bytes of all
 * 8 rows of both planes are written back.
 * NOTE(review): after the first transpose q0..q7 presumably hold the pixel
 * columns P3..Q3 - confirm against transpose_8x8.
 *
 * q4-q7 (d8-d15) are saved on entry and restored on exit.
 */
620cabdff1aSopenharmony_ci.macro  vp8_h_loop_filter8uv name, inner=0
621cabdff1aSopenharmony_cifunction ff_vp8_h_loop_filter8uv\name\()_neon, export=1
622cabdff1aSopenharmony_ci        vpush           {q4-q7}
623cabdff1aSopenharmony_ci        sub             r0,  r0,  #4
624cabdff1aSopenharmony_ci        sub             r1,  r1,  #4
625cabdff1aSopenharmony_ci        ldr             r12, [sp, #64]          @ flim_I
626cabdff1aSopenharmony_ci
627cabdff1aSopenharmony_ci        @ Load pixels:
628cabdff1aSopenharmony_ci        vld1.8          {d0},     [r0], r2      @ load u
629cabdff1aSopenharmony_ci        vld1.8          {d1},     [r1], r2      @ load v
630cabdff1aSopenharmony_ci        vld1.8          {d2},     [r0], r2
631cabdff1aSopenharmony_ci        vld1.8          {d3},     [r1], r2
632cabdff1aSopenharmony_ci        vld1.8          {d4},     [r0], r2
633cabdff1aSopenharmony_ci        vld1.8          {d5},     [r1], r2
634cabdff1aSopenharmony_ci        vld1.8          {d6},     [r0], r2
635cabdff1aSopenharmony_ci        vld1.8          {d7},     [r1], r2
636cabdff1aSopenharmony_ci        vld1.8          {d8},     [r0], r2
637cabdff1aSopenharmony_ci        vld1.8          {d9},     [r1], r2
638cabdff1aSopenharmony_ci        vld1.8          {d10},    [r0], r2
639cabdff1aSopenharmony_ci        vld1.8          {d11},    [r1], r2
640cabdff1aSopenharmony_ci        vld1.8          {d12},    [r0], r2
641cabdff1aSopenharmony_ci        vld1.8          {d13},    [r1], r2
642cabdff1aSopenharmony_ci        vld1.8          {d14},    [r0], r2
643cabdff1aSopenharmony_ci        vld1.8          {d15},    [r1], r2
644cabdff1aSopenharmony_ci
645cabdff1aSopenharmony_ci        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7
646cabdff1aSopenharmony_ci
647cabdff1aSopenharmony_ci        vdup.8          q14, r3                 @ flim_E
648cabdff1aSopenharmony_ci        vdup.8          q15, r12                @ flim_I
        /* r12 is free again once flim_I has been splatted, so reuse it */
649cabdff1aSopenharmony_ci        ldr             r12, [sp, #68]          @ hev_thresh
650cabdff1aSopenharmony_ci
651cabdff1aSopenharmony_ci        vp8_loop_filter inner=\inner
652cabdff1aSopenharmony_ci
        /* all 8 loads per plane post-incremented, so rewind by 8 rows */
653cabdff1aSopenharmony_ci        sub             r0,  r0,  r2, lsl #3    @ backup u 8 rows
654cabdff1aSopenharmony_ci        sub             r1,  r1,  r2, lsl #3    @ backup v 8 rows
655cabdff1aSopenharmony_ci
656cabdff1aSopenharmony_ci        transpose_8x8   q0,  q1,  q2,  q3,  q4,  q5,  q6,  q7
657cabdff1aSopenharmony_ci
658cabdff1aSopenharmony_ci        @ Store pixels:
659cabdff1aSopenharmony_ci        vst1.8          {d0},     [r0], r2
660cabdff1aSopenharmony_ci        vst1.8          {d1},     [r1], r2
661cabdff1aSopenharmony_ci        vst1.8          {d2},     [r0], r2
662cabdff1aSopenharmony_ci        vst1.8          {d3},     [r1], r2
663cabdff1aSopenharmony_ci        vst1.8          {d4},     [r0], r2
664cabdff1aSopenharmony_ci        vst1.8          {d5},     [r1], r2
665cabdff1aSopenharmony_ci        vst1.8          {d6},     [r0], r2
666cabdff1aSopenharmony_ci        vst1.8          {d7},     [r1], r2
667cabdff1aSopenharmony_ci        vst1.8          {d8},     [r0], r2
668cabdff1aSopenharmony_ci        vst1.8          {d9},     [r1], r2
669cabdff1aSopenharmony_ci        vst1.8          {d10},    [r0], r2
670cabdff1aSopenharmony_ci        vst1.8          {d11},    [r1], r2
671cabdff1aSopenharmony_ci        vst1.8          {d12},    [r0], r2
672cabdff1aSopenharmony_ci        vst1.8          {d13},    [r1], r2
673cabdff1aSopenharmony_ci        vst1.8          {d14},    [r0]
674cabdff1aSopenharmony_ci        vst1.8          {d15},    [r1]
675cabdff1aSopenharmony_ci
676cabdff1aSopenharmony_ci        vpop            {q4-q7}
677cabdff1aSopenharmony_ci        bx              lr
678cabdff1aSopenharmony_ciendfunc
679cabdff1aSopenharmony_ci.endm
680cabdff1aSopenharmony_ci
/* Instantiate ff_vp8_h_loop_filter8uv_neon (default arguments) and
 * ff_vp8_h_loop_filter8uv_inner_neon (inner=1). */
681cabdff1aSopenharmony_civp8_h_loop_filter8uv
682cabdff1aSopenharmony_civp8_h_loop_filter8uv _inner, inner=1
683cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_pixels16_neon: copy a 16-byte-wide block, four rows per loop
 * iteration.
 *
 *   r0        dst (stored with a 128-bit alignment hint)
 *   r1        dst stride
 *   r2        src (loaded without an alignment hint)
 *   r3        src stride
 *   [sp, #0]  h, the number of rows
 *
 * h is decremented by 4 per iteration and the loop repeats while the result
 * is greater than zero, so at least four rows are always copied and a row
 * count that is not a multiple of four is effectively rounded up.
 * Scratch: r12, q0-q3.
 */
684cabdff1aSopenharmony_cifunction ff_put_vp8_pixels16_neon, export=1
685cabdff1aSopenharmony_ci        ldr             r12, [sp, #0]           @ h
686cabdff1aSopenharmony_ci1:
        /* the NEON loads/stores below leave the flags set here untouched,
         * so the bgt at the bottom still tests this subtraction */
687cabdff1aSopenharmony_ci        subs            r12, r12, #4
688cabdff1aSopenharmony_ci        vld1.8          {q0},     [r2], r3
689cabdff1aSopenharmony_ci        vld1.8          {q1},     [r2], r3
690cabdff1aSopenharmony_ci        vld1.8          {q2},     [r2], r3
691cabdff1aSopenharmony_ci        vld1.8          {q3},     [r2], r3
692cabdff1aSopenharmony_ci        vst1.8          {q0},     [r0,:128], r1
693cabdff1aSopenharmony_ci        vst1.8          {q1},     [r0,:128], r1
694cabdff1aSopenharmony_ci        vst1.8          {q2},     [r0,:128], r1
695cabdff1aSopenharmony_ci        vst1.8          {q3},     [r0,:128], r1
696cabdff1aSopenharmony_ci        bgt             1b
697cabdff1aSopenharmony_ci        bx              lr
698cabdff1aSopenharmony_ciendfunc
699cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_pixels8_neon: copy an 8-byte-wide block, four rows per loop
 * iteration.
 *
 *   r0        dst (stored with a 64-bit alignment hint)
 *   r1        dst stride
 *   r2        src (loaded without an alignment hint)
 *   r3        src stride
 *   [sp, #0]  h, the number of rows
 *
 * h is decremented by 4 per iteration and the loop repeats while the result
 * is greater than zero, so at least four rows are always copied and a row
 * count that is not a multiple of four is effectively rounded up.
 * Scratch: r12, d0-d3.
 */
700cabdff1aSopenharmony_cifunction ff_put_vp8_pixels8_neon, export=1
701cabdff1aSopenharmony_ci        ldr             r12, [sp, #0]           @ h
702cabdff1aSopenharmony_ci1:
        /* the NEON loads/stores below leave the flags set here untouched,
         * so the bgt at the bottom still tests this subtraction */
703cabdff1aSopenharmony_ci        subs            r12, r12, #4
704cabdff1aSopenharmony_ci        vld1.8          {d0},     [r2], r3
705cabdff1aSopenharmony_ci        vld1.8          {d1},     [r2], r3
706cabdff1aSopenharmony_ci        vld1.8          {d2},     [r2], r3
707cabdff1aSopenharmony_ci        vld1.8          {d3},     [r2], r3
708cabdff1aSopenharmony_ci        vst1.8          {d0},     [r0,:64], r1
709cabdff1aSopenharmony_ci        vst1.8          {d1},     [r0,:64], r1
710cabdff1aSopenharmony_ci        vst1.8          {d2},     [r0,:64], r1
711cabdff1aSopenharmony_ci        vst1.8          {d3},     [r0,:64], r1
712cabdff1aSopenharmony_ci        bgt             1b
713cabdff1aSopenharmony_ci        bx              lr
714cabdff1aSopenharmony_ciendfunc
715cabdff1aSopenharmony_ci
716cabdff1aSopenharmony_ci/* 4/6-tap 8th-pel MC */
717cabdff1aSopenharmony_ci
/*
 * vp8_epel8_h6 d, a, b
 *
 * Horizontal 6-tap filter producing 8 output pixels in \d.
 *
 *   \a, \b  two consecutive 8-byte chunks of one source row; output pixel i
 *           is computed from bytes i .. i+5 of the 16-byte pair \a:\b
 *   d0, d1  filter taps as 16-bit lanes: d0[0..3] are taps 0..3 and
 *           d1[0..1] are taps 4..5
 *
 * Two 3-term partial sums are built in 16-bit lanes,
 *     q10 = p2*t2 - p1*t1 + p0*t0
 *     q11 = p3*t3 - p4*t4 + p5*t5
 * then combined with a saturating signed add and narrowed with a rounding
 * right shift by 7 and unsigned saturation.  Taps 1 and 4 are applied with
 * vmls, i.e. subtracted.
 * NOTE(review): that implies the tap table stores taps 1 and 4 as
 * magnitudes - confirm against subpel_filters.
 *
 * The vext/vmovl pairs are interleaved rather than grouped; the order is
 * part of the original scheduling and is left alone.
 * Clobbers q8-q15.
 */
718cabdff1aSopenharmony_ci.macro  vp8_epel8_h6    d,   a,   b
719cabdff1aSopenharmony_ci        vext.8          d27, \a,  \b,  #1
720cabdff1aSopenharmony_ci        vmovl.u8        q8,  \a
721cabdff1aSopenharmony_ci        vext.8          d28, \a,  \b,  #2
722cabdff1aSopenharmony_ci        vmovl.u8        q9,  d27
723cabdff1aSopenharmony_ci        vext.8          d29, \a,  \b,  #3
724cabdff1aSopenharmony_ci        vmovl.u8        q10, d28
725cabdff1aSopenharmony_ci        vext.8          d30, \a,  \b,  #4
726cabdff1aSopenharmony_ci        vmovl.u8        q11, d29
727cabdff1aSopenharmony_ci        vext.8          d31, \a,  \b,  #5
728cabdff1aSopenharmony_ci        vmovl.u8        q12, d30
729cabdff1aSopenharmony_ci        vmul.u16        q10, q10, d0[2]
        /* q13 overlaps d27, which was already consumed into q9 above */
730cabdff1aSopenharmony_ci        vmovl.u8        q13, d31
731cabdff1aSopenharmony_ci        vmul.u16        q11, q11, d0[3]
732cabdff1aSopenharmony_ci        vmls.u16        q10, q9,  d0[1]
733cabdff1aSopenharmony_ci        vmls.u16        q11, q12, d1[0]
734cabdff1aSopenharmony_ci        vmla.u16        q10, q8,  d0[0]
735cabdff1aSopenharmony_ci        vmla.u16        q11, q13, d1[1]
736cabdff1aSopenharmony_ci        vqadd.s16       q11, q10, q11
737cabdff1aSopenharmony_ci        vqrshrun.s16    \d,  q11, #7
738cabdff1aSopenharmony_ci.endm
739cabdff1aSopenharmony_ci
/*
 * vp8_epel16_h6 d0, d1, s0, s1, s2, q0, q1
 *
 * Horizontal 6-tap filter producing 16 output pixels: the left 8 in \d0 and
 * the right 8 in \d1.
 *
 *   \q0, \q1  two consecutive 16-byte chunks of one source row; the vext
 *             instructions take bytes k .. k+15 of the pair for k = 1..5
 *   \s0, \s1  the low and high d halves of the unshifted row (tap 0)
 *   \s2       accepted but not referenced anywhere in this body
 *   d0, d1    filter taps as 16-bit lanes: d0[0..3] are taps 0..3 and
 *             d1[0..1] are taps 4..5
 *
 * The arithmetic per 8-pixel half is the same as in vp8_epel8_h6: partial
 * sums t2*p2 - t1*p1 + t0*p0 and t3*p3 - t4*p4 + t5*p5, a saturating signed
 * add, then a rounding right shift by 7 narrowed with unsigned saturation.
 *
 * Clobbers q1, q2, q3 and q8-q15.  Because q1 and q2 are overwritten, the
 * instruction order matters when \q0/\q1 alias them (the callers in this
 * file pass q1 and q2): every vext reading \q0/\q1 must come before the
 * write to q1, and the vext that writes q2 must be the last one.
 */
740cabdff1aSopenharmony_ci.macro  vp8_epel16_h6   d0,  d1,  s0,  s1,  s2,  q0,  q1
741cabdff1aSopenharmony_ci        vext.8          q14, \q0, \q1, #3
742cabdff1aSopenharmony_ci        vext.8          q15, \q0, \q1, #4
743cabdff1aSopenharmony_ci        vmovl.u8        q11, d28
744cabdff1aSopenharmony_ci        vmovl.u8        q14, d29
745cabdff1aSopenharmony_ci        vext.8          q3,  \q0, \q1, #2
746cabdff1aSopenharmony_ci        vmovl.u8        q12, d30
747cabdff1aSopenharmony_ci        vmovl.u8        q15, d31
748cabdff1aSopenharmony_ci        vext.8          q8,  \q0, \q1, #1
749cabdff1aSopenharmony_ci        vmovl.u8        q10, d6
750cabdff1aSopenharmony_ci        vmovl.u8        q3,  d7
        /* last vext: its destination q2 may be the same register as \q1 */
751cabdff1aSopenharmony_ci        vext.8          q2,  \q0, \q1, #5
752cabdff1aSopenharmony_ci        vmovl.u8        q13, d4
753cabdff1aSopenharmony_ci        vmovl.u8        q2,  d5
754cabdff1aSopenharmony_ci        vmovl.u8        q9,  d16
755cabdff1aSopenharmony_ci        vmovl.u8        q8,  d17
756cabdff1aSopenharmony_ci        vmul.u16        q11, q11, d0[3]
757cabdff1aSopenharmony_ci        vmul.u16        q10, q10, d0[2]
758cabdff1aSopenharmony_ci        vmul.u16        q3,  q3,  d0[2]
759cabdff1aSopenharmony_ci        vmul.u16        q14, q14, d0[3]
760cabdff1aSopenharmony_ci        vmls.u16        q11, q12, d1[0]
        /* q12 has just been consumed above, so it is reused for tap 0 of the
         * left half; q1 takes tap 0 of the right half */
761cabdff1aSopenharmony_ci        vmovl.u8        q12, \s0
762cabdff1aSopenharmony_ci        vmovl.u8        q1,  \s1
763cabdff1aSopenharmony_ci        vmls.u16        q10, q9,  d0[1]
764cabdff1aSopenharmony_ci        vmls.u16        q3,  q8,  d0[1]
765cabdff1aSopenharmony_ci        vmls.u16        q14, q15, d1[0]
766cabdff1aSopenharmony_ci        vmla.u16        q10, q12, d0[0]
767cabdff1aSopenharmony_ci        vmla.u16        q11, q13, d1[1]
768cabdff1aSopenharmony_ci        vmla.u16        q3,  q1,  d0[0]
769cabdff1aSopenharmony_ci        vmla.u16        q14, q2,  d1[1]
770cabdff1aSopenharmony_ci        vqadd.s16       q11, q10, q11
771cabdff1aSopenharmony_ci        vqadd.s16       q14, q3,  q14
772cabdff1aSopenharmony_ci        vqrshrun.s16    \d0, q11, #7
773cabdff1aSopenharmony_ci        vqrshrun.s16    \d1, q14, #7
774cabdff1aSopenharmony_ci.endm
775cabdff1aSopenharmony_ci
/*
 * vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6
 *
 * Vertical 6-tap filter producing two 8-pixel output rows from seven
 * consecutive 8-pixel input rows \s0..\s6.
 *
 *   \d0 is computed from rows s0..s5:
 *       (s0*t0 - s1*t1 + s2*t2) + (s3*t3 - s4*t4 + s5*t5)
 *   \d1 is computed from rows s1..s6:
 *       (s1*t0 - s2*t1 + s3*t2) + (s4*t3 - s5*t4 + s6*t5)
 *
 *   d0, d1  filter taps as 16-bit lanes: d0[0..3] are taps 0..3 and
 *           d1[0..1] are taps 4..5
 *
 * Each parenthesised partial sum is accumulated in 16-bit lanes; the two
 * halves are combined with a saturating signed add and narrowed with a
 * rounding right shift by 7 and unsigned saturation.
 *
 * All inputs are widened into q8-q14 before any output is written, so the
 * destination registers may be the same as input registers.
 * Clobbers q8-q15.
 */
776cabdff1aSopenharmony_ci.macro  vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6
777cabdff1aSopenharmony_ci        vmovl.u8        q10, \s0
778cabdff1aSopenharmony_ci        vmovl.u8        q11, \s3
779cabdff1aSopenharmony_ci        vmovl.u8        q14, \s6
780cabdff1aSopenharmony_ci        vmovl.u8        q9,  \s1
781cabdff1aSopenharmony_ci        vmovl.u8        q12, \s4
782cabdff1aSopenharmony_ci        vmovl.u8        q8,  \s2
783cabdff1aSopenharmony_ci        vmovl.u8        q13, \s5
784cabdff1aSopenharmony_ci        vmul.u16        q10, q10, d0[0]
        /* q15 is written before q11 is overwritten: both need the raw s3 */
785cabdff1aSopenharmony_ci        vmul.u16        q15, q11, d0[3]
786cabdff1aSopenharmony_ci        vmul.u16        q11, q11, d0[2]
787cabdff1aSopenharmony_ci        vmul.u16        q14, q14, d1[1]
788cabdff1aSopenharmony_ci        vmls.u16        q10, q9,  d0[1]
789cabdff1aSopenharmony_ci        vmls.u16        q15, q12, d1[0]
790cabdff1aSopenharmony_ci        vmls.u16        q11, q8,  d0[1]
791cabdff1aSopenharmony_ci        vmls.u16        q14, q13, d1[0]
792cabdff1aSopenharmony_ci        vmla.u16        q10, q8,  d0[2]
793cabdff1aSopenharmony_ci        vmla.u16        q15, q13, d1[1]
794cabdff1aSopenharmony_ci        vmla.u16        q11, q9,  d0[0]
795cabdff1aSopenharmony_ci        vmla.u16        q14, q12, d0[3]
796cabdff1aSopenharmony_ci        vqadd.s16       q15, q10, q15
797cabdff1aSopenharmony_ci        vqadd.s16       q14, q11, q14
798cabdff1aSopenharmony_ci        vqrshrun.s16    \d0, q15, #7
799cabdff1aSopenharmony_ci        vqrshrun.s16    \d1, q14, #7
800cabdff1aSopenharmony_ci.endm
801cabdff1aSopenharmony_ci
/*
 * vp8_epel8_h4 d, a, b
 *
 * Horizontal 4-tap filter producing 8 output pixels in \d.
 *
 *   \a, \b  two consecutive 8-byte chunks of one source row; output pixel i
 *           is computed from bytes i .. i+3 of the 16-byte pair \a:\b
 *   d0, d1  filter taps as 16-bit lanes; only d0[1], d0[2], d0[3] and d1[0]
 *           (taps 1..4 of the 6-tap layout) are used
 *
 * Partial sums, in 16-bit lanes:
 *     q10 = p1*t2 - p0*t1
 *     q11 = p2*t3 - p3*t4
 * They are combined with a saturating signed add and narrowed with a
 * rounding right shift by 7 and unsigned saturation.
 *
 * Clobbers q9-q12, q14 and d30.
 */
802cabdff1aSopenharmony_ci.macro  vp8_epel8_h4    d,   a,   b
803cabdff1aSopenharmony_ci        vext.8          d28, \a,  \b,  #1
804cabdff1aSopenharmony_ci        vmovl.u8        q9,  \a
805cabdff1aSopenharmony_ci        vext.8          d29, \a,  \b,  #2
806cabdff1aSopenharmony_ci        vmovl.u8        q10, d28
807cabdff1aSopenharmony_ci        vext.8          d30, \a,  \b,  #3
808cabdff1aSopenharmony_ci        vmovl.u8        q11, d29
809cabdff1aSopenharmony_ci        vmovl.u8        q12, d30
810cabdff1aSopenharmony_ci        vmul.u16        q10, q10, d0[2]
811cabdff1aSopenharmony_ci        vmul.u16        q11, q11, d0[3]
812cabdff1aSopenharmony_ci        vmls.u16        q10, q9,  d0[1]
813cabdff1aSopenharmony_ci        vmls.u16        q11, q12, d1[0]
814cabdff1aSopenharmony_ci        vqadd.s16       q11, q10, q11
815cabdff1aSopenharmony_ci        vqrshrun.s16    \d,  q11, #7
816cabdff1aSopenharmony_ci.endm
817cabdff1aSopenharmony_ci
/*
 * vp8_epel8_v4_y2 d0, d1, s0, s1, s2, s3, s4
 *
 * Vertical 4-tap filter producing two 8-pixel output rows from five
 * consecutive 8-pixel input rows \s0..\s4.
 *
 *   \d0 is computed from rows s0..s3:
 *       (s1*t2 - s0*t1) + (s2*t3 - s3*t4)
 *   \d1 is computed from rows s1..s4:
 *       (s2*t2 - s1*t1) + (s3*t3 - s4*t4)
 *
 *   d0, d1  filter taps as 16-bit lanes; only d0[1], d0[2], d0[3] and d1[0]
 *           (taps 1..4 of the 6-tap layout) are used
 *
 * Each parenthesised partial sum is accumulated in 16-bit lanes; the two
 * halves are combined with a saturating signed add and narrowed with a
 * rounding right shift by 7 and unsigned saturation.
 *
 * All inputs are widened into q9-q13 before any output is written, so the
 * destination registers may be the same as input registers.
 * Clobbers q8-q15.
 */
818cabdff1aSopenharmony_ci.macro  vp8_epel8_v4_y2 d0,  d1,  s0,  s1,  s2,  s3,  s4
819cabdff1aSopenharmony_ci        vmovl.u8        q9,  \s0
820cabdff1aSopenharmony_ci        vmovl.u8        q10, \s1
821cabdff1aSopenharmony_ci        vmovl.u8        q11, \s2
822cabdff1aSopenharmony_ci        vmovl.u8        q12, \s3
823cabdff1aSopenharmony_ci        vmovl.u8        q13, \s4
824cabdff1aSopenharmony_ci        vmul.u16        q8,  q10, d0[2]
        /* q14 is written before q11 is overwritten: both need the raw s2 */
825cabdff1aSopenharmony_ci        vmul.u16        q14, q11, d0[3]
826cabdff1aSopenharmony_ci        vmul.u16        q11, q11, d0[2]
827cabdff1aSopenharmony_ci        vmul.u16        q15, q12, d0[3]
828cabdff1aSopenharmony_ci        vmls.u16        q8,  q9,  d0[1]
829cabdff1aSopenharmony_ci        vmls.u16        q14, q12, d1[0]
830cabdff1aSopenharmony_ci        vmls.u16        q11, q10, d0[1]
831cabdff1aSopenharmony_ci        vmls.u16        q15, q13, d1[0]
832cabdff1aSopenharmony_ci        vqadd.s16       q8,  q8,  q14
833cabdff1aSopenharmony_ci        vqadd.s16       q11, q11, q15
834cabdff1aSopenharmony_ci        vqrshrun.s16    \d0, q8,  #7
835cabdff1aSopenharmony_ci        vqrshrun.s16    \d1, q11, #7
836cabdff1aSopenharmony_ci.endm
837cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_epel16_v6_neon: 16-pixel-wide vertical 6-tap interpolation,
 * two output rows per loop iteration.
 *
 *   r0        dst (stored with a 128-bit alignment hint)
 *   r1        dst stride
 *   r2        src; moved up two rows on entry so the filter window starts
 *             two rows above the first output row
 *   r3        src stride
 *   [sp, #0]  h   (read at sp+72: 8 bytes of push + 64 bytes of vpush)
 *   [sp, #8]  my  (read at sp+80)
 *
 * The taps are eight 16-bit values loaded into q0 from
 * subpel_filters - 16 + 16*my, i.e. 16 bytes per table entry with my = 1
 * selecting the first entry.
 *
 * h is decremented by 2 and the loop ends only when it reaches exactly
 * zero, so h must be even and non-zero.
 * d8-d15 are saved and restored because rows are loaded into q4-q7.
 */
838cabdff1aSopenharmony_cifunction ff_put_vp8_epel16_v6_neon, export=1
839cabdff1aSopenharmony_ci        sub             r2,  r2,  r3,  lsl #1
840cabdff1aSopenharmony_ci        push            {r4,lr}
841cabdff1aSopenharmony_ci        vpush           {d8-d15}
842cabdff1aSopenharmony_ci
843cabdff1aSopenharmony_ci        ldr             r4,  [sp, #80]          @ my
844cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
845cabdff1aSopenharmony_ci        ldr             r12, [sp, #72]          @ h
846cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
847cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
848cabdff1aSopenharmony_ci1:
        /* seven input rows: q1 .. q7 */
849cabdff1aSopenharmony_ci        vld1.8          {d2-d3},  [r2], r3
850cabdff1aSopenharmony_ci        vld1.8          {d4-d5},  [r2], r3
851cabdff1aSopenharmony_ci        vld1.8          {d6-d7},  [r2], r3
852cabdff1aSopenharmony_ci        vld1.8          {d8-d9},  [r2], r3
853cabdff1aSopenharmony_ci        vld1.8          {d10-d11},[r2], r3
854cabdff1aSopenharmony_ci        vld1.8          {d12-d13},[r2]
        /* six rows forward, four back: net advance of two rows */
855cabdff1aSopenharmony_ci        vld1.8          {d14-d15},[r2]
856cabdff1aSopenharmony_ci        sub             r2,  r2,  r3,  lsl #2
857cabdff1aSopenharmony_ci
        /* left 8 columns use the even d registers, right 8 the odd ones */
858cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 d2,  d4,  d2,  d4,  d6,  d8,  d10, d12, d14
859cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 d3,  d5,  d3,  d5,  d7,  d9,  d11, d13, d15
860cabdff1aSopenharmony_ci
861cabdff1aSopenharmony_ci        vst1.8          {d2-d3},  [r0,:128], r1
862cabdff1aSopenharmony_ci        vst1.8          {d4-d5},  [r0,:128], r1
863cabdff1aSopenharmony_ci        subs            r12, r12, #2
864cabdff1aSopenharmony_ci        bne             1b
865cabdff1aSopenharmony_ci
866cabdff1aSopenharmony_ci        vpop            {d8-d15}
867cabdff1aSopenharmony_ci        pop             {r4,pc}
868cabdff1aSopenharmony_ciendfunc
869cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_epel16_h6_neon: 16-pixel-wide horizontal 6-tap interpolation,
 * one output row per loop iteration.
 *
 *   r0        dst (stored with a 128-bit alignment hint)
 *   r1        dst stride
 *   r2        src; moved left by two bytes on entry so the filter window
 *             starts two pixels left of the first output pixel
 *   r3        src stride
 *   [sp, #0]  h   (read at sp+8, after the 8-byte push)
 *   [sp, #4]  mx  (read at sp+12)
 *
 * The taps are eight 16-bit values loaded into q0 from
 * subpel_filters - 16 + 16*mx.  Each iteration reads 24 bytes of the source
 * row (d2-d4).  The loop ends only when h reaches exactly zero, so h must
 * be non-zero.
 * No NEON registers are saved: vp8_epel16_h6 does not touch d8-d15.
 */
870cabdff1aSopenharmony_cifunction ff_put_vp8_epel16_h6_neon, export=1
871cabdff1aSopenharmony_ci        sub             r2,  r2,  #2
872cabdff1aSopenharmony_ci        push            {r4,lr}
873cabdff1aSopenharmony_ci
874cabdff1aSopenharmony_ci        ldr             r4,  [sp, #12]          @ mx
875cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
876cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ h
877cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
878cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
879cabdff1aSopenharmony_ci1:
880cabdff1aSopenharmony_ci        vld1.8          {d2-d4},  [r2], r3
881cabdff1aSopenharmony_ci
882cabdff1aSopenharmony_ci        vp8_epel16_h6   d2,  d3,  d2,  d3,  d4,  q1,  q2
883cabdff1aSopenharmony_ci
884cabdff1aSopenharmony_ci        vst1.8          {d2-d3}, [r0,:128], r1
885cabdff1aSopenharmony_ci        subs            r12, r12, #1
886cabdff1aSopenharmony_ci        bne             1b
887cabdff1aSopenharmony_ci
888cabdff1aSopenharmony_ci        pop             {r4,pc}
889cabdff1aSopenharmony_ciendfunc
890cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_epel16_h6v6_neon: 16-pixel-wide 6-tap interpolation in both
 * directions, done as two passes through a scratch buffer on the stack.
 *
 *   r0        dst (stored with a 128-bit alignment hint)
 *   r1        dst stride
 *   r2        src; moved up two rows and left two bytes on entry
 *   r3        src stride
 *   [sp, #0]  h
 *   [sp, #4]  mx
 *   [sp, #8]  my
 * The stack arguments are read at +72 (8 bytes of push + 64 bytes of vpush)
 * before the scratch buffer is allocated and at +336+16+72 afterwards.
 *
 * Pass 1 filters h+5 rows horizontally and stores them as 16 bytes each in
 * the scratch buffer.  Pass 2 filters that buffer vertically, two output
 * rows per iteration.
 *
 * The buffer reserves 336 bytes = 21 rows of 16, plus 16 bytes of slack so
 * its start can be rounded up to a 16-byte boundary; it therefore only has
 * room for h <= 16.  h must also be even and non-zero because pass 2 counts
 * down by 2 until exactly zero.
 */
891cabdff1aSopenharmony_cifunction ff_put_vp8_epel16_h6v6_neon, export=1
892cabdff1aSopenharmony_ci        sub             r2,  r2,  r3,  lsl #1
893cabdff1aSopenharmony_ci        sub             r2,  r2,  #2
894cabdff1aSopenharmony_ci        push            {r4,lr}
895cabdff1aSopenharmony_ci        vpush           {d8-d15}
896cabdff1aSopenharmony_ci
897cabdff1aSopenharmony_ci        @ first pass (horizontal):
898cabdff1aSopenharmony_ci        ldr             r4,  [sp, #64+8+4]          @ mx
899cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
900cabdff1aSopenharmony_ci        ldr             r12, [sp, #64+8+0]          @ h
901cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
902cabdff1aSopenharmony_ci        sub             sp,  sp,  #336+16
903cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
        /* lr = scratch buffer start, rounded up to a multiple of 16 */
904cabdff1aSopenharmony_ci        add             lr,  sp,  #15
        /* the vertical 6-tap pass needs 5 rows more than it outputs */
905cabdff1aSopenharmony_ci        add             r12, r12, #5
906cabdff1aSopenharmony_ci        bic             lr,  lr,  #15
907cabdff1aSopenharmony_ci1:
908cabdff1aSopenharmony_ci        vld1.8          {d2,d3,d4}, [r2], r3
909cabdff1aSopenharmony_ci
910cabdff1aSopenharmony_ci        vp8_epel16_h6   d2,  d3,  d2,  d3,  d4,  q1,  q2
911cabdff1aSopenharmony_ci
912cabdff1aSopenharmony_ci        vst1.8          {d2-d3}, [lr,:128]!
913cabdff1aSopenharmony_ci        subs            r12, r12, #1
914cabdff1aSopenharmony_ci        bne             1b
915cabdff1aSopenharmony_ci
916cabdff1aSopenharmony_ci        @ second pass (vertical):
917cabdff1aSopenharmony_ci        ldr             r4,  [sp, #336+16+64+8+8]   @ my
918cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
919cabdff1aSopenharmony_ci        ldr             r12, [sp, #336+16+64+8+0]   @ h
920cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
        /* recompute the aligned scratch buffer start for reading */
921cabdff1aSopenharmony_ci        add             lr,  sp,  #15
922cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
923cabdff1aSopenharmony_ci        bic             lr,  lr,  #15
924cabdff1aSopenharmony_ci2:
        /* seven 16-byte rows into q1 .. q7; the three write-back loads move
         * lr forward 96 bytes and the sub takes 64 back: net two rows */
925cabdff1aSopenharmony_ci        vld1.8          {d2-d5},  [lr,:128]!
926cabdff1aSopenharmony_ci        vld1.8          {d6-d9},  [lr,:128]!
927cabdff1aSopenharmony_ci        vld1.8          {d10-d13},[lr,:128]!
928cabdff1aSopenharmony_ci        vld1.8          {d14-d15},[lr,:128]
929cabdff1aSopenharmony_ci        sub             lr,  lr,  #64
930cabdff1aSopenharmony_ci
        /* left 8 columns use the even d registers, right 8 the odd ones */
931cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 d2,  d4,  d2,  d4,  d6,  d8,  d10, d12, d14
932cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 d3,  d5,  d3,  d5,  d7,  d9,  d11, d13, d15
933cabdff1aSopenharmony_ci
934cabdff1aSopenharmony_ci        vst1.8          {d2-d3}, [r0,:128], r1
935cabdff1aSopenharmony_ci        vst1.8          {d4-d5}, [r0,:128], r1
936cabdff1aSopenharmony_ci        subs            r12, r12, #2
937cabdff1aSopenharmony_ci        bne             2b
938cabdff1aSopenharmony_ci
939cabdff1aSopenharmony_ci        add             sp,  sp,  #336+16
940cabdff1aSopenharmony_ci        vpop            {d8-d15}
941cabdff1aSopenharmony_ci        pop             {r4,pc}
942cabdff1aSopenharmony_ciendfunc
943cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_epel8_v6_neon: 8-pixel-wide vertical 6-tap interpolation, two
 * output rows per loop iteration.
 *
 *   r0        dst (stored with a 64-bit alignment hint)
 *   r1        dst stride
 *   r2        src; moved up two rows on entry so the filter window starts
 *             two rows above the first output row
 *   r3        src stride
 *   [sp, #0]  h   (read at sp+8, after the 8-byte push)
 *   [sp, #8]  my  (read at sp+16)
 *
 * The taps are eight 16-bit values loaded into q0 from
 * subpel_filters - 16 + 16*my.  h is decremented by 2 and the loop ends only
 * when it reaches exactly zero, so h must be even and non-zero.
 * The seven input rows live in d2-d7 and d28, so d8-d15 are never touched
 * and no NEON registers need saving.
 */
944cabdff1aSopenharmony_cifunction ff_put_vp8_epel8_v6_neon, export=1
945cabdff1aSopenharmony_ci        sub             r2,  r2,  r3,  lsl #1
946cabdff1aSopenharmony_ci        push            {r4,lr}
947cabdff1aSopenharmony_ci
948cabdff1aSopenharmony_ci        ldr             r4,  [sp, #16]          @ my
949cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
950cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ h
951cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
952cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
953cabdff1aSopenharmony_ci1:
954cabdff1aSopenharmony_ci        vld1.8          {d2},  [r2], r3
955cabdff1aSopenharmony_ci        vld1.8          {d3},  [r2], r3
956cabdff1aSopenharmony_ci        vld1.8          {d4},  [r2], r3
957cabdff1aSopenharmony_ci        vld1.8          {d5},  [r2], r3
958cabdff1aSopenharmony_ci        vld1.8          {d6},  [r2], r3
959cabdff1aSopenharmony_ci        vld1.8          {d7},  [r2], r3
960cabdff1aSopenharmony_ci        vld1.8          {d28}, [r2]
961cabdff1aSopenharmony_ci
        /* six rows forward, four back: net advance of two rows */
962cabdff1aSopenharmony_ci        sub             r2,  r2,  r3,  lsl #2
963cabdff1aSopenharmony_ci
964cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d28
965cabdff1aSopenharmony_ci
966cabdff1aSopenharmony_ci        vst1.8          {d2}, [r0,:64], r1
967cabdff1aSopenharmony_ci        vst1.8          {d3}, [r0,:64], r1
968cabdff1aSopenharmony_ci        subs            r12, r12, #2
969cabdff1aSopenharmony_ci        bne             1b
970cabdff1aSopenharmony_ci
971cabdff1aSopenharmony_ci        pop             {r4,pc}
972cabdff1aSopenharmony_ciendfunc
973cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_epel8_h6_neon: 8-pixel-wide horizontal 6-tap interpolation,
 * one output row per loop iteration.
 *
 *   r0        dst (stored with a 64-bit alignment hint)
 *   r1        dst stride
 *   r2        src; moved left by two bytes on entry so the filter window
 *             starts two pixels left of the first output pixel
 *   r3        src stride
 *   [sp, #0]  h   (read at sp+8, after the 8-byte push)
 *   [sp, #4]  mx  (read at sp+12)
 *
 * The taps are eight 16-bit values loaded into q0 from
 * subpel_filters - 16 + 16*mx.  Each iteration reads 16 bytes of the source
 * row (d2, d3).  The loop ends only when h reaches exactly zero, so h must
 * be non-zero.
 */
974cabdff1aSopenharmony_cifunction ff_put_vp8_epel8_h6_neon, export=1
975cabdff1aSopenharmony_ci        sub             r2,  r2,  #2
976cabdff1aSopenharmony_ci        push            {r4,lr}
977cabdff1aSopenharmony_ci
978cabdff1aSopenharmony_ci        ldr             r4,  [sp, #12]          @ mx
979cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
980cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ h
981cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
982cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
983cabdff1aSopenharmony_ci1:
984cabdff1aSopenharmony_ci        vld1.8          {d2,d3}, [r2], r3
985cabdff1aSopenharmony_ci
986cabdff1aSopenharmony_ci        vp8_epel8_h6    d2,  d2,  d3
987cabdff1aSopenharmony_ci
988cabdff1aSopenharmony_ci        vst1.8          {d2}, [r0,:64], r1
989cabdff1aSopenharmony_ci        subs            r12, r12, #1
990cabdff1aSopenharmony_ci        bne             1b
991cabdff1aSopenharmony_ci
992cabdff1aSopenharmony_ci        pop             {r4,pc}
993cabdff1aSopenharmony_ciendfunc
994cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_epel8_h6v6_neon: 8-pixel-wide 6-tap interpolation in both
 * directions, done as two passes through a scratch buffer on the stack.
 *
 *   r0        dst (stored with a 64-bit alignment hint)
 *   r1        dst stride
 *   r2        src; moved up two rows and left two bytes on entry
 *   r3        src stride
 *   [sp, #0]  h
 *   [sp, #4]  mx
 *   [sp, #8]  my
 * The stack arguments are read at +8 (the 8-byte push) before the scratch
 * buffer is allocated and at +168+16+8 afterwards.
 *
 * Pass 1 filters h+5 rows horizontally and stores them as 8 bytes each in
 * the scratch buffer.  Pass 2 filters that buffer vertically, two output
 * rows per iteration.
 *
 * The buffer reserves 168 bytes = 21 rows of 8, plus 16 bytes of slack so
 * its start can be rounded up to a 16-byte boundary; it therefore only has
 * room for h <= 16.  h must also be even and non-zero because pass 2 counts
 * down by 2 until exactly zero.
 * The seven rows of pass 2 live in d2-d7 and d30, so d8-d15 are never
 * touched and no NEON registers need saving.
 */
995cabdff1aSopenharmony_cifunction ff_put_vp8_epel8_h6v6_neon, export=1
996cabdff1aSopenharmony_ci        sub             r2,  r2,  r3,  lsl #1
997cabdff1aSopenharmony_ci        sub             r2,  r2,  #2
998cabdff1aSopenharmony_ci        push            {r4,lr}
999cabdff1aSopenharmony_ci
1000cabdff1aSopenharmony_ci        @ first pass (horizontal):
1001cabdff1aSopenharmony_ci        ldr             r4,  [sp, #12]          @ mx
1002cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
1003cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ h
1004cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
1005cabdff1aSopenharmony_ci        sub             sp,  sp,  #168+16
1006cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
        /* lr = scratch buffer start, rounded up to a multiple of 16 */
1007cabdff1aSopenharmony_ci        add             lr,  sp,  #15
        /* the vertical 6-tap pass needs 5 rows more than it outputs */
1008cabdff1aSopenharmony_ci        add             r12, r12, #5
1009cabdff1aSopenharmony_ci        bic             lr,  lr,  #15
1010cabdff1aSopenharmony_ci1:
1011cabdff1aSopenharmony_ci        vld1.8          {d2,d3}, [r2], r3
1012cabdff1aSopenharmony_ci
1013cabdff1aSopenharmony_ci        vp8_epel8_h6    d2,  d2,  d3
1014cabdff1aSopenharmony_ci
1015cabdff1aSopenharmony_ci        vst1.8          {d2}, [lr,:64]!
1016cabdff1aSopenharmony_ci        subs            r12, r12, #1
1017cabdff1aSopenharmony_ci        bne             1b
1018cabdff1aSopenharmony_ci
1019cabdff1aSopenharmony_ci        @ second pass (vertical):
1020cabdff1aSopenharmony_ci        ldr             r4,  [sp, #168+16+16]   @ my
1021cabdff1aSopenharmony_ci        movrel          lr,  subpel_filters-16
1022cabdff1aSopenharmony_ci        ldr             r12, [sp, #168+16+8]    @ h
1023cabdff1aSopenharmony_ci        add             r4,  lr,  r4, lsl #4
        /* recompute the aligned scratch buffer start for reading */
1024cabdff1aSopenharmony_ci        add             lr,  sp,  #15
1025cabdff1aSopenharmony_ci        vld1.16         {q0},     [r4,:128]
1026cabdff1aSopenharmony_ci        bic             lr,  lr,  #15
1027cabdff1aSopenharmony_ci2:
        /* seven 8-byte rows into d2-d7 and d30; the two write-back loads
         * move lr forward 48 bytes and the sub takes 32 back: net two rows,
         * which keeps lr 16-byte aligned for the next iteration */
1028cabdff1aSopenharmony_ci        vld1.8          {d2-d5},  [lr,:128]!
1029cabdff1aSopenharmony_ci        vld1.8          {d6-d7},  [lr,:128]!
1030cabdff1aSopenharmony_ci        vld1.8          {d30},    [lr,:64]
1031cabdff1aSopenharmony_ci        sub             lr,  lr,  #32
1032cabdff1aSopenharmony_ci
1033cabdff1aSopenharmony_ci        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d30
1034cabdff1aSopenharmony_ci
1035cabdff1aSopenharmony_ci        vst1.8          {d2}, [r0,:64], r1
1036cabdff1aSopenharmony_ci        vst1.8          {d3}, [r0,:64], r1
1037cabdff1aSopenharmony_ci        subs            r12, r12, #2
1038cabdff1aSopenharmony_ci        bne             2b
1039cabdff1aSopenharmony_ci
1040cabdff1aSopenharmony_ci        add             sp,  sp,  #168+16
1041cabdff1aSopenharmony_ci        pop             {r4,pc}
1042cabdff1aSopenharmony_ciendfunc
1043cabdff1aSopenharmony_ci
@ ff_put_vp8_epel8_v4_neon
@ 8-pixel-wide vertical-only sub-pixel interpolation, two output rows per
@ loop iteration.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 2 (loop counts down by 2 and only
@        exits on exactly zero).
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel8_v4_neon, export=1
        sub             r2,  r2,  r3            @ src -= 1 row (tap above)
        push            {r4,lr}

        ldr             r4,  [sp, #16]          @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
1:
        @ load 5 consecutive source rows; r2 ends up 4 rows ahead
        vld1.8          {d2},     [r2], r3
        vld1.8          {d3},     [r2], r3
        vld1.8          {d4},     [r2], r3
        vld1.8          {d5},     [r2], r3
        vld1.8          {d6},     [r2]
        sub             r2,  r2,  r3,  lsl #1   @ net advance: 2 rows per iteration

        vp8_epel8_v4_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6

        vst1.8          {d2}, [r0,:64], r1
        vst1.8          {d3}, [r0,:64], r1
        subs            r12, r12, #2
        bne             1b

        pop             {r4,pc}
endfunc
1070cabdff1aSopenharmony_ci
@ ff_put_vp8_epel8_h4_neon
@ 8-pixel-wide horizontal-only sub-pixel interpolation, one output row per
@ loop iteration.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx
@ Notes: h must be non-zero (loop exits only when the count reaches zero).
@        Each iteration reads 16 source bytes and writes 8.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel8_h4_neon, export=1
        sub             r2,  r2,  #1            @ src -= 1 pixel (tap to the left)
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
1:
        vld1.8          {d2,d3}, [r2], r3

        vp8_epel8_h4    d2,  d2,  d3

        vst1.8          {d2}, [r0,:64], r1
        subs            r12, r12, #1
        bne             1b

        pop             {r4,pc}
endfunc
1091cabdff1aSopenharmony_ci
@ ff_put_vp8_epel8_h4v4_neon
@ 8-pixel-wide, two-pass sub-pixel interpolation: a horizontal pass into a
@ stack buffer followed by a vertical pass into the destination.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 2 (the second loop counts down by 2
@        and only exits on exactly zero).
@        The temporary buffer receives (h + 3) rows of 8 bytes inside a
@        168-byte reservation; the extra 16 bytes allow the buffer start to
@        be rounded up to a 16-byte boundary.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel8_h4v4_neon, export=1
        sub             r2,  r2,  r3            @ src -= 1 row (vertical tap above)
        sub             r2,  r2,  #1            @ src -= 1 pixel (horizontal tap to the left)
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        sub             sp,  sp,  #168+16
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
        add             lr,  sp,  #15
        add             r12, r12, #3            @ filter h + 3 rows for the vertical pass
        bic             lr,  lr,  #15           @ lr = 16-byte aligned temp buffer
1:
        vld1.8          {d2,d3}, [r2], r3

        vp8_epel8_h4    d2,  d2,  d3

        vst1.8          {d2}, [lr,:64]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #168+16+16]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #168+16+8]    @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
        bic             lr,  lr,  #15           @ rewind lr to the start of the temp buffer
2:
        @ load 5 consecutive temp rows, then step back so that lr has advanced
        @ by a net 16 bytes (2 rows) per iteration
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6},     [lr,:64]
        sub             lr,  lr,  #16

        vp8_epel8_v4_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #168+16
        pop             {r4,pc}
endfunc
1139cabdff1aSopenharmony_ci
@ ff_put_vp8_epel8_h6v4_neon
@ 8-pixel-wide, two-pass sub-pixel interpolation: a horizontal pass into a
@ stack buffer followed by a vertical pass into the destination.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 2 (the second loop counts down by 2
@        and only exits on exactly zero).
@        The temporary buffer receives (h + 3) rows of 8 bytes inside a
@        168-byte reservation; the extra 16 bytes allow the buffer start to
@        be rounded up to a 16-byte boundary.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel8_h6v4_neon, export=1
        sub             r2,  r2,  r3            @ src -= 1 row (vertical tap above)
        sub             r2,  r2,  #2            @ src -= 2 pixels (horizontal taps to the left)
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        sub             sp,  sp,  #168+16
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
        add             lr,  sp,  #15
        add             r12, r12, #3            @ filter h + 3 rows for the vertical pass
        bic             lr,  lr,  #15           @ lr = 16-byte aligned temp buffer
1:
        vld1.8          {d2,d3}, [r2], r3

        vp8_epel8_h6    d2,  d2,  d3

        vst1.8          {d2}, [lr,:64]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #168+16+16]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #168+16+8]    @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
        bic             lr,  lr,  #15           @ rewind lr to the start of the temp buffer
2:
        @ load 5 consecutive temp rows, then step back so that lr has advanced
        @ by a net 16 bytes (2 rows) per iteration
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6},     [lr,:64]
        sub             lr,  lr,  #16

        vp8_epel8_v4_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6

        vst1.8          {d2},     [r0,:64], r1
        vst1.8          {d3},     [r0,:64], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #168+16
        pop             {r4,pc}
endfunc
1187cabdff1aSopenharmony_ci
@ ff_put_vp8_epel8_h4v6_neon
@ 8-pixel-wide, two-pass sub-pixel interpolation: a horizontal pass into a
@ stack buffer followed by a vertical pass into the destination.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 2 (the second loop counts down by 2
@        and only exits on exactly zero).
@        The temporary buffer receives (h + 5) rows of 8 bytes; the 168-byte
@        reservation therefore covers h <= 16.  The extra 16 bytes allow the
@        buffer start to be rounded up to a 16-byte boundary.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel8_h4v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ src -= 2 rows (vertical taps above)
        sub             r2,  r2,  #1            @ src -= 1 pixel (horizontal tap to the left)
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        sub             sp,  sp,  #168+16
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
        add             lr,  sp,  #15
        add             r12, r12, #5            @ filter h + 5 rows for the vertical pass
        bic             lr,  lr,  #15           @ lr = 16-byte aligned temp buffer
1:
        vld1.8          {d2,d3}, [r2], r3

        vp8_epel8_h4    d2,  d2,  d3

        vst1.8          {d2}, [lr,:64]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #168+16+16]   @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #168+16+8]    @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
        bic             lr,  lr,  #15           @ rewind lr to the start of the temp buffer
2:
        @ load 7 consecutive temp rows, then step back so that lr has advanced
        @ by a net 16 bytes (2 rows) per iteration
        vld1.8          {d2-d5},  [lr,:128]!
        vld1.8          {d6-d7},  [lr,:128]!
        vld1.8          {d30},    [lr,:64]
        sub             lr,  lr,  #32

        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d30

        vst1.8          {d2}, [r0,:64], r1
        vst1.8          {d3}, [r0,:64], r1
        subs            r12, r12, #2
        bne             2b

        add             sp,  sp,  #168+16
        pop             {r4,pc}
endfunc
1236cabdff1aSopenharmony_ci
@ Flush the assembler's pending literal pool here, between functions.
@ NOTE(review): presumably needed so that any literal loads generated by the
@ movrel macro stay within pc-relative range -- confirm against asm.S.
.ltorg
1238cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_v6_neon
@ 4-pixel-wide vertical-only sub-pixel interpolation, four output rows per
@ loop iteration.  Two groups of 4-byte rows share each d register (one per
@ 32-bit lane) so that the 8-wide vertical macro can be reused.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 4 (loop counts down by 4 and only
@        exits on exactly zero).
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ src -= 2 rows (taps above)
        push            {r4,lr}

        ldr             r4,  [sp, #16]          @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
1:
        @ rows 0..6 -> both lanes of d2..d7, d28 (lane 1 is overwritten below)
        vld1.32         {d2[]},   [r2], r3
        vld1.32         {d3[]},   [r2], r3
        vld1.32         {d4[]},   [r2], r3
        vld1.32         {d5[]},   [r2], r3
        vld1.32         {d6[]},   [r2], r3
        vld1.32         {d7[]},   [r2], r3
        vld1.32         {d28[]},  [r2]
        sub             r2,  r2,  r3,  lsl #2   @ r2 now points at row 2
        @ rows 2..8 -> lane 1 of d2..d7, d28
        vld1.32         {d2[1]},  [r2], r3
        vld1.32         {d3[1]},  [r2], r3
        vld1.32         {d4[1]},  [r2], r3
        vld1.32         {d5[1]},  [r2], r3
        vld1.32         {d6[1]},  [r2], r3
        vld1.32         {d7[1]},  [r2], r3
        vld1.32         {d28[1]}, [r2]
        sub             r2,  r2,  r3,  lsl #2   @ net advance: 4 rows per iteration

        vp8_epel8_v6_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6,  d7,  d28

        @ lane 0 of d2/d3 = output rows 0/1, lane 1 = output rows 2/3
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             1b

        pop             {r4,pc}
endfunc
1277cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_h6_neon
@ 4-pixel-wide horizontal-only sub-pixel interpolation, one output row per
@ loop iteration.  The 8-wide horizontal macro is reused and only the low
@ 4 bytes of its result are stored.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx
@ Notes: h must be non-zero (loop exits only when the count reaches zero).
@        Each iteration reads 16 source bytes and writes 4.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_h6_neon, export=1
        sub             r2,  r2,  #2            @ src -= 2 pixels (taps to the left)
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
1:
        vld1.8          {q1},     [r2], r3
        vp8_epel8_h6    d2,  d2,  d3
        vst1.32         {d2[0]},  [r0,:32], r1
        subs            r12, r12, #1
        bne             1b

        pop             {r4,pc}
endfunc
1296cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_h6v6_neon
@ 4-pixel-wide, two-pass sub-pixel interpolation: a horizontal pass into a
@ stack buffer followed by a vertical pass producing four output rows per
@ loop iteration.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 4 (the second loop counts down by 4
@        and only exits on exactly zero).
@        The temporary buffer receives (h + 5) rows of 4 bytes; the 52-byte
@        reservation therefore covers h <= 8.  The extra 16 bytes allow the
@        buffer start to be rounded up to a 16-byte boundary.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_h6v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ src -= 2 rows (vertical taps above)
        sub             r2,  r2,  #2            @ src -= 2 pixels (horizontal taps to the left)
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        sub             sp,  sp,  #52+16
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
        add             lr,  sp,  #15
        add             r12, r12, #5            @ filter h + 5 rows for the vertical pass
        bic             lr,  lr,  #15           @ lr = 16-byte aligned temp buffer
1:
        vld1.8          {q1},     [r2], r3
        vp8_epel8_h6    d2,  d2,  d3
        vst1.32         {d2[0]},  [lr,:32]!     @ keep only the low 4 result bytes
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #52+16+16]    @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #52+16+8]     @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
        bic             lr,  lr,  #15           @ rewind lr to the start of the temp buffer
2:
        @ temp rows 0..6: rows 0-3 -> q1, rows 4-5 -> d6, row 6 -> d28 (both lanes)
        vld1.8          {d2-d3},  [lr,:128]!
        vld1.8          {d6},     [lr,:64]!
        vld1.32         {d28[]},  [lr,:32]
        sub             lr,  lr,  #16           @ lr = 8 bytes (2 rows) past the iteration start
        @ temp rows 2..8: rows 2-5 -> q2, rows 6-7 -> d7, row 8 -> d28[1]
        vld1.8          {d4-d5},  [lr]!         @ no alignment hint: lr is only 8-byte aligned here
        vld1.8          {d7},     [lr,:64]!
        vld1.32         {d28[1]}, [lr,:32]
        sub             lr,  lr,  #16           @ net advance: 16 bytes (4 rows) per iteration
        @ regroup so each d register holds row k in lane 0 and row k+2 in lane 1
        vtrn.32         q1,  q2
        vtrn.32         d6,  d7
        vp8_epel8_v6_y2 d2,  d3,  d2,  d4,  d3,  d5,  d6,  d7,  d28
        @ lane 0 of d2/d3 = output rows 0/1, lane 1 = output rows 2/3
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             2b

        add             sp,  sp,  #52+16
        pop             {r4,pc}
endfunc
1347cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_h4v6_neon
@ 4-pixel-wide, two-pass sub-pixel interpolation: a horizontal pass into a
@ stack buffer followed by a vertical pass producing four output rows per
@ loop iteration.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 4 (the second loop counts down by 4
@        and only exits on exactly zero).
@        The temporary buffer receives (h + 5) rows of 4 bytes; the 52-byte
@        reservation therefore covers h <= 8.  The extra 16 bytes allow the
@        buffer start to be rounded up to a 16-byte boundary.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_h4v6_neon, export=1
        sub             r2,  r2,  r3,  lsl #1   @ src -= 2 rows (vertical taps above)
        sub             r2,  r2,  #1            @ src -= 1 pixel (horizontal tap to the left)
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        sub             sp,  sp,  #52+16
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
        add             lr,  sp,  #15
        add             r12, r12, #5            @ filter h + 5 rows for the vertical pass
        bic             lr,  lr,  #15           @ lr = 16-byte aligned temp buffer
1:
        vld1.8          {d2},     [r2], r3      @ only 8 source bytes are read per row
        vp8_epel8_h4    d2,  d2,  d2
        vst1.32         {d2[0]},  [lr,:32]!     @ keep only the low 4 result bytes
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #52+16+16]    @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #52+16+8]     @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
        bic             lr,  lr,  #15           @ rewind lr to the start of the temp buffer
2:
        @ temp rows 0..6: rows 0-3 -> q1, rows 4-5 -> d6, row 6 -> d28 (both lanes)
        vld1.8          {d2-d3},  [lr,:128]!
        vld1.8          {d6},     [lr,:64]!
        vld1.32         {d28[]},  [lr,:32]
        sub             lr,  lr,  #16           @ lr = 8 bytes (2 rows) past the iteration start
        @ temp rows 2..8: rows 2-5 -> q2, rows 6-7 -> d7, row 8 -> d28[1]
        vld1.8          {d4-d5},  [lr]!         @ no alignment hint: lr is only 8-byte aligned here
        vld1.8          {d7},     [lr,:64]!
        vld1.32         {d28[1]}, [lr,:32]
        sub             lr,  lr,  #16           @ net advance: 16 bytes (4 rows) per iteration
        @ regroup so each d register holds row k in lane 0 and row k+2 in lane 1
        vtrn.32         q1,  q2
        vtrn.32         d6,  d7
        vp8_epel8_v6_y2 d2,  d3,  d2,  d4,  d3,  d5,  d6,  d7,  d28
        @ lane 0 of d2/d3 = output rows 0/1, lane 1 = output rows 2/3
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             2b

        add             sp,  sp,  #52+16
        pop             {r4,pc}
endfunc
1398cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_h6v4_neon
@ 4-pixel-wide, two-pass sub-pixel interpolation: a horizontal pass into a
@ stack buffer followed by a vertical pass producing four output rows per
@ loop iteration.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 4 (the second loop counts down by 4
@        and only exits on exactly zero).
@        The temporary buffer receives (h + 3) rows of 4 bytes; the 44-byte
@        reservation therefore covers h <= 8.  The extra 16 bytes allow the
@        buffer start to be rounded up to a 16-byte boundary.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_h6v4_neon, export=1
        sub             r2,  r2,  r3            @ src -= 1 row (vertical tap above)
        sub             r2,  r2,  #2            @ src -= 2 pixels (horizontal taps to the left)
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        sub             sp,  sp,  #44+16
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
        add             lr,  sp,  #15
        add             r12, r12, #3            @ filter h + 3 rows for the vertical pass
        bic             lr,  lr,  #15           @ lr = 16-byte aligned temp buffer
1:
        vld1.8          {q1},     [r2], r3
        vp8_epel8_h6    d2,  d2,  d3
        vst1.32         {d2[0]},  [lr,:32]!     @ keep only the low 4 result bytes
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #44+16+16]    @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #44+16+8]     @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
        bic             lr,  lr,  #15           @ rewind lr to the start of the temp buffer
2:
        @ temp rows 0..4: rows 0-3 -> q1, row 4 -> d6 (both lanes)
        vld1.8          {d2-d3},  [lr,:128]!
        vld1.32         {d6[]},   [lr,:32]
        sub             lr,  lr,  #8            @ lr = 8 bytes (2 rows) past the iteration start
        @ temp rows 2..6: rows 2-5 -> q2, row 6 -> d6[1]
        vld1.8          {d4-d5},  [lr]!         @ no alignment hint: lr is only 8-byte aligned here
        vld1.32         {d6[1]},  [lr,:32]
        sub             lr,  lr,  #8            @ net advance: 16 bytes (4 rows) per iteration
        @ regroup so each d register holds row k in lane 0 and row k+2 in lane 1
        vtrn.32         q1,  q2
        vp8_epel8_v4_y2 d2,  d3,  d2,  d4,  d3,  d5,  d6
        @ lane 0 of d2/d3 = output rows 0/1, lane 1 = output rows 2/3
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             2b

        add             sp,  sp,  #44+16
        pop             {r4,pc}
endfunc
1446cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_h4_neon
@ 4-pixel-wide horizontal-only sub-pixel interpolation, one output row per
@ loop iteration.  The 8-wide horizontal macro is reused and only the low
@ 4 bytes of its result are stored.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx
@ Notes: h must be non-zero (loop exits only when the count reaches zero).
@        Each iteration reads 8 source bytes and writes 4.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_h4_neon, export=1
        sub             r2,  r2,  #1            @ src -= 1 pixel (tap to the left)
        push            {r4,lr}

        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
1:
        vld1.8          {d2},     [r2], r3
        vp8_epel8_h4    d2,  d2,  d2
        vst1.32         {d2[0]},  [r0,:32], r1
        subs            r12, r12, #1
        bne             1b

        pop             {r4,pc}
endfunc
1465cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_v4_neon
@ 4-pixel-wide vertical-only sub-pixel interpolation, four output rows per
@ loop iteration.  Two groups of 4-byte rows share each d register (one per
@ 32-bit lane) so that the 8-wide vertical macro can be reused.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 4 (loop counts down by 4 and only
@        exits on exactly zero).
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_v4_neon, export=1
        sub             r2,  r2,  r3            @ src -= 1 row (tap above)
        push            {r4,lr}

        ldr             r4,  [sp, #16]          @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
1:
        @ rows 0..4 -> both lanes of d2..d6 (lane 1 is overwritten below)
        vld1.32         {d2[]},   [r2], r3
        vld1.32         {d3[]},   [r2], r3
        vld1.32         {d4[]},   [r2], r3
        vld1.32         {d5[]},   [r2], r3
        vld1.32         {d6[]},   [r2]
        sub             r2,  r2,  r3,  lsl #1   @ r2 now points at row 2
        @ rows 2..6 -> lane 1 of d2..d6
        vld1.32         {d2[1]},  [r2], r3
        vld1.32         {d3[1]},  [r2], r3
        vld1.32         {d4[1]},  [r2], r3
        vld1.32         {d5[1]},  [r2], r3
        vld1.32         {d6[1]},  [r2]
        sub             r2,  r2,  r3,  lsl #1   @ net advance: 4 rows per iteration

        vp8_epel8_v4_y2 d2,  d3,  d2,  d3,  d4,  d5,  d6

        @ lane 0 of d2/d3 = output rows 0/1, lane 1 = output rows 2/3
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             1b

        pop             {r4,pc}
endfunc
1500cabdff1aSopenharmony_ci
@ ff_put_vp8_epel4_h4v4_neon
@ 4-pixel-wide, two-pass sub-pixel interpolation: a horizontal pass into a
@ stack buffer followed by a vertical pass producing four output rows per
@ loop iteration.
@
@ In:    r0 = dst, r1 = dst stride, r2 = src, r3 = src stride
@        stack at entry: [sp] = h, [sp, #4] = mx, [sp, #8] = my
@ Notes: h must be a non-zero multiple of 4 (the second loop counts down by 4
@        and only exits on exactly zero).
@        The temporary buffer receives (h + 3) rows of 4 bytes; the 44-byte
@        reservation therefore covers h <= 8.  The extra 16 bytes allow the
@        buffer start to be rounded up to a 16-byte boundary.
@        r4 is saved/restored; r12 and lr are used as scratch.
function ff_put_vp8_epel4_h4v4_neon, export=1
        sub             r2,  r2,  r3            @ src -= 1 row (vertical tap above)
        sub             r2,  r2,  #1            @ src -= 1 pixel (horizontal tap to the left)
        push            {r4,lr}

        @ first pass (horizontal):
        ldr             r4,  [sp, #12]          @ mx
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #8]           @ h
        add             r4,  lr,  r4, lsl #4    @ 16 bytes per filter row, table starts at index 1
        sub             sp,  sp,  #44+16
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for mx
        add             lr,  sp,  #15
        add             r12, r12, #3            @ filter h + 3 rows for the vertical pass
        bic             lr,  lr,  #15           @ lr = 16-byte aligned temp buffer
1:
        vld1.8          {d2},     [r2], r3      @ only 8 source bytes are read per row
        @ Only d2 is loaded, so pass d2 as both sources (as the other 4-wide
        @ h4 routines do) rather than a never-loaded d3; only the low 4 result
        @ bytes are stored below.
        vp8_epel8_h4    d2,  d2,  d2
        vst1.32         {d2[0]},  [lr,:32]!
        subs            r12, r12, #1
        bne             1b

        @ second pass (vertical):
        ldr             r4,  [sp, #44+16+16]    @ my
        movrel          lr,  subpel_filters-16
        ldr             r12, [sp, #44+16+8]     @ h
        add             r4,  lr,  r4, lsl #4
        add             lr,  sp,  #15
        vld1.16         {q0},     [r4,:128]     @ q0 = filter coefficients for my
        bic             lr,  lr,  #15           @ rewind lr to the start of the temp buffer
2:
        @ temp rows 0..4: rows 0-3 -> q1, row 4 -> d6 (both lanes)
        vld1.8          {d2-d3},  [lr,:128]!
        vld1.32         {d6[]},   [lr,:32]
        sub             lr,  lr,  #8            @ lr = 8 bytes (2 rows) past the iteration start
        @ temp rows 2..6: rows 2-5 -> q2, row 6 -> d6[1]
        vld1.8          {d4-d5},  [lr]!         @ no alignment hint: lr is only 8-byte aligned here
        vld1.32         {d6[1]},  [lr,:32]
        sub             lr,  lr,  #8            @ net advance: 16 bytes (4 rows) per iteration
        @ regroup so each d register holds row k in lane 0 and row k+2 in lane 1
        vtrn.32         q1,  q2
        vp8_epel8_v4_y2 d2,  d3,  d2,  d4,  d3,  d5,  d6
        @ lane 0 of d2/d3 = output rows 0/1, lane 1 = output rows 2/3
        vst1.32         {d2[0]},  [r0,:32], r1
        vst1.32         {d3[0]},  [r0,:32], r1
        vst1.32         {d2[1]},  [r0,:32], r1
        vst1.32         {d3[1]},  [r0,:32], r1
        subs            r12, r12, #4
        bne             2b

        add             sp,  sp,  #44+16
        pop             {r4,pc}
endfunc
1548cabdff1aSopenharmony_ci
1549cabdff1aSopenharmony_ci@ note: worst case sum of all 6-tap filter values * 255 is 0x7f80 so 16 bit
1550cabdff1aSopenharmony_ci@ arithmetic can be used to apply filters
/*
 * subpel_filters: seven rows of filter tap magnitudes.  Each row is eight
 * 16-bit values (16 bytes): six taps followed by two zero pad entries, so a
 * whole row can be fetched with a single 128-bit vld1.16.  Code in this file
 * addresses the table as subpel_filters - 16 + (index << 4), which means
 * index 1 selects the first row and index 7 the last.
 *
 * All entries are stored non-negative.  With columns 1 and 4 counted as
 * negative every row sums to 128, which matches the 0x7f80 (= 128 * 255)
 * figure in the note above; the sign handling itself presumably lives in the
 * filter macros outside this table -- confirm there.
 * NOTE(review): align=4 is assumed to mean 2^4 = 16-byte alignment, which the
 * :128 hints on the table loads need -- confirm against the const macro.
 */
1551cabdff1aSopenharmony_ciconst   subpel_filters, align=4
1552cabdff1aSopenharmony_ci        .short     0,   6, 123,  12,   1,   0,   0,   0    @ index 1
1553cabdff1aSopenharmony_ci        .short     2,  11, 108,  36,   8,   1,   0,   0    @ index 2
1554cabdff1aSopenharmony_ci        .short     0,   9,  93,  50,   6,   0,   0,   0    @ index 3
1555cabdff1aSopenharmony_ci        .short     3,  16,  77,  77,  16,   3,   0,   0    @ index 4 (symmetric row)
1556cabdff1aSopenharmony_ci        .short     0,   6,  50,  93,   9,   0,   0,   0    @ index 5 (index 3 reversed)
1557cabdff1aSopenharmony_ci        .short     1,   8,  36, 108,  11,   2,   0,   0    @ index 6 (index 2 reversed)
1558cabdff1aSopenharmony_ci        .short     0,   1,  12, 123,   6,   0,   0,   0    @ index 7 (index 1 reversed)
1559cabdff1aSopenharmony_ciendconst
1560cabdff1aSopenharmony_ci
1561cabdff1aSopenharmony_ci/* Bilinear MC */
1562cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin16_h_neon: horizontal bilinear interpolation of rows that
 * are 16 pixels wide, two rows per loop iteration.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :128 (16-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of rows
 *   [sp, #4] = mx, weight given to the pixel one byte to the right
 *
 * Every output byte is (src[x] * (8 - mx) + src[x + 1] * mx + 4) >> 3.
 * 24 source bytes are fetched per row although only bytes 0-16 are used.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= mx <= 8 so both weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3, q8-q12 and the condition flags.
 */
1563cabdff1aSopenharmony_cifunction ff_put_vp8_bilin16_h_neon, export=1
1564cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ mx
1565cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = mx in every byte lane
1566cabdff1aSopenharmony_ci        rsb             r12, r12, #8            @ r12 = 8 - mx
1567cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - mx in every byte lane
1568cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1569cabdff1aSopenharmony_ci1:
1570cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1571cabdff1aSopenharmony_ci        vld1.8          {d2-d4},  [r2], r3      @ row 0: 24 bytes fetched, bytes 0-16 used
1572cabdff1aSopenharmony_ci        vext.8          q2,  q1,  q2,  #1       @ q2 = row 0 bytes 1-16, i.e. src[x + 1]
1573cabdff1aSopenharmony_ci        vmull.u8        q8,  d2,  d1            @ row 0, px 0-7: src[x] * (8 - mx)
1574cabdff1aSopenharmony_ci        vmlal.u8        q8,  d4,  d0            @   + src[x + 1] * mx
1575cabdff1aSopenharmony_ci        vld1.8          {d18-d20},[r2], r3      @ row 1 into q9 plus d20
1576cabdff1aSopenharmony_ci        vmull.u8        q3,  d3,  d1            @ row 0, px 8-15
1577cabdff1aSopenharmony_ci        vmlal.u8        q3,  d5,  d0
1578cabdff1aSopenharmony_ci        vext.8          q10, q9,  q10, #1       @ q10 = row 1 bytes 1-16
1579cabdff1aSopenharmony_ci        vmull.u8        q11, d18, d1            @ row 1, px 0-7
1580cabdff1aSopenharmony_ci        vmlal.u8        q11, d20, d0
1581cabdff1aSopenharmony_ci        vmull.u8        q12, d19, d1            @ row 1, px 8-15
1582cabdff1aSopenharmony_ci        vmlal.u8        q12, d21, d0
1583cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q8,  #3            @ (sum + 4) >> 3, narrowed back to bytes
1584cabdff1aSopenharmony_ci        vrshrn.u16      d5,  q3,  #3
1585cabdff1aSopenharmony_ci        vrshrn.u16      d6,  q11, #3
1586cabdff1aSopenharmony_ci        vrshrn.u16      d7,  q12, #3
1587cabdff1aSopenharmony_ci        vst1.8          {q2},     [r0,:128], r1 @ write row 0
1588cabdff1aSopenharmony_ci        vst1.8          {q3},     [r0,:128], r1 @ write row 1
1589cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1590cabdff1aSopenharmony_ci
1591cabdff1aSopenharmony_ci        bx              lr
1592cabdff1aSopenharmony_ciendfunc
1593cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin16_v_neon: vertical bilinear interpolation of rows that
 * are 16 pixels wide, two output rows per loop iteration.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :128 (16-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of output rows
 *   [sp, #8] = my, weight given to the row below
 *
 * Every output byte is (row[y][x] * (8 - my) + row[y + 1][x] * my + 4) >> 3,
 * so one more source row than output rows is read.  q1 carries the most
 * recent source row from one iteration to the next.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= my <= 8 so both weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3, q8-q10 and the condition flags.
 */
1594cabdff1aSopenharmony_cifunction ff_put_vp8_bilin16_v_neon, export=1
1595cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ my
1596cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = my in every byte lane
1597cabdff1aSopenharmony_ci        rsb             r12, r12, #8            @ r12 = 8 - my
1598cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - my in every byte lane
1599cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1600cabdff1aSopenharmony_ci        vld1.8          {q1},     [r2], r3      @ prime q1 with the first source row
1601cabdff1aSopenharmony_ci1:
1602cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1603cabdff1aSopenharmony_ci        vld1.8          {q2},     [r2], r3      @ q2 = row y + 1
1604cabdff1aSopenharmony_ci        vmull.u8        q3,  d2,  d1            @ out row 0, px 0-7: row[y] * (8 - my)
1605cabdff1aSopenharmony_ci        vmlal.u8        q3,  d4,  d0            @   + row[y + 1] * my
1606cabdff1aSopenharmony_ci        vmull.u8        q8,  d3,  d1            @ out row 0, px 8-15
1607cabdff1aSopenharmony_ci        vmlal.u8        q8,  d5,  d0
1608cabdff1aSopenharmony_ci        vld1.8          {q1},     [r2], r3      @ q1 = row y + 2, kept for the next pass
1609cabdff1aSopenharmony_ci        vmull.u8        q9,  d4,  d1            @ out row 1, px 0-7: row[y + 1] * (8 - my)
1610cabdff1aSopenharmony_ci        vmlal.u8        q9,  d2,  d0            @   + row[y + 2] * my
1611cabdff1aSopenharmony_ci        vmull.u8        q10, d5,  d1            @ out row 1, px 8-15
1612cabdff1aSopenharmony_ci        vmlal.u8        q10, d3,  d0
1613cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q3,  #3            @ (sum + 4) >> 3, narrowed back to bytes
1614cabdff1aSopenharmony_ci        vrshrn.u16      d5,  q8,  #3
1615cabdff1aSopenharmony_ci        vrshrn.u16      d6,  q9,  #3
1616cabdff1aSopenharmony_ci        vrshrn.u16      d7,  q10, #3
1617cabdff1aSopenharmony_ci        vst1.8          {q2},     [r0,:128], r1 @ write out row 0
1618cabdff1aSopenharmony_ci        vst1.8          {q3},     [r0,:128], r1 @ write out row 1
1619cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1620cabdff1aSopenharmony_ci
1621cabdff1aSopenharmony_ci        bx              lr
1622cabdff1aSopenharmony_ciendfunc
1623cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin16_hv_neon: horizontal then vertical bilinear
 * interpolation of rows that are 16 pixels wide, two output rows per loop
 * iteration.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :128 (16-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of output rows
 *   [sp, #4] = mx, horizontal weight of the pixel one byte to the right
 *   [sp, #8] = my, vertical weight of the row below
 *
 * Each source row is first filtered horizontally,
 *   t[y][x] = (src[y][x] * (8 - mx) + src[y][x + 1] * mx + 4) >> 3,
 * and narrowed to bytes; the output is then
 *   (t[y][x] * (8 - my) + t[y + 1][x] * my + 4) >> 3.
 * One more source row than output rows is read, 24 bytes per row of which
 * bytes 0-16 are used.  q2 (d4/d5) carries the newest horizontally filtered
 * row across iterations.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= mx, my <= 8 so all weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3, q8-q15 and the condition flags; q4-q7 are untouched.
 */
1624cabdff1aSopenharmony_cifunction ff_put_vp8_bilin16_hv_neon, export=1
1625cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ mx
1626cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = mx
1627cabdff1aSopenharmony_ci        rsb             r12, r12, #8
1628cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - mx
1629cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ my
1630cabdff1aSopenharmony_ci        vdup.8          d2,  r12                @ d2 = my
1631cabdff1aSopenharmony_ci        rsb             r12, r12, #8
1632cabdff1aSopenharmony_ci        vdup.8          d3,  r12                @ d3 = 8 - my
1633cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1634cabdff1aSopenharmony_ci
1635cabdff1aSopenharmony_ci        vld1.8          {d4-d6},  [r2], r3      @ first source row, bytes 0-23
1636cabdff1aSopenharmony_ci        vext.8          q3,  q2,  q3,  #1       @ q3 = bytes 1-16 of that row
1637cabdff1aSopenharmony_ci        vmull.u8        q8,  d4,  d1            @ horizontal pass, px 0-7
1638cabdff1aSopenharmony_ci        vmlal.u8        q8,  d6,  d0
1639cabdff1aSopenharmony_ci        vmull.u8        q9,  d5,  d1            @ horizontal pass, px 8-15
1640cabdff1aSopenharmony_ci        vmlal.u8        q9,  d7,  d0
1641cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q8,  #3            @ q2 = t[0], the row carried into the loop
1642cabdff1aSopenharmony_ci        vrshrn.u16      d5,  q9,  #3
1643cabdff1aSopenharmony_ci1:
1644cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1645cabdff1aSopenharmony_ci        vld1.8          {d18-d20},[r2], r3      @ source row y + 1
1646cabdff1aSopenharmony_ci        vext.8          q10, q9,  q10, #1       @ q10 = its bytes 1-16
1647cabdff1aSopenharmony_ci        vmull.u8        q11, d18, d1            @ horizontal pass of row y + 1, px 0-7
1648cabdff1aSopenharmony_ci        vmlal.u8        q11, d20, d0
1649cabdff1aSopenharmony_ci        vld1.8          {d26-d28},[r2], r3      @ source row y + 2
1650cabdff1aSopenharmony_ci        vmull.u8        q12, d19, d1            @ horizontal pass of row y + 1, px 8-15
1651cabdff1aSopenharmony_ci        vmlal.u8        q12, d21, d0
1652cabdff1aSopenharmony_ci        vext.8          q14, q13, q14, #1       @ q14 = bytes 1-16 of row y + 2
1653cabdff1aSopenharmony_ci        vmull.u8        q8,  d26, d1            @ horizontal pass of row y + 2, px 0-7
1654cabdff1aSopenharmony_ci        vmlal.u8        q8,  d28, d0
1655cabdff1aSopenharmony_ci        vmull.u8        q9,  d27, d1            @ horizontal pass of row y + 2, px 8-15
1656cabdff1aSopenharmony_ci        vmlal.u8        q9,  d29, d0
1657cabdff1aSopenharmony_ci        vrshrn.u16      d6,  q11, #3            @ q3 = t[y + 1]
1658cabdff1aSopenharmony_ci        vrshrn.u16      d7,  q12, #3
1659cabdff1aSopenharmony_ci        vmull.u8        q12, d4,  d3            @ out row 0: t[y] * (8 - my), px 0-7
1660cabdff1aSopenharmony_ci        vmlal.u8        q12, d6,  d2            @   + t[y + 1] * my
1661cabdff1aSopenharmony_ci        vmull.u8        q15, d5,  d3            @ out row 0, px 8-15
1662cabdff1aSopenharmony_ci        vmlal.u8        q15, d7,  d2
1663cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q8,  #3            @ q2 = t[y + 2]; old q2 was consumed just above
1664cabdff1aSopenharmony_ci        vrshrn.u16      d5,  q9,  #3
1665cabdff1aSopenharmony_ci        vmull.u8        q10, d6,  d3            @ out row 1: t[y + 1] * (8 - my), px 0-7
1666cabdff1aSopenharmony_ci        vmlal.u8        q10, d4,  d2            @   + t[y + 2] * my
1667cabdff1aSopenharmony_ci        vmull.u8        q11, d7,  d3            @ out row 1, px 8-15
1668cabdff1aSopenharmony_ci        vmlal.u8        q11, d5,  d2
1669cabdff1aSopenharmony_ci        vrshrn.u16      d24, q12, #3            @ round and narrow out row 0 into q12
1670cabdff1aSopenharmony_ci        vrshrn.u16      d25, q15, #3
1671cabdff1aSopenharmony_ci        vst1.8          {q12},    [r0,:128], r1 @ write out row 0
1672cabdff1aSopenharmony_ci        vrshrn.u16      d20, q10, #3            @ round and narrow out row 1 into q10
1673cabdff1aSopenharmony_ci        vrshrn.u16      d21, q11, #3
1674cabdff1aSopenharmony_ci        vst1.8          {q10},    [r0,:128], r1 @ write out row 1
1675cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1676cabdff1aSopenharmony_ci
1677cabdff1aSopenharmony_ci        bx              lr
1678cabdff1aSopenharmony_ciendfunc
1679cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin8_h_neon: horizontal bilinear interpolation of rows that
 * are 8 pixels wide, two rows per loop iteration.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :64 (8-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of rows
 *   [sp, #4] = mx, weight given to the pixel one byte to the right
 *
 * Every output byte is (src[x] * (8 - mx) + src[x + 1] * mx + 4) >> 3.
 * 16 source bytes are fetched per row although only bytes 0-8 are used.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= mx <= 8 so both weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3, q8 and the condition flags.
 */
1680cabdff1aSopenharmony_cifunction ff_put_vp8_bilin8_h_neon, export=1
1681cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ mx
1682cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = mx in every byte lane
1683cabdff1aSopenharmony_ci        rsb             r12, r12, #8            @ r12 = 8 - mx
1684cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - mx in every byte lane
1685cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1686cabdff1aSopenharmony_ci1:
1687cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1688cabdff1aSopenharmony_ci        vld1.8          {q1},     [r2], r3      @ row 0: 16 bytes fetched, bytes 0-8 used
1689cabdff1aSopenharmony_ci        vext.8          d3,  d2,  d3,  #1       @ d3 = row 0 bytes 1-8, i.e. src[x + 1]
1690cabdff1aSopenharmony_ci        vmull.u8        q2,  d2,  d1            @ row 0: src[x] * (8 - mx)
1691cabdff1aSopenharmony_ci        vmlal.u8        q2,  d3,  d0            @   + src[x + 1] * mx
1692cabdff1aSopenharmony_ci        vld1.8          {q3},     [r2], r3      @ row 1
1693cabdff1aSopenharmony_ci        vext.8          d7,  d6,  d7,  #1       @ d7 = row 1 bytes 1-8
1694cabdff1aSopenharmony_ci        vmull.u8        q8,  d6,  d1            @ row 1, same weighting
1695cabdff1aSopenharmony_ci        vmlal.u8        q8,  d7,  d0
1696cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q2,  #3            @ (sum + 4) >> 3, narrowed back to bytes
1697cabdff1aSopenharmony_ci        vrshrn.u16      d16, q8,  #3
1698cabdff1aSopenharmony_ci        vst1.8          {d4},     [r0,:64], r1  @ write row 0
1699cabdff1aSopenharmony_ci        vst1.8          {d16},    [r0,:64], r1  @ write row 1
1700cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1701cabdff1aSopenharmony_ci
1702cabdff1aSopenharmony_ci        bx              lr
1703cabdff1aSopenharmony_ciendfunc
1704cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin8_v_neon: vertical bilinear interpolation of rows that
 * are 8 pixels wide, two output rows per loop iteration.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :64 (8-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of output rows
 *   [sp, #8] = my, weight given to the row below
 *
 * Every output byte is (row[y][x] * (8 - my) + row[y + 1][x] * my + 4) >> 3,
 * so one more source row than output rows is read.  d2 carries the most
 * recent source row from one iteration to the next.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= my <= 8 so both weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3 and the condition flags.
 */
1705cabdff1aSopenharmony_cifunction ff_put_vp8_bilin8_v_neon, export=1
1706cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ my
1707cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = my in every byte lane
1708cabdff1aSopenharmony_ci        rsb             r12, r12,  #8           @ r12 = 8 - my
1709cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - my in every byte lane
1710cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1711cabdff1aSopenharmony_ci        vld1.8          {d2},     [r2], r3      @ prime d2 with the first source row
1712cabdff1aSopenharmony_ci1:
1713cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1714cabdff1aSopenharmony_ci        vld1.8          {d3},     [r2], r3      @ d3 = row y + 1
1715cabdff1aSopenharmony_ci        vmull.u8        q2,  d2,  d1            @ out row 0: row[y] * (8 - my)
1716cabdff1aSopenharmony_ci        vmlal.u8        q2,  d3,  d0            @   + row[y + 1] * my
1717cabdff1aSopenharmony_ci        vld1.8          {d2},     [r2], r3      @ d2 = row y + 2, kept for the next pass
1718cabdff1aSopenharmony_ci        vmull.u8        q3,  d3,  d1            @ out row 1: row[y + 1] * (8 - my)
1719cabdff1aSopenharmony_ci        vmlal.u8        q3,  d2,  d0            @   + row[y + 2] * my
1720cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q2,  #3            @ (sum + 4) >> 3, narrowed back to bytes
1721cabdff1aSopenharmony_ci        vrshrn.u16      d6,  q3,  #3
1722cabdff1aSopenharmony_ci        vst1.8          {d4},     [r0,:64], r1  @ write out row 0
1723cabdff1aSopenharmony_ci        vst1.8          {d6},     [r0,:64], r1  @ write out row 1
1724cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1725cabdff1aSopenharmony_ci
1726cabdff1aSopenharmony_ci        bx              lr
1727cabdff1aSopenharmony_ciendfunc
1728cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin8_hv_neon: horizontal then vertical bilinear
 * interpolation of rows that are 8 pixels wide, two output rows per loop
 * iteration.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :64 (8-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of output rows
 *   [sp, #4] = mx, horizontal weight of the pixel one byte to the right
 *   [sp, #8] = my, vertical weight of the row below
 *
 * Each source row is first filtered horizontally,
 *   t[y][x] = (src[y][x] * (8 - mx) + src[y][x + 1] * mx + 4) >> 3,
 * and narrowed to bytes; the output is then
 *   (t[y][x] * (8 - my) + t[y + 1][x] * my + 4) >> 3.
 * One more source row than output rows is read, 16 bytes per row of which
 * bytes 0-8 are used.  d22 carries the newest horizontally filtered row
 * across iterations.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= mx, my <= 8 so all weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3, q8-q12 and the condition flags.
 */
1729cabdff1aSopenharmony_cifunction ff_put_vp8_bilin8_hv_neon, export=1
1730cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ mx
1731cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = mx
1732cabdff1aSopenharmony_ci        rsb             r12, r12, #8
1733cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - mx
1734cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ my
1735cabdff1aSopenharmony_ci        vdup.8          d2,  r12                @ d2 = my
1736cabdff1aSopenharmony_ci        rsb             r12, r12, #8
1737cabdff1aSopenharmony_ci        vdup.8          d3,  r12                @ d3 = 8 - my
1738cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1739cabdff1aSopenharmony_ci
1740cabdff1aSopenharmony_ci        vld1.8          {q2},     [r2], r3      @ first source row, 16 bytes fetched
1741cabdff1aSopenharmony_ci        vext.8          d5,  d4,  d5,  #1       @ d5 = its bytes 1-8
1742cabdff1aSopenharmony_ci        vmull.u8        q9,  d4,  d1            @ horizontal pass of the first row
1743cabdff1aSopenharmony_ci        vmlal.u8        q9,  d5,  d0
1744cabdff1aSopenharmony_ci        vrshrn.u16      d22, q9,  #3            @ d22 = t[0], the row carried into the loop
1745cabdff1aSopenharmony_ci1:
1746cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1747cabdff1aSopenharmony_ci        vld1.8          {q3},     [r2], r3      @ source row y + 1
1748cabdff1aSopenharmony_ci        vext.8          d7,  d6,  d7,  #1       @ d7 = its bytes 1-8
1749cabdff1aSopenharmony_ci        vmull.u8        q8,  d6,  d1            @ horizontal pass of row y + 1
1750cabdff1aSopenharmony_ci        vmlal.u8        q8,  d7,  d0
1751cabdff1aSopenharmony_ci        vld1.8          {q2},     [r2], r3      @ source row y + 2
1752cabdff1aSopenharmony_ci        vext.8          d5,  d4,  d5,  #1       @ d5 = its bytes 1-8
1753cabdff1aSopenharmony_ci        vmull.u8        q9,  d4,  d1            @ horizontal pass of row y + 2
1754cabdff1aSopenharmony_ci        vmlal.u8        q9,  d5,  d0
1755cabdff1aSopenharmony_ci        vrshrn.u16      d16, q8,  #3            @ d16 = t[y + 1]
1756cabdff1aSopenharmony_ci        vmull.u8        q10, d22, d3            @ out row 0: t[y] * (8 - my)
1757cabdff1aSopenharmony_ci        vmlal.u8        q10, d16, d2            @   + t[y + 1] * my
1758cabdff1aSopenharmony_ci        vrshrn.u16      d22, q9,  #3            @ d22 = t[y + 2]; old d22 was consumed just above
1759cabdff1aSopenharmony_ci        vmull.u8        q12, d16, d3            @ out row 1: t[y + 1] * (8 - my)
1760cabdff1aSopenharmony_ci        vmlal.u8        q12, d22, d2            @   + t[y + 2] * my
1761cabdff1aSopenharmony_ci        vrshrn.u16      d20, q10, #3            @ round and narrow out row 0
1762cabdff1aSopenharmony_ci        vst1.8          {d20},    [r0,:64], r1  @ write out row 0
1763cabdff1aSopenharmony_ci        vrshrn.u16      d23, q12, #3            @ round and narrow out row 1 (d23, so d22 survives)
1764cabdff1aSopenharmony_ci        vst1.8          {d23},    [r0,:64], r1  @ write out row 1
1765cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1766cabdff1aSopenharmony_ci
1767cabdff1aSopenharmony_ci        bx              lr
1768cabdff1aSopenharmony_ciendfunc
1769cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin4_h_neon: horizontal bilinear interpolation of rows that
 * are 4 pixels wide.  Two rows are packed into one 8-byte vector so a single
 * multiply/accumulate pair handles both.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :32 (4-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of rows
 *   [sp, #4] = mx, weight given to the pixel one byte to the right
 *
 * Every output byte is (src[x] * (8 - mx) + src[x + 1] * mx + 4) >> 3.
 * 8 source bytes are fetched per row although only bytes 0-4 are used.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= mx <= 8 so both weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3 and the condition flags.
 */
1770cabdff1aSopenharmony_cifunction ff_put_vp8_bilin4_h_neon, export=1
1771cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ mx
1772cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = mx in every byte lane
1773cabdff1aSopenharmony_ci        rsb             r12, r12, #8            @ r12 = 8 - mx
1774cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - mx in every byte lane
1775cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1776cabdff1aSopenharmony_ci1:
1777cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1778cabdff1aSopenharmony_ci        vld1.8          {d2},     [r2], r3      @ row 0, bytes 0-7
1779cabdff1aSopenharmony_ci        vext.8          d3,  d2,  d3,  #1       @ d3 low half = row 0 bytes 1-4 (top byte is stale, unused)
1780cabdff1aSopenharmony_ci        vld1.8          {d6},     [r2], r3      @ row 1, bytes 0-7
1781cabdff1aSopenharmony_ci        vext.8          d7,  d6,  d7,  #1       @ d7 low half = row 1 bytes 1-4
1782cabdff1aSopenharmony_ci        vtrn.32         q1,  q3                 @ d2 = {row0 px 0-3, row1 px 0-3}, d3 = {row0 px 1-4, row1 px 1-4}
1783cabdff1aSopenharmony_ci        vmull.u8        q2,  d2,  d1            @ both rows: src[x] * (8 - mx)
1784cabdff1aSopenharmony_ci        vmlal.u8        q2,  d3,  d0            @   + src[x + 1] * mx
1785cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q2,  #3            @ (sum + 4) >> 3, narrowed back to bytes
1786cabdff1aSopenharmony_ci        vst1.32         {d4[0]},  [r0,:32], r1  @ write row 0
1787cabdff1aSopenharmony_ci        vst1.32         {d4[1]}, [r0,:32], r1   @ write row 1
1788cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1789cabdff1aSopenharmony_ci
1790cabdff1aSopenharmony_ci        bx              lr
1791cabdff1aSopenharmony_ciendfunc
1792cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin4_v_neon: vertical bilinear interpolation of rows that
 * are 4 pixels wide.  Two output rows are packed into one 8-byte vector so a
 * single multiply/accumulate pair handles both.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :32 (4-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint), 4 bytes read per row
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of output rows
 *   [sp, #8] = my, weight given to the row below
 *
 * Every output byte is (row[y][x] * (8 - my) + row[y + 1][x] * my + 4) >> 3,
 * so one more source row than output rows is read.  Lane 0 of d2 carries the
 * most recent source row from one iteration to the next.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= my <= 8 so both weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q2 and the condition flags.
 */
1793cabdff1aSopenharmony_cifunction ff_put_vp8_bilin4_v_neon, export=1
1794cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ my
1795cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = my in every byte lane
1796cabdff1aSopenharmony_ci        rsb             r12, r12, #8            @ r12 = 8 - my
1797cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - my in every byte lane
1798cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1799cabdff1aSopenharmony_ci        vld1.32         {d2[]},   [r2], r3      @ first source row into both lanes of d2
1800cabdff1aSopenharmony_ci1:
1801cabdff1aSopenharmony_ci        vld1.32         {d3[]},   [r2]          @ row y + 1 into both lanes of d3 (r2 not advanced)
1802cabdff1aSopenharmony_ci        vld1.32         {d2[1]},  [r2], r3      @ same row again: d2 = {row y, row y + 1}
1803cabdff1aSopenharmony_ci        vld1.32         {d3[1]},  [r2], r3      @ d3 = {row y + 1, row y + 2}
1804cabdff1aSopenharmony_ci        vmull.u8        q2,  d2,  d1            @ upper rows * (8 - my)
1805cabdff1aSopenharmony_ci        vmlal.u8        q2,  d3,  d0            @   + lower rows * my
1806cabdff1aSopenharmony_ci        vtrn.32         d3,  d2                 @ moves row y + 2 into d2 lane 0 for the next pass
1807cabdff1aSopenharmony_ci        vrshrn.u16      d4,  q2,  #3            @ (sum + 4) >> 3, narrowed back to bytes
1808cabdff1aSopenharmony_ci        vst1.32         {d4[0]},  [r0,:32], r1  @ write out row 0
1809cabdff1aSopenharmony_ci        vst1.32         {d4[1]},  [r0,:32], r1  @ write out row 1
1810cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows done
1811cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left
1812cabdff1aSopenharmony_ci
1813cabdff1aSopenharmony_ci        bx              lr
1814cabdff1aSopenharmony_ciendfunc
1815cabdff1aSopenharmony_ci
/*
 * ff_put_vp8_bilin4_hv_neon: horizontal then vertical bilinear
 * interpolation of rows that are 4 pixels wide.  Two rows are packed into
 * one 8-byte vector for both passes.
 *
 * Register and stack usage as seen in the body:
 *   r0       = destination pointer, stored with a :32 (4-byte) hint
 *   r1       = byte step added to r0 after each row store
 *   r2       = source pointer (no alignment hint)
 *   r3       = byte step added to r2 after each row load
 *   [sp]     = h, number of output rows
 *   [sp, #4] = mx, horizontal weight of the pixel one byte to the right
 *   [sp, #8] = my, vertical weight of the row below
 *
 * Each source row is first filtered horizontally,
 *   t[y][x] = (src[y][x] * (8 - mx) + src[y][x + 1] * mx + 4) >> 3,
 * and narrowed to bytes; the output is then
 *   (t[y][x] * (8 - my) + t[y + 1][x] * my + 4) >> 3.
 * One more source row than output rows is read, 8 bytes per row of which
 * bytes 0-4 are used.  Lane 0 of d22 carries the newest horizontally
 * filtered row across iterations.
 * The loop body runs before the count is tested and counts down by two, so
 * at least two rows are written and an odd h is effectively rounded up.
 * NOTE(review): assumes 0 <= mx, my <= 8 so all weights fit a byte lane --
 * confirm against the callers.
 * Clobbers r12, q0-q3, q8-q11 and the condition flags.
 */
1816cabdff1aSopenharmony_cifunction ff_put_vp8_bilin4_hv_neon, export=1
1817cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ mx
1818cabdff1aSopenharmony_ci        vdup.8          d0,  r12                @ d0 = mx
1819cabdff1aSopenharmony_ci        rsb             r12, r12, #8
1820cabdff1aSopenharmony_ci        vdup.8          d1,  r12                @ d1 = 8 - mx
1821cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]           @ my
1822cabdff1aSopenharmony_ci        vdup.8          d2,  r12                @ d2 = my
1823cabdff1aSopenharmony_ci        rsb             r12, r12, #8
1824cabdff1aSopenharmony_ci        vdup.8          d3,  r12                @ d3 = 8 - my
1825cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ h
1826cabdff1aSopenharmony_ci
1827cabdff1aSopenharmony_ci        vld1.8          {d4},     [r2], r3      @ first source row, bytes 0-7
1828cabdff1aSopenharmony_ci        vext.8          d5,  d4,  d4,  #1       @ rotate by one byte; only lanes 0-3 matter later
1829cabdff1aSopenharmony_ci        vmull.u8        q9,  d4,  d1            @ horizontal pass of the first row
1830cabdff1aSopenharmony_ci        vmlal.u8        q9,  d5,  d0
1831cabdff1aSopenharmony_ci        vrshrn.u16      d22, q9,  #3            @ d22 lane 0 = t[0], carried into the loop
1832cabdff1aSopenharmony_ci1:
1833cabdff1aSopenharmony_ci        subs            r12, r12, #2            @ two rows per pass; flags are read by the bgt
1834cabdff1aSopenharmony_ci        vld1.8          {d6},     [r2], r3      @ source row y + 1
1835cabdff1aSopenharmony_ci        vext.8          d7,  d6,  d6,  #1       @ same row rotated by one byte
1836cabdff1aSopenharmony_ci        vld1.8          {d4},     [r2], r3      @ source row y + 2
1837cabdff1aSopenharmony_ci        vext.8          d5,  d4,  d4,  #1       @ same row rotated by one byte
1838cabdff1aSopenharmony_ci        vtrn.32         q3,  q2                 @ d6 = {row y+1 px 0-3, row y+2 px 0-3}, d7 = same shifted by 1
1839cabdff1aSopenharmony_ci        vmull.u8        q8,  d6,  d1            @ horizontal pass of both rows at once
1840cabdff1aSopenharmony_ci        vmlal.u8        q8,  d7,  d0
1841cabdff1aSopenharmony_ci        vrshrn.u16      d16, q8,  #3            @ d16 = {t[y + 1], t[y + 2]}
1842cabdff1aSopenharmony_ci        vmull.u8        q10, d16, d2            @ lower rows * my, taken before d16 is permuted
1843cabdff1aSopenharmony_ci        vtrn.32         d22, d16                @ d22 = {t[y], t[y + 1]}; t[y + 2] stays in d16 lane 1
1844cabdff1aSopenharmony_ci        vmlal.u8        q10, d22, d3            @   + upper rows * (8 - my)
1845cabdff1aSopenharmony_ci        vrev64.32       d22, d16                @ d22 lane 0 = t[y + 2] for the next pass
1846cabdff1aSopenharmony_ci        vrshrn.u16      d20, q10, #3            @ (sum + 4) >> 3, narrowed back to bytes
1847cabdff1aSopenharmony_ci        vst1.32         {d20[0]}, [r0,:32], r1  @ write out row 0
1848cabdff1aSopenharmony_ci        vst1.32         {d20[1]}, [r0,:32], r1  @ write out row 1
1849cabdff1aSopenharmony_ci        bgt             1b                      @ more rows left (flags from the subs above)
1850cabdff1aSopenharmony_ci
1851cabdff1aSopenharmony_ci        bx              lr
1852cabdff1aSopenharmony_ciendfunc
1853