/*
 * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21cabdff1aSopenharmony_ci
/*
 * Helper definitions used below but not defined in this file:
 * function / endfunc / X() and transpose_8x8.  They are presumably
 * supplied by these two headers -- confirm against the headers.
 */
#include "libavutil/arm/asm.S"
#include "neon.S"
24cabdff1aSopenharmony_ci
@ qpel_lowpass: 6-tap low-pass filter producing 8 output pixels.
@
@ Arguments:
@   r0, r1   - D registers holding 16 consecutive source bytes
@              (r0 = bytes 0-7, r1 = bytes 8-15).  Byte 2 is treated as
@              src[0], so the data must start at src[-2].
@   rc1, rc2 - D registers with the per-lane u8 weights applied to
@              src[0] and src[1].
@   shift    - immediate right-shift amount for the final narrowing.
@
@ Per lane the macro accumulates in 16 bits
@   src[-2] + src[3] - 5*(src[-1] + src[2]) + rc1*src[0] + rc2*src[1]
@ and narrows with a rounding right shift, saturating to unsigned 8 bit.
@ The 8 results overwrite r0.  r1 is only read.
@
@ Clobbers: q8, q9, q12, q13, d28.
.macro  qpel_lowpass    r0,  r1,  rc1, rc2, shift
        vext.8          d25, \r0, \r1, #1       @ src[-1]
        vext.8          d26, \r0, \r1, #4       @ src[ 2]
        vext.8          d24, \r0, \r1, #5       @ src[ 3]
        vaddl.u8        q9,  d25, d26           @ q9  = src[-1] + src[2]
        vaddl.u8        q8,  \r0, d24           @ q8  = src[-2] + src[3]
        vext.8          d27, \r0, \r1, #2       @ src[ 0]
        vshl.s16        q12, q9,  #2            @ q12 = 4 * q9
        vsub.s16        q8,  q8,  q9
        vext.8          d28, \r0, \r1, #3       @ src[ 1]
        vsub.s16        q8,  q8,  q12           @ q8 -= 5 * q9 in total
        vmlal.u8        q8,  d27, \rc1
        vmlal.u8        q8,  d28, \rc2
        vqrshrun.s16    \r0, q8,  #\shift       @ round, shift, saturate to u8
.endm
40cabdff1aSopenharmony_ci
@ qpel_lowpass_x2: same 6-tap filter as qpel_lowpass, applied to two
@ independent 16-byte inputs with the two computations interleaved.
@
@ Arguments:
@   r0, r1   - D register pair holding the first 16 source bytes
@   r2, r3   - D register pair holding the second 16 source bytes
@   rc1, rc2 - D registers with the u8 weights for src[0] and src[1]
@   shift    - immediate right-shift amount for the final narrowing
@
@ Results: the first output row replaces r0, the second replaces r2.
@ r1 is reused as a temporary and is destroyed.  r3 is only read.
@
@ Clobbers: q8, q9, q10, d22, q12, q13, d28, d29 and the r1 argument.
.macro  qpel_lowpass_x2 r0,  r1,  r2,  r3,  rc1, rc2, shift
        vext.8          d25, \r0, \r1, #1       @ src[-1]
        vext.8          d26, \r0, \r1, #4       @ src[ 2]
        vext.8          d24, \r0, \r1, #5       @ src[ 3]
        vaddl.u8        q9,  d25, d26
        vaddl.u8        q8,  \r0, d24
        vext.8          d29, \r0, \r1, #2       @ src[ 0]
        vext.8          d28, \r0, \r1, #3       @ src[ 1]
        vshl.s16        q10, q9,  #2
        @ The first input pair is fully extracted by now, so r1 and r0 can
        @ be reused as temporaries for the second row.
        vext.8          \r1, \r2, \r3, #1       @ src[-1]
        vsub.s16        q8,  q8,  q9
        vext.8          d22, \r2, \r3, #4       @ src[ 2]
        vext.8          \r0, \r2, \r3, #5       @ src[ 3]
        vaddl.u8        q13, \r1, d22
        vaddl.u8        q12, \r2, \r0
        vsub.s16        q8,  q8,  q10
        vshl.s16        q9,  q13, #2
        vsub.s16        q12, q12, q13
        vmlal.u8        q8,  d29, \rc1
        vmlal.u8        q8,  d28, \rc2
        vsub.s16        q12, q12, q9
        vext.8          d26, \r2, \r3, #2       @ src[ 0]
        vext.8          d27, \r2, \r3, #3       @ src[ 1]
        vmlal.u8        q12, d26, \rc1
        vmlal.u8        q12, d27, \rc2
        vqrshrun.s16    \r0, q8,  #\shift       @ first row result
        vqrshrun.s16    \r2, q12, #\shift       @ second row result
.endm
69cabdff1aSopenharmony_ci
@ rv40_qpel8_h shift: emits put_rv40_qpel8_h_lp_packed_s<shift>_neon.
@
@ The generated routine runs the horizontal filter over a column of
@ 8-pixel-wide rows and stores each 8-byte result row contiguously
@ through r12 (no destination stride).
@
@ Register contract, as read from the body:
@   r1     - source pointer, advanced by r2 after every row of the loop
@   r2     - source stride in bytes
@   r3     - row counter, decremented by 2 per loop iteration
@   r12    - packed destination, accessed with a 64-bit alignment hint
@            and advanced by 8 bytes per stored row
@   d0, d1 - filter weights, passed on as rc1 / rc2
@
@ Rows are handled in pairs while r3 stays above zero, then one more row
@ is filtered after the loop, so an even positive r3 yields r3 + 1 rows.
@
@ Clobbers: q2, q3, the scratch registers of qpel_lowpass_x2 and
@ qpel_lowpass, r1, r3, r12 and the flags.  d0 and d1 are preserved.
.macro  rv40_qpel8_h    shift
function put_rv40_qpel8_h_lp_packed_s\shift\()_neon
1:
        vld1.8          {q2},     [r1], r2
        vld1.8          {q3},     [r1], r2
        qpel_lowpass_x2 d4,  d5,  d6,  d7,  d0,  d1,  \shift
        vst1.8          {d4},     [r12,:64]!
        vst1.8          {d6},     [r12,:64]!
        subs            r3,  r3,  #2
        bgt             1b
        @ trailing odd row: r1 is not advanced any further
        vld1.8          {q2},     [r1]
        qpel_lowpass    d4,  d5,  d0,  d1,  \shift
        vst1.8          {d4},     [r12,:64]!
        bx              lr
endfunc
.endm
86cabdff1aSopenharmony_ci
@ rv40_qpel8_v shift, type: emits
@ <type>_rv40_qpel8_v_lp_packed_s<shift>_neon.
@
@ The generated routine runs the vertical filter over a packed buffer of
@ 8-byte rows and writes an 8x8 block to the destination.
@
@ Register contract, as read from the body:
@   r0     - destination, 64-bit aligned rows, row pitch r2
@   r1     - packed source (64-bit aligned), 13 rows of 8 bytes are read
@   r2     - destination stride in bytes
@   d0, d1 - filter weights, passed on as rc1 / rc2
@
@ Method: the rows are transposed so that each column becomes a 16-byte
@ register pair, the horizontal filter macro is applied to the columns,
@ and the result is transposed back.  transpose_8x8 is defined outside
@ this file and is assumed to be a plain 8x8 byte transpose -- confirm.
@
@ Only 13 rows are loaded, so d15, d30 and d31 enter the second transpose
@ with stale contents.  Under the assumption above these end up in bytes
@ 13-15 of each column, and the filter reads at most byte 5 + 7 = 12 for
@ its 8 outputs.
@
@ With type == avg the existing destination rows are loaded and combined
@ with the filtered rows by a rounding halving add before the store.
@
@ On return r0 has advanced by 8 rows and r1 by 13 * 8 = 104 bytes.
@ Clobbers: q1-q7, q8-q10, d22, q12-q14.  d8-d15 are not saved here; the
@ callers visible in this file vpush / vpop them around the call.
.macro  rv40_qpel8_v    shift, type
function \type\()_rv40_qpel8_v_lp_packed_s\shift\()_neon
        vld1.64         {d2},     [r1,:64]!
        vld1.64         {d3},     [r1,:64]!
        vld1.64         {d4},     [r1,:64]!
        vld1.64         {d5},     [r1,:64]!
        vld1.64         {d6},     [r1,:64]!
        vld1.64         {d7},     [r1,:64]!
        vld1.64         {d8},     [r1,:64]!
        vld1.64         {d9},     [r1,:64]!
        vld1.64         {d10},    [r1,:64]!
        vld1.64         {d11},    [r1,:64]!
        vld1.64         {d12},    [r1,:64]!
        vld1.64         {d13},    [r1,:64]!
        vld1.64         {d14},    [r1,:64]!
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
        transpose_8x8   d10, d11, d12, d13, d14, d15, d30, d31
        qpel_lowpass_x2 d2,  d10, d3,  d11, d0,  d1,  \shift
        qpel_lowpass_x2 d4,  d12, d5,  d13, d0,  d1,  \shift
        qpel_lowpass_x2 d6,  d14, d7,  d15, d0,  d1,  \shift
        qpel_lowpass_x2 d8,  d30, d9,  d31, d0,  d1,  \shift
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
  .ifc \type,avg
        vld1.64         d12,      [r0,:64], r2
        vld1.64         d13,      [r0,:64], r2
        vld1.64         d14,      [r0,:64], r2
        vld1.64         d15,      [r0,:64], r2
        vld1.64         d16,      [r0,:64], r2
        vld1.64         d17,      [r0,:64], r2
        vld1.64         d18,      [r0,:64], r2
        vld1.64         d19,      [r0,:64], r2
        sub             r0,  r0,  r2,  lsl #3   @ rewind dst by 8 rows
        vrhadd.u8       q1,  q1,  q6
        vrhadd.u8       q2,  q2,  q7
        vrhadd.u8       q3,  q3,  q8
        vrhadd.u8       q4,  q4,  q9
  .endif
        vst1.64         d2,       [r0,:64], r2
        vst1.64         d3,       [r0,:64], r2
        vst1.64         d4,       [r0,:64], r2
        vst1.64         d5,       [r0,:64], r2
        vst1.64         d6,       [r0,:64], r2
        vst1.64         d7,       [r0,:64], r2
        vst1.64         d8,       [r0,:64], r2
        vst1.64         d9,       [r0,:64], r2
        bx              lr
endfunc
.endm
135cabdff1aSopenharmony_ci
136cabdff1aSopenharmony_ci        rv40_qpel8_h    5
137cabdff1aSopenharmony_ci        rv40_qpel8_h    6
138cabdff1aSopenharmony_ci
139cabdff1aSopenharmony_ci.macro  rv40_qpel       type
@ <type>_rv40_qpel8_h_lowpass_neon: horizontal 6-tap pass with the shift
@ fixed at 6, written straight to the destination.
@
@ In:  r0     - destination, 64-bit aligned rows
@      r1     - source, already moved back to src[-2]
@      r2     - stride in bytes, used for both source and destination
@      r3     - row count, processed two rows per iteration
@      d0, d1 - filter weights for src[0] / src[1]
@
@ avg variant: r12 walks the existing destination rows, which are merged
@ with the filtered rows by a rounding halving add before the store.
@
@ Out: r0 and r1 advanced past the processed rows.
@ Clobbers: q2, q3, the qpel_lowpass_x2 scratch registers, r3, flags and
@ for avg also d3, d16 and r12.
function \type\()_rv40_qpel8_h_lowpass_neon
  .ifc \type,avg
        mov             r12, r0
  .endif
1:
        vld1.8          {q2},     [r1], r2
        vld1.8          {q3},     [r1], r2
        qpel_lowpass_x2 d4,  d5,  d6,  d7,  d0,  d1,  6
  .ifc \type,avg
        vld1.8          {d3},     [r12,:64], r2
        vld1.8          {d16},    [r12,:64], r2
        vrhadd.u8       d4,  d4,  d3
        vrhadd.u8       d6,  d6,  d16
  .endif
        vst1.8          {d4},     [r0,:64], r2
        vst1.8          {d6},     [r0,:64], r2
        subs            r3,  r3,  #2
        bgt             1b
        bx              lr
endfunc
160cabdff1aSopenharmony_ci
@ <type>_rv40_qpel8_v_lowpass_neon: vertical 6-tap pass with the shift
@ fixed at 6, reading strided source rows and writing an 8x8 block.
@
@ In:  r0     - destination, 64-bit aligned rows
@      r1     - source, already moved up by two rows
@      r2     - stride in bytes, used for both source and destination
@      d0, d1 - filter weights, passed on as rc1 / rc2
@
@ 13 source rows are loaded, transposed so each column forms a 16-byte
@ register pair, filtered with the horizontal macro, and transposed
@ back.  transpose_8x8 is defined outside this file and is assumed to be
@ a plain 8x8 byte transpose -- confirm.  d15, d30 and d31 are not
@ loaded, but under that assumption they only feed column bytes 13-15,
@ which the filter does not read for its 8 outputs.
@
@ avg variant: the existing destination rows are loaded and merged with
@ the filtered rows by a rounding halving add before the store.
@
@ Out: r0 advanced by 8 rows, r1 advanced by 12 rows (the last load does
@ not post-increment).
@ Clobbers: q1-q7, q8-q10, d22, q12-q14.  d8-d15 are not saved here; the
@ callers visible in this file vpush / vpop them around the call.
function \type\()_rv40_qpel8_v_lowpass_neon
        vld1.64         {d2},     [r1], r2
        vld1.64         {d3},     [r1], r2
        vld1.64         {d4},     [r1], r2
        vld1.64         {d5},     [r1], r2
        vld1.64         {d6},     [r1], r2
        vld1.64         {d7},     [r1], r2
        vld1.64         {d8},     [r1], r2
        vld1.64         {d9},     [r1], r2
        vld1.64         {d10},    [r1], r2
        vld1.64         {d11},    [r1], r2
        vld1.64         {d12},    [r1], r2
        vld1.64         {d13},    [r1], r2
        vld1.64         {d14},    [r1]
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
        transpose_8x8   d10, d11, d12, d13, d14, d15, d30, d31
        qpel_lowpass_x2 d2,  d10, d3,  d11, d0,  d1,  6
        qpel_lowpass_x2 d4,  d12, d5,  d13, d0,  d1,  6
        qpel_lowpass_x2 d6,  d14, d7,  d15, d0,  d1,  6
        qpel_lowpass_x2 d8,  d30, d9,  d31, d0,  d1,  6
        transpose_8x8   d2,  d3,  d4,  d5,  d6,  d7,  d8,  d9
  .ifc \type,avg
        vld1.64         d12,      [r0,:64], r2
        vld1.64         d13,      [r0,:64], r2
        vld1.64         d14,      [r0,:64], r2
        vld1.64         d15,      [r0,:64], r2
        vld1.64         d16,      [r0,:64], r2
        vld1.64         d17,      [r0,:64], r2
        vld1.64         d18,      [r0,:64], r2
        vld1.64         d19,      [r0,:64], r2
        sub             r0,  r0,  r2,  lsl #3   @ rewind dst by 8 rows
        vrhadd.u8       q1,  q1,  q6
        vrhadd.u8       q2,  q2,  q7
        vrhadd.u8       q3,  q3,  q8
        vrhadd.u8       q4,  q4,  q9
  .endif
        vst1.64         d2,       [r0,:64], r2
        vst1.64         d3,       [r0,:64], r2
        vst1.64         d4,       [r0,:64], r2
        vst1.64         d5,       [r0,:64], r2
        vst1.64         d6,       [r0,:64], r2
        vst1.64         d7,       [r0,:64], r2
        vst1.64         d8,       [r0,:64], r2
        vst1.64         d9,       [r0,:64], r2
        bx              lr
endfunc
207cabdff1aSopenharmony_ci
208cabdff1aSopenharmony_ci        rv40_qpel8_v    5, \type
209cabdff1aSopenharmony_ci        rv40_qpel8_v    6, \type
210cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc10_neon: 8x8 block, horizontal pass only,
@ weights (52, 20), shift 6.
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helper).
@ Tail-calls the horizontal helper, so the helper returns to our caller.
function ff_\type\()_rv40_qpel8_mc10_neon, export=1
        sub             r1,  r1,  #2            @ filter window starts at src[-2]
        mov             r3,  #8                 @ 8 rows
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        b               \type\()_rv40_qpel8_h_lowpass_neon
endfunc
218cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc30_neon: 8x8 block, horizontal pass only,
@ weights (20, 52), shift 6 -- the mc10 weights mirrored.
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helper).
@ Tail-calls the horizontal helper, so the helper returns to our caller.
function ff_\type\()_rv40_qpel8_mc30_neon, export=1
        sub             r1,  r1,  #2            @ filter window starts at src[-2]
        mov             r3,  #8                 @ 8 rows
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        b               \type\()_rv40_qpel8_h_lowpass_neon
endfunc
226cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc01_neon: 8x8 block, vertical pass only,
@ weights (52, 20), shift 6.
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helper).
@ d8-d15 are saved because the vertical helper overwrites them.
@ r4 is not used in this function; presumably it is pushed only to keep
@ the stack 8-byte aligned -- confirm.
function ff_\type\()_rv40_qpel8_mc01_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             r1,  r1,  r2,  lsl #1   @ start two rows above src
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
237cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc11_neon: 8x8 block, two-pass filter.
@   horizontal: weights (52, 20), shift 6, into a packed stack buffer
@   vertical:   weights (52, 20), shift 6, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: the horizontal helper writes 13 rows of
@ 8 bytes (r3 = 12 plus its trailing row) and up to 7 bytes are lost to
@ rounding the pointer up to 8-byte alignment.  d0 / d1 survive the
@ horizontal helper, so they are not reloaded for the vertical pass.
function ff_\type\()_rv40_qpel8_mc11_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
257cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc21_neon: 8x8 block, two-pass filter.
@   horizontal: weights (20, 20), shift 5, into a packed stack buffer
@   vertical:   weights (52, 20), shift 6, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: 13 packed rows of 8 bytes plus up to
@ 7 bytes of alignment slack.  d1 survives the horizontal helper, so
@ only d0 is reloaded for the vertical pass.
function ff_\type\()_rv40_qpel8_mc21_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        vmov.i8         d0,  #52                @ vertical weights become (52, 20)
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
278cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc31_neon: 8x8 block, two-pass filter.
@   horizontal: weights (20, 52), shift 6, into a packed stack buffer
@   vertical:   weights (52, 20), shift 6, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: 13 packed rows of 8 bytes plus up to
@ 7 bytes of alignment slack.  d0 / d1 survive the horizontal helper and
@ are swapped to obtain the vertical weights.
function ff_\type\()_rv40_qpel8_mc31_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        vswp            d0,  d1                 @ vertical weights become (52, 20)
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
299cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc12_neon: 8x8 block, two-pass filter.
@   horizontal: weights (52, 20), shift 6, into a packed stack buffer
@   vertical:   weights (20, 20), shift 5, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: 13 packed rows of 8 bytes plus up to
@ 7 bytes of alignment slack.  d1 survives the horizontal helper, so
@ only d0 is reloaded for the vertical pass.
function ff_\type\()_rv40_qpel8_mc12_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        vmov.i8         d0,  #20                @ vertical weights become (20, 20)
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
320cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc22_neon: 8x8 block, two-pass filter.
@   horizontal: weights (20, 20), shift 5, into a packed stack buffer
@   vertical:   weights (20, 20), shift 5, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: 13 packed rows of 8 bytes plus up to
@ 7 bytes of alignment slack.  d0 / d1 survive the horizontal helper, so
@ they are not reloaded for the vertical pass.
function ff_\type\()_rv40_qpel8_mc22_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
340cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc32_neon: 8x8 block, two-pass filter.
@   horizontal: weights (20, 52), shift 6, into a packed stack buffer
@   vertical:   weights (20, 20), shift 5, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: 13 packed rows of 8 bytes plus up to
@ 7 bytes of alignment slack.  d0 survives the horizontal helper, so
@ only d1 is reloaded for the vertical pass.
function ff_\type\()_rv40_qpel8_mc32_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        vmov.i8         d1,  #20                @ vertical weights become (20, 20)
        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
361cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc03_neon: 8x8 block, vertical pass only,
@ weights (20, 52), shift 6 -- the mc01 weights mirrored.
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helper).
@ d8-d15 are saved because the vertical helper overwrites them.
@ r4 is not used in this function; presumably it is pushed only to keep
@ the stack 8-byte aligned -- confirm.
function ff_\type\()_rv40_qpel8_mc03_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             r1,  r1,  r2,  lsl #1   @ start two rows above src
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
372cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc33_neon: sets r3 = 8 and tail-calls
@ ff_<type>_pixels8_xy2_neon, which is defined outside this file.
@ r3 is presumably the row count expected by that routine -- confirm.
@ r0-r2 are passed through untouched.
function ff_\type\()_rv40_qpel8_mc33_neon, export=1
        mov             r3,  #8
        b               X(ff_\type\()_pixels8_xy2_neon)
endfunc
377cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc13_neon: 8x8 block, two-pass filter.
@   horizontal: weights (52, 20), shift 6, into a packed stack buffer
@   vertical:   weights (20, 52), shift 6, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: 13 packed rows of 8 bytes plus up to
@ 7 bytes of alignment slack.  d0 / d1 survive the horizontal helper and
@ are swapped to obtain the vertical weights.
function ff_\type\()_rv40_qpel8_mc13_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        vswp            d0,  d1                 @ vertical weights become (20, 52)
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
398cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel8_mc23_neon: 8x8 block, two-pass filter.
@   horizontal: weights (20, 20), shift 5, into a packed stack buffer
@   vertical:   weights (20, 52), shift 6, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ The scratch area is 14*8 bytes: 13 packed rows of 8 bytes plus up to
@ 7 bytes of alignment slack.  d0 survives the horizontal helper, so
@ only d1 is reloaded for the vertical pass.
function ff_\type\()_rv40_qpel8_mc23_neon, export=1
        push            {r4, lr}
        vpush           {d8-d15}
        sub             sp,  sp,  #14*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        mov             r3,  #12
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        vmov.i8         d1,  #52                @ vertical weights become (20, 52)
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #14*8
        vpop            {d8-d15}
        pop             {r4, pc}
endfunc
419cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel16_mc10_neon: 16x16 block, horizontal pass only,
@ weights (52, 20), shift 6, done as two 8-pixel-wide halves of 16 rows.
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helper).
@
@ The local label below is also the entry point for the mc30 variant,
@ which branches here after loading its own weights into d0 / d1.
function ff_\type\()_rv40_qpel16_mc10_neon, export=1
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
.L\type\()_rv40_qpel16_h:
        push            {r1, lr}                @ keep the original src pointer
        sub             r1,  r1,  #2            @ filter window starts at src[-2]
        mov             r3,  #16
        bl              \type\()_rv40_qpel8_h_lowpass_neon
        pop             {r1, lr}
        sub             r0,  r0,  r2,  lsl #4   @ rewind dst by 16 rows
        add             r0,  r0,  #8            @ right half of dst
        add             r1,  r1,  #6            @ src + 8 - 2 for the right half
        mov             r3,  #16
        b               \type\()_rv40_qpel8_h_lowpass_neon
endfunc
435cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel16_mc30_neon: 16x16 block, horizontal pass only,
@ weights (20, 52).  Loads the weights and continues in the shared
@ 16-wide horizontal code at the local label inside the mc10 variant.
function ff_\type\()_rv40_qpel16_mc30_neon, export=1
        vmov.i8         d0,  #20
        vmov.i8         d1,  #52
        b               .L\type\()_rv40_qpel16_h
endfunc
441cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel16_mc01_neon: 16x16 block, vertical pass only,
@ weights (52, 20), shift 6, done as four 8x8 calls of the vertical
@ helper (left column top and bottom, then right column top and bottom).
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helper).
@
@ The local label below is a shared entry point that expects the weights
@ to be in d0 / d1 already.
function ff_\type\()_rv40_qpel16_mc01_neon, export=1
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
.L\type\()_rv40_qpel16_v:
        sub             r1,  r1,  r2,  lsl #1   @ start two rows above src
        push            {r1, lr}                @ adjusted src kept for the right half
        vpush           {d8-d15}
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        @ the helper leaves r1 12 rows ahead, so step back 4 to reach row 8
        sub             r1,  r1,  r2,  lsl #2
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        ldr             r1,  [sp, #64]          @ saved r1 sits above the 64-byte vpush area
        sub             r0,  r0,  r2,  lsl #4   @ rewind dst by 16 rows
        add             r0,  r0,  #8            @ right half of dst
        add             r1,  r1,  #8            @ right half of src
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        sub             r1,  r1,  r2,  lsl #2
        bl              \type\()_rv40_qpel8_v_lowpass_neon
        vpop            {d8-d15}
        pop             {r1, pc}
endfunc
462cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel16_mc11_neon: 16x16 block, two-pass filter.
@   horizontal: weights (52, 20), shift 6, into a packed stack buffer
@   vertical:   weights (52, 20), shift 6, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ Scratch layout (44*8 bytes): the horizontal helper is run twice with
@ r3 = 20, each run writing 21 packed rows of 8 bytes (168 bytes).  r12
@ carries on from the first run, so the right half directly follows the
@ left half.  Up to 7 bytes are lost to pointer alignment.
@
@ The local label below is the shared vertical stage and epilogue.  The
@ mc21 and mc31 16-wide variants branch to it with the same stack frame
@ and their own weights in d0 / d1.
function ff_\type\()_rv40_qpel16_mc11_neon, export=1
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        push            {r1, lr}                @ adjusted src kept for the right half
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        mov             r3,  #20
        vmov.i8         d0,  #52
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
        ldr             r1,  [sp, #416]         @ 44*8 scratch + 64 vpush = saved r1
        add             r1,  r1,  #8            @ right half of src
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s6_neon
.L\type\()_rv40_qpel16_v_s6:
        add             r1,  sp,  #7
        bic             r1,  r1,  #7            @ r1 = start of scratch again
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        @ the helper consumed 13 rows (104 bytes), step back 40 to reach row 8
        sub             r1,  r1,  #40
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        @ r1 is now 168 bytes into the scratch, the start of the right half
        sub             r0,  r0,  r2,  lsl #4   @ rewind dst by 16 rows
        add             r0,  r0,  #8            @ right half of dst
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        sub             r1,  r1,  #40
        bl              \type\()_rv40_qpel8_v_lp_packed_s6_neon
        add             sp,  sp,  #44*8
        vpop            {d8-d15}
        pop             {r1, pc}
endfunc
494cabdff1aSopenharmony_ci
@ ff_<type>_rv40_qpel16_mc21_neon: 16x16 block, two-pass filter.
@   horizontal: weights (20, 20), shift 5, into a packed stack buffer
@   vertical:   weights (52, 20), shift 6, from the buffer to dst
@ In: r0 = dst, r1 = src, r2 = stride (as consumed by the helpers).
@
@ Builds the same stack frame as the 16-wide mc11 variant (saved r1 and
@ lr, d8-d15, 44*8 bytes of scratch) and then branches to the shared
@ vertical stage inside mc11, which also unwinds the frame and returns.
function ff_\type\()_rv40_qpel16_mc21_neon, export=1
        sub             r1,  r1,  r2,  lsl #1   @ two rows up ...
        sub             r1,  r1,  #2            @ ... and two pixels left
        push            {r1, lr}                @ adjusted src kept for the right half
        vpush           {d8-d15}
        sub             sp,  sp,  #44*8
        add             r12, sp,  #7
        bic             r12, r12, #7            @ r12 = 8-byte aligned scratch
        mov             r3,  #20
        vmov.i8         d0,  #20
        vmov.i8         d1,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        ldr             r1,  [sp, #416]         @ 44*8 scratch + 64 vpush = saved r1
        add             r1,  r1,  #8            @ right half of src
        mov             r3,  #20
        bl              put_rv40_qpel8_h_lp_packed_s5_neon
        vmov.i8         d0,  #52                @ vertical weights become (52, 20)
        b               .L\type\()_rv40_qpel16_v_s6
endfunc
514cabdff1aSopenharmony_ci
/*
 * qpel16 mc31 entry point (put or avg, chosen by the macro argument).
 * Runs the packed horizontal lowpass helper (s6 variant) with d0 = 20 and
 * d1 = 52 on both 8-byte halves into the stack scratch area, swaps d0 and d1
 * and branches to the shared _rv40_qpel16_v_s6 tail, which also releases the
 * frame and returns.
 * NOTE(review): r0/r1/r2 are presumably dst/src/stride, d0/d1 the filter
 * coefficients and r3 the row count read by the helpers (defined outside
 * this section) - confirm there.
 */
515cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc31_neon, export=1
516cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #1   @ r1 -= 2 * r2
517cabdff1aSopenharmony_ci        sub             r1,  r1,  #2            @ r1 -= 2
518cabdff1aSopenharmony_ci        push            {r1, lr}                @ adjusted r1 is reloaded for the second half
519cabdff1aSopenharmony_ci        vpush           {d8-d15}                @ 64 bytes, callee-saved NEON registers
520cabdff1aSopenharmony_ci        sub             sp,  sp,  #44*8         @ 352 bytes of scratch
521cabdff1aSopenharmony_ci        add             r12, sp,  #7
522cabdff1aSopenharmony_ci        bic             r12, r12, #7            @ r12 = scratch base rounded up to 8 bytes
523cabdff1aSopenharmony_ci        mov             r3,  #20
524cabdff1aSopenharmony_ci        vmov.i8         d0,  #20
525cabdff1aSopenharmony_ci        vmov.i8         d1,  #52
526cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
527cabdff1aSopenharmony_ci        ldr             r1,  [sp, #416]         @ pushed r1: 352 (scratch) + 64 (d8-d15)
528cabdff1aSopenharmony_ci        add             r1,  r1,  #8            @ second half starts 8 bytes further on
529cabdff1aSopenharmony_ci        mov             r3,  #20
530cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
531cabdff1aSopenharmony_ci        vswp            d0,  d1                 @ d0 = 52, d1 = 20 for the vertical tail
532cabdff1aSopenharmony_ci        b               .L\type\()_rv40_qpel16_v_s6
533cabdff1aSopenharmony_ciendfunc
534cabdff1aSopenharmony_ci
/*
 * qpel16 mc12 entry point (put or avg, chosen by the macro argument), plus
 * the shared _rv40_qpel16_v_s5 tail that other entry points branch to.
 * Runs the packed horizontal lowpass helper (s6 variant) with d0 = 52 and
 * d1 = 20 on both 8-byte halves into the stack scratch area, sets d0 = 20 and
 * falls through into the tail. The tail calls the vertical s5 helper four
 * times, then frees the scratch area, restores d8-d15 and returns.
 * NOTE(review): r0/r1/r2 are presumably dst/src/stride, d0/d1 the filter
 * coefficients and r3 the row count read by the helpers (defined outside
 * this section) - confirm there.
 */
535cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc12_neon, export=1
536cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #1   @ r1 -= 2 * r2
537cabdff1aSopenharmony_ci        sub             r1,  r1,  #2            @ r1 -= 2
538cabdff1aSopenharmony_ci        push            {r1, lr}                @ adjusted r1 is reloaded for the second half
539cabdff1aSopenharmony_ci        vpush           {d8-d15}                @ 64 bytes, callee-saved NEON registers
540cabdff1aSopenharmony_ci        sub             sp,  sp,  #44*8         @ 352 bytes of scratch
541cabdff1aSopenharmony_ci        add             r12, sp,  #7
542cabdff1aSopenharmony_ci        bic             r12, r12, #7            @ r12 = scratch base rounded up to 8 bytes
543cabdff1aSopenharmony_ci        mov             r3,  #20
544cabdff1aSopenharmony_ci        vmov.i8         d0,  #52
545cabdff1aSopenharmony_ci        vmov.i8         d1,  #20
546cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
547cabdff1aSopenharmony_ci        ldr             r1,  [sp, #416]         @ pushed r1: 352 (scratch) + 64 (d8-d15)
548cabdff1aSopenharmony_ci        add             r1,  r1,  #8            @ second half starts 8 bytes further on
549cabdff1aSopenharmony_ci        mov             r3,  #20
550cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
551cabdff1aSopenharmony_ci        vmov.i8         d0,  #20                @ d0 = d1 = 20 for the vertical tail
552cabdff1aSopenharmony_ci.L\type\()_rv40_qpel16_v_s5:
553cabdff1aSopenharmony_ci        add             r1,  sp,  #7
554cabdff1aSopenharmony_ci        bic             r1,  r1,  #7            @ r1 = same rounded-up scratch base as r12 above
555cabdff1aSopenharmony_ci        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
556cabdff1aSopenharmony_ci        sub             r1,  r1,  #40           @ NOTE(review): presumably rewinds part of the helper advance - confirm
557cabdff1aSopenharmony_ci        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
558cabdff1aSopenharmony_ci        sub             r0,  r0,  r2,  lsl #4   @ r0 -= 16 * r2
559cabdff1aSopenharmony_ci        add             r0,  r0,  #8            @ r0 += 8: second 8-byte half
560cabdff1aSopenharmony_ci        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
561cabdff1aSopenharmony_ci        sub             r1,  r1,  #40
562cabdff1aSopenharmony_ci        bl              \type\()_rv40_qpel8_v_lp_packed_s5_neon
563cabdff1aSopenharmony_ci        add             sp,  sp,  #44*8         @ release the scratch area
564cabdff1aSopenharmony_ci        vpop            {d8-d15}
565cabdff1aSopenharmony_ci        pop             {r1, pc}                @ drop the saved r1 slot and return
566cabdff1aSopenharmony_ciendfunc
567cabdff1aSopenharmony_ci
/*
 * qpel16 mc22 entry point (put or avg, chosen by the macro argument).
 * Runs the packed horizontal lowpass helper (s5 variant) with d0 = d1 = 20
 * on both 8-byte halves into the stack scratch area, then branches to the
 * shared _rv40_qpel16_v_s5 tail with d0/d1 unchanged. The tail also releases
 * the frame and returns.
 * NOTE(review): r0/r1/r2 are presumably dst/src/stride, d0/d1 the filter
 * coefficients and r3 the row count read by the helpers (defined outside
 * this section) - confirm there.
 */
568cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc22_neon, export=1
569cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #1   @ r1 -= 2 * r2
570cabdff1aSopenharmony_ci        sub             r1,  r1,  #2            @ r1 -= 2
571cabdff1aSopenharmony_ci        push            {r1, lr}                @ adjusted r1 is reloaded for the second half
572cabdff1aSopenharmony_ci        vpush           {d8-d15}                @ 64 bytes, callee-saved NEON registers
573cabdff1aSopenharmony_ci        sub             sp,  sp,  #44*8         @ 352 bytes of scratch
574cabdff1aSopenharmony_ci        add             r12, sp,  #7
575cabdff1aSopenharmony_ci        bic             r12, r12, #7            @ r12 = scratch base rounded up to 8 bytes
576cabdff1aSopenharmony_ci        mov             r3,  #20
577cabdff1aSopenharmony_ci        vmov.i8         d0,  #20
578cabdff1aSopenharmony_ci        vmov.i8         d1,  #20
579cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s5_neon
580cabdff1aSopenharmony_ci        ldr             r1,  [sp, #416]         @ pushed r1: 352 (scratch) + 64 (d8-d15)
581cabdff1aSopenharmony_ci        add             r1,  r1,  #8            @ second half starts 8 bytes further on
582cabdff1aSopenharmony_ci        mov             r3,  #20
583cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s5_neon
584cabdff1aSopenharmony_ci        b               .L\type\()_rv40_qpel16_v_s5
585cabdff1aSopenharmony_ciendfunc
586cabdff1aSopenharmony_ci
/*
 * qpel16 mc32 entry point (put or avg, chosen by the macro argument).
 * Runs the packed horizontal lowpass helper (s6 variant) with d0 = 20 and
 * d1 = 52 on both 8-byte halves into the stack scratch area, resets d1 to 20
 * and branches to the shared _rv40_qpel16_v_s5 tail, which also releases the
 * frame and returns.
 * NOTE(review): r0/r1/r2 are presumably dst/src/stride, d0/d1 the filter
 * coefficients and r3 the row count read by the helpers (defined outside
 * this section) - confirm there.
 */
587cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc32_neon, export=1
588cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #1   @ r1 -= 2 * r2
589cabdff1aSopenharmony_ci        sub             r1,  r1,  #2            @ r1 -= 2
590cabdff1aSopenharmony_ci        push            {r1, lr}                @ adjusted r1 is reloaded for the second half
591cabdff1aSopenharmony_ci        vpush           {d8-d15}                @ 64 bytes, callee-saved NEON registers
592cabdff1aSopenharmony_ci        sub             sp,  sp,  #44*8         @ 352 bytes of scratch
593cabdff1aSopenharmony_ci        add             r12, sp,  #7
594cabdff1aSopenharmony_ci        bic             r12, r12, #7            @ r12 = scratch base rounded up to 8 bytes
595cabdff1aSopenharmony_ci        mov             r3,  #20
596cabdff1aSopenharmony_ci        vmov.i8         d0,  #20
597cabdff1aSopenharmony_ci        vmov.i8         d1,  #52
598cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
599cabdff1aSopenharmony_ci        ldr             r1,  [sp, #416]         @ pushed r1: 352 (scratch) + 64 (d8-d15)
600cabdff1aSopenharmony_ci        add             r1,  r1,  #8            @ second half starts 8 bytes further on
601cabdff1aSopenharmony_ci        mov             r3,  #20
602cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
603cabdff1aSopenharmony_ci        vmov.i8         d1,  #20                @ d0 = d1 = 20 for the vertical tail
604cabdff1aSopenharmony_ci        b               .L\type\()_rv40_qpel16_v_s5
605cabdff1aSopenharmony_ciendfunc
606cabdff1aSopenharmony_ci
/*
 * qpel16 mc03 entry point (put or avg, chosen by the macro argument).
 * Only sets d0 = 20 and d1 = 52 in every byte lane, then continues at the
 * shared _rv40_qpel16_v label, which is defined outside this section.
 * NOTE(review): d0/d1 are presumably the filter coefficients used by that
 * vertical-only path - confirm at the label.
 */
607cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc03_neon, export=1
608cabdff1aSopenharmony_ci        vmov.i8         d0,  #20
609cabdff1aSopenharmony_ci        vmov.i8         d1,  #52
610cabdff1aSopenharmony_ci        b               .L\type\()_rv40_qpel16_v
611cabdff1aSopenharmony_ciendfunc
612cabdff1aSopenharmony_ci
/*
 * qpel16 mc13 entry point (put or avg, chosen by the macro argument).
 * Runs the packed horizontal lowpass helper (s6 variant) with d0 = 52 and
 * d1 = 20 on both 8-byte halves into the stack scratch area, swaps d0 and d1
 * and branches to the shared _rv40_qpel16_v_s6 tail, which also releases the
 * frame and returns.
 * NOTE(review): r0/r1/r2 are presumably dst/src/stride, d0/d1 the filter
 * coefficients and r3 the row count read by the helpers (defined outside
 * this section) - confirm there.
 */
613cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc13_neon, export=1
614cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #1   @ r1 -= 2 * r2
615cabdff1aSopenharmony_ci        sub             r1,  r1,  #2            @ r1 -= 2
616cabdff1aSopenharmony_ci        push            {r1, lr}                @ adjusted r1 is reloaded for the second half
617cabdff1aSopenharmony_ci        vpush           {d8-d15}                @ 64 bytes, callee-saved NEON registers
618cabdff1aSopenharmony_ci        sub             sp,  sp,  #44*8         @ 352 bytes of scratch
619cabdff1aSopenharmony_ci        add             r12, sp,  #7
620cabdff1aSopenharmony_ci        bic             r12, r12, #7            @ r12 = scratch base rounded up to 8 bytes
621cabdff1aSopenharmony_ci        mov             r3,  #20
622cabdff1aSopenharmony_ci        vmov.i8         d0,  #52
623cabdff1aSopenharmony_ci        vmov.i8         d1,  #20
624cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
625cabdff1aSopenharmony_ci        ldr             r1,  [sp, #416]         @ pushed r1: 352 (scratch) + 64 (d8-d15)
626cabdff1aSopenharmony_ci        add             r1,  r1,  #8            @ second half starts 8 bytes further on
627cabdff1aSopenharmony_ci        mov             r3,  #20
628cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s6_neon
629cabdff1aSopenharmony_ci        vswp            d0,  d1                 @ d0 = 20, d1 = 52 for the vertical tail
630cabdff1aSopenharmony_ci        b               .L\type\()_rv40_qpel16_v_s6
631cabdff1aSopenharmony_ciendfunc
632cabdff1aSopenharmony_ci
/*
 * qpel16 mc23 entry point (put or avg, chosen by the macro argument).
 * Runs the packed horizontal lowpass helper (s5 variant) with d0 = d1 = 20
 * on both 8-byte halves into the stack scratch area, sets d1 = 52 and
 * branches to the shared _rv40_qpel16_v_s6 tail, which also releases the
 * frame and returns.
 * NOTE(review): r0/r1/r2 are presumably dst/src/stride, d0/d1 the filter
 * coefficients and r3 the row count read by the helpers (defined outside
 * this section) - confirm there.
 */
633cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc23_neon, export=1
634cabdff1aSopenharmony_ci        sub             r1,  r1,  r2,  lsl #1   @ r1 -= 2 * r2
635cabdff1aSopenharmony_ci        sub             r1,  r1,  #2            @ r1 -= 2
636cabdff1aSopenharmony_ci        push            {r1, lr}                @ adjusted r1 is reloaded for the second half
637cabdff1aSopenharmony_ci        vpush           {d8-d15}                @ 64 bytes, callee-saved NEON registers
638cabdff1aSopenharmony_ci        sub             sp,  sp,  #44*8         @ 352 bytes of scratch
639cabdff1aSopenharmony_ci        add             r12, sp,  #7
640cabdff1aSopenharmony_ci        bic             r12, r12, #7            @ r12 = scratch base rounded up to 8 bytes
641cabdff1aSopenharmony_ci        mov             r3,  #20
642cabdff1aSopenharmony_ci        vmov.i8         d0,  #20
643cabdff1aSopenharmony_ci        vmov.i8         d1,  #20
644cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s5_neon
645cabdff1aSopenharmony_ci        ldr             r1,  [sp, #416]         @ pushed r1: 352 (scratch) + 64 (d8-d15)
646cabdff1aSopenharmony_ci        add             r1,  r1,  #8            @ second half starts 8 bytes further on
647cabdff1aSopenharmony_ci        mov             r3,  #20
648cabdff1aSopenharmony_ci        bl              put_rv40_qpel8_h_lp_packed_s5_neon
649cabdff1aSopenharmony_ci        vmov.i8         d1,  #52                @ d0 = 20, d1 = 52 for the vertical tail
650cabdff1aSopenharmony_ci        b               .L\type\()_rv40_qpel16_v_s6
651cabdff1aSopenharmony_ciendfunc
652cabdff1aSopenharmony_ci
/*
 * qpel16 mc33 entry point (put or avg, chosen by the macro argument).
 * Does no filtering of its own: sets r3 = 16 and tail-branches to the
 * external pixels16_xy2 routine of the same type, leaving r0-r2 and lr
 * untouched so that routine returns directly to the caller.
 * NOTE(review): r3 is presumably the height argument of that routine -
 * confirm against its definition.
 */
653cabdff1aSopenharmony_cifunction ff_\type\()_rv40_qpel16_mc33_neon, export=1
654cabdff1aSopenharmony_ci        mov             r3,  #16
655cabdff1aSopenharmony_ci        b               X(ff_\type\()_pixels16_xy2_neon)
656cabdff1aSopenharmony_ciendfunc
657cabdff1aSopenharmony_ci.endm
658cabdff1aSopenharmony_ci
/* Expand the rv40_qpel macro twice, once with the argument put and once with avg. */
659cabdff1aSopenharmony_ci        rv40_qpel       put
660cabdff1aSopenharmony_ci        rv40_qpel       avg
661cabdff1aSopenharmony_ci
/*
 * rv40_weight: blend 16 bytes from each of two inputs with 16-bit weights.
 * In:    d2-d3 = 16 bytes of the first input, d4-d5 = 16 bytes of the second
 *        input, d0[0] and d0[2] = weights (16-bit lanes of d0).
 *        Both callers in this file fill d0 with vmov d0, r3, r12, so lane 0
 *        is the low half of w1 and lane 2 the low half of w2.
 * Out:   d2-d3 = (((first * d0[2]) >> 9) + ((second * d0[0]) >> 9) + 16) >> 5
 *        per byte, narrowed without saturation.
 * Clobb: q2, q3, q8-q15.
 */
662cabdff1aSopenharmony_ci.macro  rv40_weight
663cabdff1aSopenharmony_ci        vmovl.u8        q8,  d2                 @ widen first input to 16 bits
664cabdff1aSopenharmony_ci        vmovl.u8        q9,  d3
665cabdff1aSopenharmony_ci        vmovl.u8        q10, d4                 @ widen second input to 16 bits
666cabdff1aSopenharmony_ci        vmovl.u8        q11, d5
667cabdff1aSopenharmony_ci        vmull.u16       q2,  d16, d0[2]         @ first input * d0[2], 32-bit products
668cabdff1aSopenharmony_ci        vmull.u16       q3,  d17, d0[2]
669cabdff1aSopenharmony_ci        vmull.u16       q8,  d18, d0[2]
670cabdff1aSopenharmony_ci        vmull.u16       q9,  d19, d0[2]
671cabdff1aSopenharmony_ci        vmull.u16       q12, d20, d0[0]         @ second input * d0[0], 32-bit products
672cabdff1aSopenharmony_ci        vmull.u16       q13, d21, d0[0]
673cabdff1aSopenharmony_ci        vmull.u16       q14, d22, d0[0]
674cabdff1aSopenharmony_ci        vmull.u16       q15, d23, d0[0]
675cabdff1aSopenharmony_ci        vshrn.i32       d4,  q2,  #9            @ truncating >> 9, back to 16 bits
676cabdff1aSopenharmony_ci        vshrn.i32       d5,  q3,  #9
677cabdff1aSopenharmony_ci        vshrn.i32       d6,  q8,  #9
678cabdff1aSopenharmony_ci        vshrn.i32       d7,  q9,  #9
679cabdff1aSopenharmony_ci        vshrn.i32       d16, q12, #9
680cabdff1aSopenharmony_ci        vshrn.i32       d17, q13, #9
681cabdff1aSopenharmony_ci        vshrn.i32       d18, q14, #9
682cabdff1aSopenharmony_ci        vshrn.i32       d19, q15, #9
683cabdff1aSopenharmony_ci        vadd.u16        q2,  q2,  q8            @ sum of the two weighted terms
684cabdff1aSopenharmony_ci        vadd.u16        q3,  q3,  q9
685cabdff1aSopenharmony_ci        vrshrn.i16      d2,  q2,  #5            @ (sum + 16) >> 5, narrowed to bytes
686cabdff1aSopenharmony_ci        vrshrn.i16      d3,  q3,  #5
687cabdff1aSopenharmony_ci.endm
688cabdff1aSopenharmony_ci
689cabdff1aSopenharmony_ci/* void ff_rv40_weight_func_16_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
690cabdff1aSopenharmony_ci                                    int w1, int w2, int stride) */
/*
 * Weighted blend of two 16-byte-wide inputs over 16 rows.
 * In:    r0 = dst, r1 = src1, r2 = src2, r3 = w1, [sp] = w2, [sp, #4] = stride
 * Out:   16 rows of 16 bytes at dst, computed by the rv40_weight macro.
 * Clobb: r0-r3, r12, d0, q1-q3, q8-q15, flags.
 * The :128 qualifiers tell the CPU that every row address of dst, src1 and
 * src2 is 16-byte aligned.
 */
691cabdff1aSopenharmony_cifunction ff_rv40_weight_func_16_neon, export=1
692cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ r12 = w2
693cabdff1aSopenharmony_ci        vmov            d0,  r3,  r12           @ d0 = { w1, w2 } as two 32-bit words
694cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ r12 = stride
695cabdff1aSopenharmony_ci        mov             r3,  #16                @ r3 = rows left
696cabdff1aSopenharmony_ci1:
697cabdff1aSopenharmony_ci        vld1.8          {q1},     [r1,:128], r12
698cabdff1aSopenharmony_ci        vld1.8          {q2},     [r2,:128], r12
699cabdff1aSopenharmony_ci        rv40_weight
700cabdff1aSopenharmony_ci        vst1.8          {q1},     [r0,:128], r12
701cabdff1aSopenharmony_ci        subs            r3,  r3,  #1
702cabdff1aSopenharmony_ci        bne             1b
703cabdff1aSopenharmony_ci        bx              lr
704cabdff1aSopenharmony_ciendfunc
705cabdff1aSopenharmony_ci
706cabdff1aSopenharmony_ci/* void ff_rv40_weight_func_8_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
707cabdff1aSopenharmony_ci                                   int w1, int w2, int stride) */
/*
 * Weighted blend of two 8-byte-wide inputs over 8 rows.
 * In:    r0 = dst, r1 = src1, r2 = src2, r3 = w1, [sp] = w2, [sp, #4] = stride
 * Out:   8 rows of 8 bytes at dst, computed by the rv40_weight macro.
 * Clobb: r0-r3, r12, d0, q1-q3, q8-q15, flags.
 * Two rows are handled per iteration so that the 16-byte rv40_weight macro
 * is fully used; d2/d4 hold the first row and d3/d5 the second.
 * The :64 qualifiers tell the CPU that every row address is 8-byte aligned.
 */
708cabdff1aSopenharmony_cifunction ff_rv40_weight_func_8_neon, export=1
709cabdff1aSopenharmony_ci        ldr             r12, [sp]               @ r12 = w2
710cabdff1aSopenharmony_ci        vmov            d0,  r3,  r12           @ d0 = { w1, w2 } as two 32-bit words
711cabdff1aSopenharmony_ci        ldr             r12, [sp, #4]           @ r12 = stride
712cabdff1aSopenharmony_ci        mov             r3,  #8                 @ r3 = rows left
713cabdff1aSopenharmony_ci1:
714cabdff1aSopenharmony_ci        vld1.8          {d2},     [r1,:64], r12
715cabdff1aSopenharmony_ci        vld1.8          {d3},     [r1,:64], r12
716cabdff1aSopenharmony_ci        vld1.8          {d4},     [r2,:64], r12
717cabdff1aSopenharmony_ci        vld1.8          {d5},     [r2,:64], r12
718cabdff1aSopenharmony_ci        rv40_weight
719cabdff1aSopenharmony_ci        vst1.8          {d2},     [r0,:64], r12
720cabdff1aSopenharmony_ci        vst1.8          {d3},     [r0,:64], r12
721cabdff1aSopenharmony_ci        subs            r3,  r3,  #2            @ two rows done per iteration
722cabdff1aSopenharmony_ci        bne             1b
723cabdff1aSopenharmony_ci        bx              lr
724cabdff1aSopenharmony_ciendfunc
725cabdff1aSopenharmony_ci
/*
 * Loop filter strength decision across a horizontal edge, 4 pixels wide.
 * In:    r0 = address of the 4 pixels of row 0, r1 = row stride,
 *        r2 = beta, r3 = beta2 (names as used by the in-line comments),
 *        [sp] = int tested against zero, [sp, #4] and [sp, #8] = addresses
 *        of two 32-bit outputs.
 *        NOTE(review): the stack arguments are presumably edge, p1 and q1 of
 *        the C prototype - confirm against the declaration.
 * Out:   With S(n) the sum of the 4 pixels of row n:
 *          word at [sp, #4] = 1 if |S(-1) - S(-2)| < beta << 2, else 0
 *          word at [sp, #8] = 1 if |S(0)  - S(1)|  < beta << 2, else 0
 *          r0 = 0 if [sp] is zero; otherwise 0xffff when both tests above and
 *               |S(-3) - S(-2)| < beta2 and |S(2) - S(1)| < beta2 all hold,
 *               else 0.
 *        All comparisons are unsigned on 16-bit lanes.
 *        If the two words compared at the top are equal, both outputs and
 *        r0 are 0 and no sums are computed.
 * Clobb: r0, r2, r3, r12, q0, q2, q8, q9, q12, d30, flags.
 */
726cabdff1aSopenharmony_cifunction ff_rv40_h_loop_filter_strength_neon, export=1
727cabdff1aSopenharmony_ci        pkhbt           r2,  r3,  r2,  lsl #18  @ r2 = low half beta2, high half low 16 bits of beta << 2
728cabdff1aSopenharmony_ci
729cabdff1aSopenharmony_ci        ldr             r3,  [r0]               @ 4 pixels of row 0 as one word
730cabdff1aSopenharmony_ci        ldr_dpre        r12, r0,  r1            @ NOTE(review): presumably loads row -1 and leaves r0 -= r1 - confirm in asm.S
731cabdff1aSopenharmony_ci        teq             r3,  r12
732cabdff1aSopenharmony_ci        beq             1f                      @ identical words: take the all-zero exit
733cabdff1aSopenharmony_ci
734cabdff1aSopenharmony_ci        sub             r0,  r0,  r1,  lsl #1   @ r0 -= 2 * r1
735cabdff1aSopenharmony_ci
736cabdff1aSopenharmony_ci        vld1.32         {d4[]},   [r0,:32], r1  @ -3
737cabdff1aSopenharmony_ci        vld1.32         {d0[]},   [r0,:32], r1  @ -2
738cabdff1aSopenharmony_ci        vld1.32         {d4[1]},  [r0,:32], r1  @ -1
739cabdff1aSopenharmony_ci        vld1.32         {d5[]},   [r0,:32], r1  @  0
740cabdff1aSopenharmony_ci        vld1.32         {d1[]},   [r0,:32], r1  @  1
741cabdff1aSopenharmony_ci        vld1.32         {d5[0]},  [r0,:32], r1  @  2
742cabdff1aSopenharmony_ci
743cabdff1aSopenharmony_ci        vpaddl.u8       q8,  q0                 @ -2, -2, -2, -2,  1,  1,  1,  1
744cabdff1aSopenharmony_ci        vpaddl.u8       q9,  q2                 @ -3, -3, -1, -1,  2,  2,  0,  0
745cabdff1aSopenharmony_ci        vdup.32         d30, r2                 @ beta2, beta << 2
746cabdff1aSopenharmony_ci        vpadd.u16       d16, d16, d17           @ -2, -2,  1,  1
747cabdff1aSopenharmony_ci        vpadd.u16       d18, d18, d19           @ -3, -1,  2,  0
748cabdff1aSopenharmony_ci        vabd.u16        d16, d18, d16           @ absolute differences of the row sums
749cabdff1aSopenharmony_ci        vclt.u16        d16, d16, d30           @ lanes 0/2 vs beta2, lanes 1/3 vs beta << 2
750cabdff1aSopenharmony_ci
751cabdff1aSopenharmony_ci        ldrd            r2,  r3,  [sp, #4]      @ r2, r3 = output addresses
752cabdff1aSopenharmony_ci        vmovl.u16       q12, d16                @ widen the masks before d16 is shuffled
753cabdff1aSopenharmony_ci        vtrn.16         d16, d17                @ d17 lanes 0/2 = old d16 lanes 1/3
754cabdff1aSopenharmony_ci        vshr.u32        q12, q12, #15           @ 0xffff -> 1, 0 -> 0
755cabdff1aSopenharmony_ci        ldr             r0,  [sp]
756cabdff1aSopenharmony_ci        vst1.32         {d24[1]}, [r2,:32]      @ result of the lane 1 test
757cabdff1aSopenharmony_ci        vst1.32         {d25[1]}, [r3,:32]      @ result of the lane 3 test
758cabdff1aSopenharmony_ci
759cabdff1aSopenharmony_ci        cmp             r0,  #0
760cabdff1aSopenharmony_ci        it              eq
761cabdff1aSopenharmony_ci        bxeq            lr                      @ [sp] == 0: return 0
762cabdff1aSopenharmony_ci
763cabdff1aSopenharmony_ci        vand            d18, d16, d17           @ lane 0 = tests 0 and 1, lane 2 = tests 2 and 3
764cabdff1aSopenharmony_ci        vtrn.32         d18, d19                @ bring lane 2 down to lane 0 of d19
765cabdff1aSopenharmony_ci        vand            d18, d18, d19           @ lane 0 = all four tests
766cabdff1aSopenharmony_ci        vmov.u16        r0,  d18[0]
767cabdff1aSopenharmony_ci        bx              lr
768cabdff1aSopenharmony_ci1:
769cabdff1aSopenharmony_ci        ldrd            r2,  r3,  [sp, #4]
770cabdff1aSopenharmony_ci        mov             r0,  #0
771cabdff1aSopenharmony_ci        str             r0,  [r2]
772cabdff1aSopenharmony_ci        str             r0,  [r3]
773cabdff1aSopenharmony_ci        bx              lr
774cabdff1aSopenharmony_ciendfunc
775cabdff1aSopenharmony_ci
/*
 * Loop filter strength decision across a vertical edge, 4 rows high.
 * In:    r0 = address of column 0 in the first row, r1 = row stride,
 *        r2 = beta, r3 = beta2 (names as used by the in-line comment of the
 *        horizontal variant), [sp] = int tested against zero,
 *        [sp, #4] and [sp, #8] = addresses of two 32-bit outputs.
 *        NOTE(review): the stack arguments are presumably edge, p1 and q1 of
 *        the C prototype - confirm against the declaration.
 *        Each of the 4 rows is read as 8 bytes starting at column -3, with no
 *        alignment qualifier.
 * Out:   With C(n) the sum over the 4 rows of column n:
 *          word at [sp, #4] = 1 if |C(-1) - C(-2)| < beta << 2, else 0
 *          word at [sp, #8] = 1 if |C(1)  - C(0)|  < beta << 2, else 0
 *          r0 = 0 if [sp] is zero; otherwise 0xffff when both tests above and
 *               |C(-2) - C(-3)| < beta2 and |C(2) - C(1)| < beta2 all hold,
 *               else 0.
 *        All comparisons are unsigned on 16-bit lanes.
 * Clobb: r0, r2, r3, q0, q1, q15, flags.
 */
776cabdff1aSopenharmony_cifunction ff_rv40_v_loop_filter_strength_neon, export=1
777cabdff1aSopenharmony_ci        sub             r0,  r0,  #3            @ start reading at column -3
778cabdff1aSopenharmony_ci        pkhbt           r2,  r3,  r2,  lsl #18  @ r2 = low half beta2, high half low 16 bits of beta << 2
779cabdff1aSopenharmony_ci
780cabdff1aSopenharmony_ci        vld1.8          {d0},     [r0], r1
781cabdff1aSopenharmony_ci        vld1.8          {d1},     [r0], r1
782cabdff1aSopenharmony_ci        vld1.8          {d2},     [r0], r1
783cabdff1aSopenharmony_ci        vld1.8          {d3},     [r0], r1
784cabdff1aSopenharmony_ci
785cabdff1aSopenharmony_ci        vaddl.u8        q0,  d0,  d1            @ rows 0 + 1, per column
786cabdff1aSopenharmony_ci        vaddl.u8        q1,  d2,  d3            @ rows 2 + 3, per column
787cabdff1aSopenharmony_ci        vdup.32         q15, r2                 @ even lanes beta2, odd lanes beta << 2
788cabdff1aSopenharmony_ci        vadd.u16        q0,  q0,  q1            @ -3, -2, -1,  0,  1,  2
789cabdff1aSopenharmony_ci        vext.16         q1,  q0,  q0,  #1       @ -2, -1,  0,  1,  2
790cabdff1aSopenharmony_ci        vabd.u16        q0,  q1,  q0            @ lane n = abs diff of adjacent column sums
791cabdff1aSopenharmony_ci        vclt.u16        q0,  q0,  q15
792cabdff1aSopenharmony_ci
793cabdff1aSopenharmony_ci        ldrd            r2,  r3,  [sp, #4]      @ r2, r3 = output addresses
794cabdff1aSopenharmony_ci        vmovl.u16       q1,  d0                 @ widen masks of lanes 0-3
795cabdff1aSopenharmony_ci        vext.16         d1,  d0,  d1,  #3       @ d1 = masks of lanes 3, 4, 5, 6
796cabdff1aSopenharmony_ci        vshr.u32        q1,  q1,  #15           @ 0xffff -> 1, 0 -> 0
797cabdff1aSopenharmony_ci        ldr             r0,  [sp]
798cabdff1aSopenharmony_ci        vst1.32         {d2[1]},  [r2,:32]      @ result of the lane 1 test
799cabdff1aSopenharmony_ci        vst1.32         {d3[1]},  [r3,:32]      @ result of the lane 3 test
800cabdff1aSopenharmony_ci
801cabdff1aSopenharmony_ci        cmp             r0,  #0
802cabdff1aSopenharmony_ci        it              eq
803cabdff1aSopenharmony_ci        bxeq            lr                      @ [sp] == 0: return 0
804cabdff1aSopenharmony_ci
805cabdff1aSopenharmony_ci        vand            d0,  d0,  d1            @ lane 0 = tests 0 and 3, lane 1 = tests 1 and 4
806cabdff1aSopenharmony_ci        vtrn.16         d0,  d1                 @ bring lane 1 down to lane 0 of d1
807cabdff1aSopenharmony_ci        vand            d0,  d0,  d1            @ lane 0 = all four tests
808cabdff1aSopenharmony_ci        vmov.u16        r0,  d0[0]
809cabdff1aSopenharmony_ci        bx              lr
810cabdff1aSopenharmony_ciendfunc
811cabdff1aSopenharmony_ci
/*
 * rv40_weak_loop_filter: shared core of the two weak loop filter functions.
 * In:    r2 = filter_p1, r3 = filter_q1,
 *        [sp] = alpha, [sp, #4] = beta, [sp, #8] = lim_p0q0,
 *        [sp, #12] = lim_q1, [sp, #16] = lim_p1
 *        (names as used by the in-line comments below).
 *        Pixels, one 32-bit group of 4 per line across the edge:
 *          d4 = { -3, -1 }, d5 = { 2, 0 }, d0 = { -2, -2 }, d1 = { 1, 1 }
 *        This is the layout produced by the loads in the horizontal-edge
 *        caller.
 * Out:   d4 = filtered { -1, 0 }, d5 = filtered { -2, 1 }, as bytes saturated
 *        to 0..255 by vqmovun; q3 still holds the 16-bit { -2, 1 } values.
 * Clobb: r2, r3, r12, d0-d7, d16-d31.
 * The stack arguments are read relative to sp, so sp must still have its
 * function-entry value when the macro is expanded.
 * Loads and arithmetic are interleaved on purpose; keep the instruction
 * order when editing.
 */
812cabdff1aSopenharmony_ci.macro  rv40_weak_loop_filter
813cabdff1aSopenharmony_ci        vdup.16         d30, r2                 @ filter_p1
814cabdff1aSopenharmony_ci        vdup.16         d31, r3                 @ filter_q1
815cabdff1aSopenharmony_ci        ldrd            r2,  r3,  [sp]
816cabdff1aSopenharmony_ci        vdup.16         d28, r2                 @ alpha
817cabdff1aSopenharmony_ci        vdup.16         d29, r3                 @ beta
818cabdff1aSopenharmony_ci        ldr             r12, [sp, #8]
819cabdff1aSopenharmony_ci        vdup.16         d25, r12                @ lim_p0q0
820cabdff1aSopenharmony_ci        ldrd            r2,  r3,  [sp, #12]
821cabdff1aSopenharmony_ci        vsubl.u8        q9,  d5,  d4            @ x, t
822cabdff1aSopenharmony_ci        vabdl.u8        q8,  d5,  d4            @ x, abs(t)
823cabdff1aSopenharmony_ci        vneg.s16        q15, q15
824cabdff1aSopenharmony_ci        vceq.i16        d16, d19, #0            @ !t
825cabdff1aSopenharmony_ci        vshl.s16        d19, d19, #2            @ t << 2
826cabdff1aSopenharmony_ci        vmul.u16        d18, d17, d28           @ alpha * abs(t)
827cabdff1aSopenharmony_ci        vand            d24, d30, d31           @ filter_p1 & filter_q1
828cabdff1aSopenharmony_ci        vsubl.u8        q1,  d0,  d4            @ p1p2, p1p0
829cabdff1aSopenharmony_ci        vsubl.u8        q3,  d1,  d5            @ q1q2, q1q0
830cabdff1aSopenharmony_ci        vmov.i16        d22, #3
831cabdff1aSopenharmony_ci        vshr.u16        d18, d18, #7
832cabdff1aSopenharmony_ci        vadd.i16        d22, d22, d24           @ 3 - (filter_p1 & filter_q1)
833cabdff1aSopenharmony_ci        vsubl.u8        q10, d0,  d1            @ src[-2] - src[1]
834cabdff1aSopenharmony_ci        vcle.u16        d18, d18, d22
835cabdff1aSopenharmony_ci        vand            d20, d20, d24
836cabdff1aSopenharmony_ci        vneg.s16        d23, d25                @ -lim_p0q0
837cabdff1aSopenharmony_ci        vadd.s16        d19, d19, d20
838cabdff1aSopenharmony_ci        vbic            d16, d18, d16           @ t && u <= 3 - (fp1 & fq1)
839cabdff1aSopenharmony_ci        vtrn.32         d4,  d5                 @ -3,  2, -1,  0
840cabdff1aSopenharmony_ci        vrshr.s16       d19, d19, #3
841cabdff1aSopenharmony_ci        vmov            d28, d29                @ beta
842cabdff1aSopenharmony_ci        vswp            d3,  d6                 @ q1q2, p1p0
843cabdff1aSopenharmony_ci        vmin.s16        d19, d19, d25
844cabdff1aSopenharmony_ci        vand            d30, d30, d16
845cabdff1aSopenharmony_ci        vand            d31, d31, d16
846cabdff1aSopenharmony_ci        vadd.s16        q10, q1,  q3            @ p1p2 + p1p0, q1q2 + q1q0
847cabdff1aSopenharmony_ci        vmax.s16        d19, d19, d23           @ diff
848cabdff1aSopenharmony_ci        vabs.s16        q1,  q1                 @ abs(p1p2), abs(q1q2)
849cabdff1aSopenharmony_ci        vand            d18, d19, d16           @ diff
850cabdff1aSopenharmony_ci        vcle.u16        q1,  q1,  q14
851cabdff1aSopenharmony_ci        vneg.s16        d19, d18                @ -diff
852cabdff1aSopenharmony_ci        vdup.16         d26, r3                 @ lim_p1
853cabdff1aSopenharmony_ci        vaddw.u8        q2,  q9,  d5            @ src[-1]+diff, src[0]-diff
854cabdff1aSopenharmony_ci        vhsub.s16       q11, q10, q9
855cabdff1aSopenharmony_ci        vand            q1,  q1,  q15
856cabdff1aSopenharmony_ci        vqmovun.s16     d4,  q2                 @ -1,  0
857cabdff1aSopenharmony_ci        vand            q9,  q11, q1
858cabdff1aSopenharmony_ci        vdup.16         d27, r2                 @ lim_q1
859cabdff1aSopenharmony_ci        vneg.s16        q9,  q9
860cabdff1aSopenharmony_ci        vneg.s16        q14, q13
861cabdff1aSopenharmony_ci        vmin.s16        q9,  q9,  q13
862cabdff1aSopenharmony_ci        vtrn.32         d0,  d1                 @ -2,  1,  -2,  1
863cabdff1aSopenharmony_ci        vmax.s16        q9,  q9,  q14
864cabdff1aSopenharmony_ci        vaddw.u8        q3,  q9,  d0
865cabdff1aSopenharmony_ci        vqmovun.s16     d5,  q3                 @ -2,  1
866cabdff1aSopenharmony_ci.endm
867cabdff1aSopenharmony_ci
/*
 * Weak loop filter across a horizontal edge, 4 pixels wide.
 * In:    r0 = address of the 4 pixels of row 0, r1 = row stride;
 *        r2, r3 and the stack arguments are consumed unchanged by the
 *        rv40_weak_loop_filter macro (filter_p1, filter_q1, alpha, beta,
 *        lim_p0q0, lim_q1, lim_p1 per its in-line comments).
 * Out:   rows -2, -1, 0 and 1 are rewritten in place; rows -3 and 2 are
 *        only read.
 * Clobb: r0, r2, r3, r12, d0-d7, d16-d31.
 * The :32 qualifiers tell the CPU that every row address is 4-byte aligned.
 */
868cabdff1aSopenharmony_cifunction ff_rv40_h_weak_loop_filter_neon, export=1
869cabdff1aSopenharmony_ci        sub             r0,  r0,  r1,  lsl #1
870cabdff1aSopenharmony_ci        sub             r0,  r0,  r1            @ r0 = row -3
871cabdff1aSopenharmony_ci
872cabdff1aSopenharmony_ci        vld1.32         {d4[]},   [r0,:32], r1  @ -3 (both halves of d4)
873cabdff1aSopenharmony_ci        vld1.32         {d0[]},   [r0,:32], r1  @ -2
874cabdff1aSopenharmony_ci        vld1.32         {d4[1]},  [r0,:32], r1  @ -1, d4 = { -3, -1 }
875cabdff1aSopenharmony_ci        vld1.32         {d5[]},   [r0,:32], r1  @  0 (both halves of d5)
876cabdff1aSopenharmony_ci        vld1.32         {d1[]},   [r0,:32], r1  @  1
877cabdff1aSopenharmony_ci        vld1.32         {d5[0]},  [r0,:32]      @  2, d5 = { 2, 0 }
878cabdff1aSopenharmony_ci
879cabdff1aSopenharmony_ci        sub             r0,  r0,  r1,  lsl #2   @ back from row 2 to row -2
880cabdff1aSopenharmony_ci
881cabdff1aSopenharmony_ci        rv40_weak_loop_filter
882cabdff1aSopenharmony_ci
883cabdff1aSopenharmony_ci        vst1.32         {d5[0]},  [r0,:32], r1  @ -2
884cabdff1aSopenharmony_ci        vst1.32         {d4[0]},  [r0,:32], r1  @ -1
885cabdff1aSopenharmony_ci        vst1.32         {d4[1]},  [r0,:32], r1  @  0
886cabdff1aSopenharmony_ci        vst1.32         {d5[1]},  [r0,:32], r1  @  1
887cabdff1aSopenharmony_ci
888cabdff1aSopenharmony_ci        bx              lr
889cabdff1aSopenharmony_ciendfunc
890cabdff1aSopenharmony_ci
/*
 * Weak loop filter across a vertical edge, 4 rows high.
 * In:    r0 = address of column 0 in the first row, r1 = row stride;
 *        r2, r3 and the stack arguments are consumed unchanged by the
 *        rv40_weak_loop_filter macro (filter_p1, filter_q1, alpha, beta,
 *        lim_p0q0, lim_q1, lim_p1 per its in-line comments).
 * Out:   columns -2, -1, 0 and 1 of the 4 rows are rewritten in place.
 *        Each row is read as 8 bytes starting at column -3, with no alignment
 *        qualifier.
 * Clobb: r0, r2, r3, r12, d0-d7, d16-d31.
 * The transposes before the macro rearrange the 4 rows into the per-column
 * register layout the macro expects; the shuffle after it puts the four
 * output columns into d4-d7 so that each vst4.8 lane store writes one row.
 */
891cabdff1aSopenharmony_cifunction ff_rv40_v_weak_loop_filter_neon, export=1
892cabdff1aSopenharmony_ci        sub             r12, r0,  #3            @ r12 = load address, column -3
893cabdff1aSopenharmony_ci        sub             r0,  r0,  #2            @ r0 = store address, column -2
894cabdff1aSopenharmony_ci
895cabdff1aSopenharmony_ci        vld1.8          {d4},     [r12], r1
896cabdff1aSopenharmony_ci        vld1.8          {d5},     [r12], r1
897cabdff1aSopenharmony_ci        vld1.8          {d2},     [r12], r1
898cabdff1aSopenharmony_ci        vld1.8          {d3},     [r12], r1
899cabdff1aSopenharmony_ci
900cabdff1aSopenharmony_ci        vtrn.16         q2,  q1
901cabdff1aSopenharmony_ci        vtrn.8          d4,  d5
902cabdff1aSopenharmony_ci        vtrn.8          d2,  d3
903cabdff1aSopenharmony_ci
904cabdff1aSopenharmony_ci        vrev64.32       d5,  d5
905cabdff1aSopenharmony_ci        vtrn.32         q2,  q1
906cabdff1aSopenharmony_ci        vdup.32         d0,  d3[0]
907cabdff1aSopenharmony_ci        vdup.32         d1,  d2[0]
908cabdff1aSopenharmony_ci
909cabdff1aSopenharmony_ci        rv40_weak_loop_filter
910cabdff1aSopenharmony_ci
911cabdff1aSopenharmony_ci        vtrn.32         q2,  q3
912cabdff1aSopenharmony_ci        vswp            d4,  d5                 @ low words of d4..d7 = columns -2, -1, 0, 1
913cabdff1aSopenharmony_ci
914cabdff1aSopenharmony_ci        vst4.8          {d4[0],d5[0],d6[0],d7[0]}, [r0], r1
915cabdff1aSopenharmony_ci        vst4.8          {d4[1],d5[1],d6[1],d7[1]}, [r0], r1
916cabdff1aSopenharmony_ci        vst4.8          {d4[2],d5[2],d6[2],d7[2]}, [r0], r1
917cabdff1aSopenharmony_ci        vst4.8          {d4[3],d5[3],d6[3],d7[3]}, [r0], r1
918cabdff1aSopenharmony_ci
919cabdff1aSopenharmony_ci        bx              lr
920cabdff1aSopenharmony_ciendfunc
921